LLVM 23.0.0git
RISCVVectorPeephole.cpp
Go to the documentation of this file.
1//===- RISCVVectorPeephole.cpp - MI Vector Pseudo Peepholes ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs various vector pseudo peephole optimisations after
10// instruction selection.
11//
12// Currently it converts vmerge.vvm to vmv.v.v
13// PseudoVMERGE_VVM %false, %false, %true, %allonesmask, %vl, %sew
14// ->
15// PseudoVMV_V_V %false, %true, %vl, %sew
16//
17// And masked pseudos to unmasked pseudos
18// PseudoVADD_V_V_MASK %passthru, %a, %b, %allonesmask, %vl, sew, policy
19// ->
20// PseudoVADD_V_V %passthru %a, %b, %vl, sew, policy
21//
22// It also converts AVLs to VLMAX where possible
23// %vl = VLENB * something
24// PseudoVADD_V_V %passthru, %a, %b, %vl, sew, policy
25// ->
26// PseudoVADD_V_V %passthru, %a, %b, -1, sew, policy
27//
28//===----------------------------------------------------------------------===//
29
30#include "RISCV.h"
31#include "RISCVSubtarget.h"
36
37using namespace llvm;
38
39#define DEBUG_TYPE "riscv-vector-peephole"
40
41namespace {
42
// Post-ISel machine-function pass that performs the vector pseudo peepholes
// described in the file header: vmerge->vmv rewrites, masked->unmasked
// conversion, AVL->VLMAX canonicalization, VL reduction and vmv.v.v folding.
43class RISCVVectorPeephole : public MachineFunctionPass {
44public:
45 static char ID;
46 const TargetInstrInfo *TII;
// NOTE(review): member declarations at original lines 47-48 (presumably
// MachineRegisterInfo *MRI and const TargetRegisterInfo *TRI, both used
// throughout this file) are missing from this extracted view.
49 const RISCVSubtarget *ST;
50 RISCVVectorPeephole() : MachineFunctionPass(ID) {}
51
52 bool runOnMachineFunction(MachineFunction &MF) override;
53 MachineFunctionProperties getRequiredProperties() const override {
// Runs on SSA machine IR: the transforms rely on getVRegDef/hasOneUse
// queries which assume a unique virtual-register definition.
54 return MachineFunctionProperties().setIsSSA();
55 }
56
57 StringRef getPassName() const override {
58 return "RISC-V Vector Peephole Optimization";
59 }
60
61private:
// Each peephole takes a candidate MI and returns true if it changed it.
62 bool tryToReduceVL(MachineInstr &MI) const;
63 bool convertToVLMAX(MachineInstr &MI) const;
64 bool convertToWholeRegister(MachineInstr &MI) const;
65 bool convertToUnmasked(MachineInstr &MI) const;
66 bool convertAllOnesVMergeToVMv(MachineInstr &MI) const;
67 bool convertSameMaskVMergeToVMv(MachineInstr &MI);
68 bool foldUndefPassthruVMV_V_V(MachineInstr &MI);
69 bool foldVMV_V_V(MachineInstr &MI);
70 bool foldVMergeToMask(MachineInstr &MI) const;
71
// Shared helpers used by the peepholes above.
72 bool hasSameEEW(const MachineInstr &User, const MachineInstr &Src) const;
73 bool isAllOnesMask(const MachineInstr *MaskDef) const;
74 std::optional<unsigned> getConstant(const MachineOperand &VL) const;
75 bool ensureDominates(const MachineOperand &Use, MachineInstr &Src) const;
// NOTE(review): the lookThruCopies declaration is truncated here — its
// return type (line 76, presumably Register) and trailing parameter
// (line 78, presumably SmallVectorImpl<MachineInstr *> *Copies = nullptr,
// matching the definition below) are missing from this view.
77 lookThruCopies(Register Reg, bool OneUseOnly = false,
79};
80
81} // namespace
82
// Pass identification; address of ID is the unique pass identity.
83char RISCVVectorPeephole::ID = 0;
84
// Register with the legacy pass manager under the DEBUG_TYPE command-line name.
85INITIALIZE_PASS(RISCVVectorPeephole, DEBUG_TYPE, "RISC-V Fold Masks", false,
86 false)
87
88/// Given \p User that has an input operand with EEW=SEW, which uses the dest
89/// operand of \p Src with an unknown EEW, return true if their EEWs match.
90bool RISCVVectorPeephole::hasSameEEW(const MachineInstr &User,
91 const MachineInstr &Src) const {
92 unsigned UserLog2SEW =
93 User.getOperand(RISCVII::getSEWOpNum(User.getDesc())).getImm();
94 unsigned SrcLog2SEW =
95 Src.getOperand(RISCVII::getSEWOpNum(Src.getDesc())).getImm();
96 unsigned SrcLog2EEW = RISCV::getDestLog2EEW(
97 TII->get(RISCV::getRVVMCOpcode(Src.getOpcode())), SrcLog2SEW);
98 return SrcLog2EEW == UserLog2SEW;
99}
100
101// Attempt to reduce the VL of an instruction whose sole use is feeding a
102// instruction with a narrower VL. This currently works backwards from the
103// user instruction (which might have a smaller VL).
104bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const {
105 // Note that the goal here is a bit multifaceted.
106 // 1) For store's reducing the VL of the value being stored may help to
107 // reduce VL toggles. This is somewhat of an artifact of the fact we
108 // promote arithmetic instructions but VL predicate stores.
109 // 2) For vmv.v.v reducing VL eagerly on the source instruction allows us
110 // to share code with the foldVMV_V_V transform below.
111 //
112 // Note that to the best of our knowledge, reducing VL is generally not
113 // a significant win on real hardware unless we can also reduce LMUL which
114 // this code doesn't try to do.
115 //
116 // TODO: We can handle a bunch more instructions here, and probably
117 // recurse backwards through operands too.
// SrcIndices lists which operand indices of MI are candidate source values
// whose defining instruction's VL may be shrunk to MI's VL.
118 SmallVector<unsigned, 2> SrcIndices = {0};
119 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
120 default:
121 return false;
// Unit-strided stores: the stored value is operand 0 (the default).
122 case RISCV::VSE8_V:
123 case RISCV::VSE16_V:
124 case RISCV::VSE32_V:
125 case RISCV::VSE64_V:
126 break;
// vmv.v.v: the copied source is operand 2 (after dest and passthru).
127 case RISCV::VMV_V_V:
128 SrcIndices[0] = 2;
129 break;
// vmerge: both the false (2) and true (3) operands are candidates.
130 case RISCV::VMERGE_VVM:
131 SrcIndices.assign({2, 3});
132 break;
// Reductions: only the vector source (operand 2) is VL-sensitive.
133 case RISCV::VREDSUM_VS:
134 case RISCV::VREDMAXU_VS:
135 case RISCV::VREDMAX_VS:
136 case RISCV::VREDMINU_VS:
137 case RISCV::VREDMIN_VS:
138 case RISCV::VREDAND_VS:
139 case RISCV::VREDOR_VS:
140 case RISCV::VREDXOR_VS:
141 case RISCV::VWREDSUM_VS:
142 case RISCV::VWREDSUMU_VS:
143 case RISCV::VFREDUSUM_VS:
144 case RISCV::VFREDOSUM_VS:
145 case RISCV::VFREDMAX_VS:
146 case RISCV::VFREDMIN_VS:
147 case RISCV::VFWREDUSUM_VS:
148 case RISCV::VFWREDOSUM_VS:
149 SrcIndices[0] = 2;
150 break;
151 }
152
// If MI itself runs at VLMAX there is nothing narrower to propagate.
153 MachineOperand &VL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
154 if (VL.isImm() && VL.getImm() == RISCV::VLMaxSentinel)
155 return false;
156
157 bool Changed = false;
158 for (unsigned SrcIdx : SrcIndices) {
159 Register SrcReg = MI.getOperand(SrcIdx).getReg();
160 // Note: one *use*, not one *user*.
161 if (!MRI->hasOneUse(SrcReg))
162 continue;
163
// The producer must be a VL/SEW-carrying pseudo in the same block with a
// single def and no unmodeled side effects, or shrinking its VL is unsafe.
164 MachineInstr *Src = MRI->getVRegDef(SrcReg);
165 if (!Src || Src->hasUnmodeledSideEffects() ||
166 Src->getParent() != MI.getParent() || Src->getNumDefs() != 1 ||
167 !RISCVII::hasVLOp(Src->getDesc().TSFlags) ||
168 !RISCVII::hasSEWOp(Src->getDesc().TSFlags))
169 continue;
170
171 // Src's dest needs to have the same EEW as MI's input.
172 if (!hasSameEEW(MI, *Src))
173 continue;
174
// Instructions whose lane values depend on VL (e.g. vid-like ops) or that
// may raise FP exceptions would change behavior if VL shrank.
175 bool ElementsDependOnVL = RISCVII::elementsDependOnVL(
176 TII->get(RISCV::getRVVMCOpcode(Src->getOpcode())).TSFlags);
177 if (ElementsDependOnVL || Src->mayRaiseFPException())
178 continue;
179
// Only shrink: MI's VL must be provably <= Src's current VL (and differ).
180 MachineOperand &SrcVL =
181 Src->getOperand(RISCVII::getVLOpNum(Src->getDesc()));
182 if (VL.isIdenticalTo(SrcVL) || !RISCV::isVLKnownLE(VL, SrcVL))
183 continue;
184
// A register VL must dominate Src; ensureDominates may sink Src to fix it.
185 if (!ensureDominates(VL, *Src))
186 continue;
187
188 if (VL.isImm())
189 SrcVL.ChangeToImmediate(VL.getImm());
190 else if (VL.isReg())
191 SrcVL.ChangeToRegister(VL.getReg(), false);
192
193 Changed = true;
194 }
195
196 // TODO: For instructions with a passthru, we could clear the passthru
197 // and tail policy since we've just proven the tail is not demanded.
198 return Changed;
199}
200
201/// Check if an operand is an immediate or a materialized ADDI $x0, imm.
202std::optional<unsigned>
203RISCVVectorPeephole::getConstant(const MachineOperand &VL) const {
204 if (VL.isImm())
205 return VL.getImm();
206
207 MachineInstr *Def = MRI->getVRegDef(VL.getReg());
208 if (!Def || Def->getOpcode() != RISCV::ADDI ||
209 Def->getOperand(1).getReg() != RISCV::X0)
210 return std::nullopt;
211 return Def->getOperand(2).getImm();
212}
213
214/// Convert AVLs that are known to be VLMAX to the VLMAX sentinel.
215bool RISCVVectorPeephole::convertToVLMAX(MachineInstr &MI) const {
216 if (!RISCVII::hasVLOp(MI.getDesc().TSFlags) ||
217 !RISCVII::hasSEWOp(MI.getDesc().TSFlags))
218 return false;
219
// decodeVLMUL returns {multiplier, isFractional}.
220 auto LMUL = RISCVVType::decodeVLMUL(RISCVII::getLMul(MI.getDesc().TSFlags));
221 // Fixed-point value, denominator=8
222 unsigned LMULFixed = LMUL.second ? (8 / LMUL.first) : 8 * LMUL.first;
223 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
224 // A Log2SEW of 0 is an operation on mask registers only
225 unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
226 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
227 assert(8 * LMULFixed / SEW > 0);
228
229 // If the exact VLEN is known then we know VLMAX, check if the AVL == VLMAX.
230 MachineOperand &VL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
231 if (auto VLen = ST->getRealVLen(), AVL = getConstant(VL);
232 VLen && AVL && (*VLen * LMULFixed) / SEW == *AVL * 8) {
// NOTE(review): original line 233 is missing from this extracted view —
// presumably it rewrites VL to the sentinel, e.g.
// VL.ChangeToImmediate(RISCV::VLMaxSentinel); — confirm against upstream.
234 return true;
235 }
236
237 // If an AVL is a VLENB that's possibly scaled to be equal to VLMAX, convert
238 // it to the VLMAX sentinel value.
239 if (!VL.isReg())
240 return false;
241 MachineInstr *Def = MRI->getVRegDef(VL.getReg());
242 if (!Def)
243 return false;
244
245 // Fixed-point value, denominator=8
246 uint64_t ScaleFixed = 8;
247 // Check if the VLENB was potentially scaled with slli/srli
248 if (Def->getOpcode() == RISCV::SLLI) {
249 assert(Def->getOperand(2).getImm() < 64);
250 ScaleFixed <<= Def->getOperand(2).getImm();
251 Def = MRI->getVRegDef(Def->getOperand(1).getReg());
252 } else if (Def->getOpcode() == RISCV::SRLI) {
253 assert(Def->getOperand(2).getImm() < 64);
254 ScaleFixed >>= Def->getOperand(2).getImm();
255 Def = MRI->getVRegDef(Def->getOperand(1).getReg());
256 }
257
// After stripping an optional shift, the root must be a VLENB read.
258 if (!Def || Def->getOpcode() != RISCV::PseudoReadVLENB)
259 return false;
260
261 // AVL = (VLENB * Scale)
262 //
263 // VLMAX = (VLENB * 8 * LMUL) / SEW
264 //
265 // AVL == VLMAX
266 // -> VLENB * Scale == (VLENB * 8 * LMUL) / SEW
267 // -> Scale == (8 * LMUL) / SEW
268 if (ScaleFixed != 8 * LMULFixed / SEW)
269 return false;
270
// NOTE(review): original line 271 is missing from this extracted view —
// presumably VL.ChangeToImmediate(RISCV::VLMaxSentinel); — confirm upstream.
272
273 return true;
274}
275
276bool RISCVVectorPeephole::isAllOnesMask(const MachineInstr *MaskDef) const {
277 while (MaskDef->isCopy() && MaskDef->getOperand(1).getReg().isVirtual())
278 MaskDef = MRI->getVRegDef(MaskDef->getOperand(1).getReg());
279
280 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
281 // undefined behaviour if it's the wrong bitwidth, so we could choose to
282 // assume that it's all-ones? Same applies to its VL.
283 switch (MaskDef->getOpcode()) {
284 case RISCV::PseudoVMSET_M_B1:
285 case RISCV::PseudoVMSET_M_B2:
286 case RISCV::PseudoVMSET_M_B4:
287 case RISCV::PseudoVMSET_M_B8:
288 case RISCV::PseudoVMSET_M_B16:
289 case RISCV::PseudoVMSET_M_B32:
290 case RISCV::PseudoVMSET_M_B64:
291 return true;
292 default:
293 return false;
294 }
295}
296
297/// Convert unit strided unmasked loads and stores to whole-register equivalents
298/// to avoid the dependency on $vl and $vtype.
299///
300/// %x = PseudoVLE8_V_M1 %passthru, %ptr, %vlmax, policy
301/// PseudoVSE8_V_M1 %v, %ptr, %vlmax
302///
303/// ->
304///
305/// %x = VL1RE8_V %ptr
306/// VS1R_V %v, %ptr
307bool RISCVVectorPeephole::convertToWholeRegister(MachineInstr &MI) const {
308#define CASE_WHOLE_REGISTER_LMUL_SEW(lmul, sew) \
309 case RISCV::PseudoVLE##sew##_V_M##lmul: \
310 NewOpc = RISCV::VL##lmul##RE##sew##_V; \
311 break; \
312 case RISCV::PseudoVSE##sew##_V_M##lmul: \
313 NewOpc = RISCV::VS##lmul##R_V; \
314 break;
315#define CASE_WHOLE_REGISTER_LMUL(lmul) \
316 CASE_WHOLE_REGISTER_LMUL_SEW(lmul, 8) \
317 CASE_WHOLE_REGISTER_LMUL_SEW(lmul, 16) \
318 CASE_WHOLE_REGISTER_LMUL_SEW(lmul, 32) \
319 CASE_WHOLE_REGISTER_LMUL_SEW(lmul, 64)
320
321 unsigned NewOpc;
322 switch (MI.getOpcode()) {
// NOTE(review): original lines 323-326 are missing from this extracted view —
// presumably the CASE_WHOLE_REGISTER_LMUL(1/2/4/8) macro invocations that
// populate the switch with all LMUL/SEW load/store cases; confirm upstream.
327 default:
328 return false;
329 }
330
// Whole-register moves only match when the AVL is the VLMAX sentinel.
331 MachineOperand &VLOp = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
332 if (!VLOp.isImm() || VLOp.getImm() != RISCV::VLMaxSentinel)
333 return false;
334
335 // Whole register instructions aren't pseudos so they don't have
336 // policy/SEW/AVL ops, and they don't have passthrus.
337 if (RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags))
338 MI.removeOperand(RISCVII::getVecPolicyOpNum(MI.getDesc()));
339 MI.removeOperand(RISCVII::getSEWOpNum(MI.getDesc()));
340 MI.removeOperand(RISCVII::getVLOpNum(MI.getDesc()));
341 if (RISCVII::isFirstDefTiedToFirstUse(MI.getDesc()))
342 MI.removeOperand(1);
343
344 MI.setDesc(TII->get(NewOpc));
345
346 return true;
347}
348
349static unsigned getVMV_V_VOpcodeForVMERGE_VVM(const MachineInstr &MI) {
350#define CASE_VMERGE_TO_VMV(lmul) \
351 case RISCV::PseudoVMERGE_VVM_##lmul: \
352 return RISCV::PseudoVMV_V_V_##lmul;
353 switch (MI.getOpcode()) {
354 default:
355 return 0;
356 CASE_VMERGE_TO_VMV(MF8)
357 CASE_VMERGE_TO_VMV(MF4)
358 CASE_VMERGE_TO_VMV(MF2)
359 CASE_VMERGE_TO_VMV(M1)
360 CASE_VMERGE_TO_VMV(M2)
361 CASE_VMERGE_TO_VMV(M4)
362 CASE_VMERGE_TO_VMV(M8)
363 }
364}
365
366/// Convert a PseudoVMERGE_VVM with an all ones mask to a PseudoVMV_V_V.
367///
368/// %x = PseudoVMERGE_VVM %passthru, %false, %true, %allones, sew, vl
369/// ->
370/// %x = PseudoVMV_V_V %passthru, %true, vl, sew, tu_mu
371bool RISCVVectorPeephole::convertAllOnesVMergeToVMv(MachineInstr &MI) const {
372 unsigned NewOpc = getVMV_V_VOpcodeForVMERGE_VVM(MI);
373 if (!NewOpc)
374 return false;
// With an all-ones mask every lane comes from %true, so the merge is a move.
375 if (!isAllOnesMask(MRI->getVRegDef(MI.getOperand(4).getReg())))
376 return false;
377
378 MI.setDesc(TII->get(NewOpc));
379 MI.removeOperand(2); // False operand
380 MI.removeOperand(3); // Mask operand
381 MI.addOperand(
// NOTE(review): original line 382 (the addOperand argument) is missing from
// this extracted view — presumably the tu_mu policy immediate, e.g.
// MachineOperand::CreateImm(RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED));
// confirm against upstream.
383
384 // vmv.v.v doesn't have a mask operand, so we may be able to inflate the
385 // register class for the destination and passthru operands e.g. VRNoV0 -> VR
386 MRI->recomputeRegClass(MI.getOperand(0).getReg());
387 if (MI.getOperand(1).getReg().isValid())
388 MRI->recomputeRegClass(MI.getOperand(1).getReg());
389 return true;
390}
391
392// If \p Reg is defined by one or more COPYs of virtual registers, traverses
393// the chain and returns the root non-COPY source.
394Register RISCVVectorPeephole::lookThruCopies(
395 Register Reg, bool OneUseOnly,
396 SmallVectorImpl<MachineInstr *> *Copies) const {
397 while (MachineInstr *Def = MRI->getUniqueVRegDef(Reg)) {
398 if (!Def->isFullCopy())
399 break;
400 Register Src = Def->getOperand(1).getReg();
401 if (!Src.isVirtual())
402 break;
403 if (OneUseOnly && !MRI->hasOneNonDBGUse(Reg))
404 break;
405 if (Copies)
406 Copies->push_back(Def);
407 Reg = Src;
408 }
409 return Reg;
410}
411
412/// If a PseudoVMERGE_VVM's true operand is a masked pseudo and both have the
413/// same mask, and the masked pseudo's passthru is the same as the false
414/// operand, we can convert the PseudoVMERGE_VVM to a PseudoVMV_V_V.
415///
416/// %true = PseudoVADD_VV_M1_MASK %false, %x, %y, %mask, vl1, sew, policy
417/// %x = PseudoVMERGE_VVM %passthru, %false, %true, %mask, vl2, sew
418/// ->
419/// %true = PseudoVADD_VV_M1_MASK %false, %x, %y, %mask, vl1, sew, policy
420/// %x = PseudoVMV_V_V %passthru, %true, vl2, sew, tu_mu
421bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) {
422 unsigned NewOpc = getVMV_V_VOpcodeForVMERGE_VVM(MI);
423 if (!NewOpc)
424 return false;
425 MachineInstr *True = MRI->getVRegDef(MI.getOperand(3).getReg());
426
427 if (!True || True->getParent() != MI.getParent())
428 return false;
429
// True must itself be a masked pseudo, and its destination EEW must match
// the vmerge's SEW for the rewrite to be lane-exact.
430 auto *TrueMaskedInfo = RISCV::getMaskedPseudoInfo(True->getOpcode());
431 if (!TrueMaskedInfo || !hasSameEEW(MI, *True))
432 return false;
433
// Compare the two masks through any intervening COPY chains.
434 Register TrueMaskReg = lookThruCopies(
435 True->getOperand(TrueMaskedInfo->MaskOpIdx + True->getNumExplicitDefs())
436 .getReg());
437 Register MIMaskReg = lookThruCopies(MI.getOperand(4).getReg());
438 if (!TrueMaskReg.isVirtual() || TrueMaskReg != MIMaskReg)
439 return false;
440
441 // Masked off lanes past TrueVL will come from False, and converting to vmv
442 // will lose these lanes unless MIVL <= TrueVL.
443 // TODO: We could relax this for False == Passthru and True policy == TU
444 const MachineOperand &MIVL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
445 const MachineOperand &TrueVL =
446 True->getOperand(RISCVII::getVLOpNum(True->getDesc()));
447 if (!RISCV::isVLKnownLE(MIVL, TrueVL))
448 return false;
449
450 // True's passthru needs to be equivalent to False
451 Register TruePassthruReg = True->getOperand(1).getReg();
452 Register FalseReg = MI.getOperand(2).getReg();
453 if (TruePassthruReg != FalseReg) {
454 // If True's passthru is undef see if we can change it to False
455 if (TruePassthruReg.isValid() ||
456 !MRI->hasOneUse(MI.getOperand(3).getReg()) ||
457 !ensureDominates(MI.getOperand(2), *True))
458 return false;
459 True->getOperand(1).setReg(MI.getOperand(2).getReg());
460 // If True is masked then its passthru needs to be in VRNoV0.
461 MRI->constrainRegClass(True->getOperand(1).getReg(),
462 TII->getRegClass(True->getDesc(), 1));
463 }
464
465 MI.setDesc(TII->get(NewOpc));
466 MI.removeOperand(2); // False operand
467 MI.removeOperand(3); // Mask operand
468 MI.addOperand(
// NOTE(review): original line 469 (the addOperand argument) is missing from
// this extracted view — presumably the tu_mu policy immediate, matching the
// tu_mu shown in the doc comment above; confirm against upstream.
470
471 // vmv.v.v doesn't have a mask operand, so we may be able to inflate the
472 // register class for the destination and passthru operands e.g. VRNoV0 -> VR
473 MRI->recomputeRegClass(MI.getOperand(0).getReg());
474 if (MI.getOperand(1).getReg().isValid())
475 MRI->recomputeRegClass(MI.getOperand(1).getReg());
476 return true;
477}
478
// Convert a masked pseudo whose mask is all ones into its unmasked
// equivalent, dropping the mask (and, when unneeded, policy/passthru) ops.
479bool RISCVVectorPeephole::convertToUnmasked(MachineInstr &MI) const {
480 const RISCV::RISCVMaskedPseudoInfo *I =
481 RISCV::getMaskedPseudoInfo(MI.getOpcode());
482 if (!I)
483 return false;
484
// With an all-ones mask the masked and unmasked forms are equivalent.
485 if (!isAllOnesMask(MRI->getVRegDef(
486 MI.getOperand(I->MaskOpIdx + MI.getNumExplicitDefs()).getReg())))
487 return false;
488
489 // There are two classes of pseudos in the table - compares and
490 // everything else. See the comment on RISCVMaskedPseudo for details.
491 const unsigned Opc = I->UnmaskedPseudo;
492 const MCInstrDesc &MCID = TII->get(Opc);
493 [[maybe_unused]] const bool HasPolicyOp =
// NOTE(review): original line 494 is missing from this view — presumably
// RISCVII::hasVecPolicyOp(MCID.TSFlags); confirm against upstream.
495 const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MCID);
496 const MCInstrDesc &MaskedMCID = TII->get(MI.getOpcode());
// NOTE(review): original lines 497-498 are missing from this view —
// presumably the assert condition whose message string follows on line 499.
499 "Unmasked pseudo has policy but masked pseudo doesn't?");
500 assert(HasPolicyOp == HasPassthru && "Unexpected pseudo structure");
501 assert(!(HasPassthru && !RISCVII::isFirstDefTiedToFirstUse(MaskedMCID)) &&
502 "Unmasked with passthru but masked with no passthru?");
503 (void)HasPolicyOp;
504
505 MI.setDesc(MCID);
506
507 // Drop the policy operand if unmasked doesn't need it.
508 if (RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) &&
// NOTE(review): original line 509 (the second half of this condition) is
// missing from this view — presumably !RISCVII::hasVecPolicyOp(MCID.TSFlags)).
510 MI.removeOperand(RISCVII::getVecPolicyOpNum(MaskedMCID));
511
512 // TODO: Increment all MaskOpIdxs in tablegen by num of explicit defs?
513 unsigned MaskOpIdx = I->MaskOpIdx + MI.getNumExplicitDefs();
514 MI.removeOperand(MaskOpIdx);
515
516 // The unmasked pseudo will no longer be constrained to the vrnov0 reg class,
517 // so try and relax it to vr.
518 MRI->recomputeRegClass(MI.getOperand(0).getReg());
519
520 // If the original masked pseudo had a passthru, relax it or remove it.
521 if (RISCVII::isFirstDefTiedToFirstUse(MaskedMCID)) {
522 unsigned PassthruOpIdx = MI.getNumExplicitDefs();
523 if (HasPassthru) {
524 if (MI.getOperand(PassthruOpIdx).getReg())
525 MRI->recomputeRegClass(MI.getOperand(PassthruOpIdx).getReg());
526 } else
527 MI.removeOperand(PassthruOpIdx);
528 }
529
530 return true;
531}
532
533/// Check if it's safe to move From down to To, checking that no physical
534/// registers are clobbered.
535static bool isSafeToMove(const MachineInstr &From, const MachineInstr &To) {
536 assert(From.getParent() == To.getParent());
537 SmallVector<Register> PhysUses, PhysDefs;
538 for (const MachineOperand &MO : From.all_uses())
539 if (MO.getReg().isPhysical())
540 PhysUses.push_back(MO.getReg());
541 for (const MachineOperand &MO : From.all_defs())
542 if (MO.getReg().isPhysical())
543 PhysDefs.push_back(MO.getReg());
544 bool SawStore = false;
545 for (auto II = std::next(From.getIterator()); II != To.getIterator(); II++) {
546 for (Register PhysReg : PhysUses)
547 if (II->definesRegister(PhysReg, nullptr))
548 return false;
549 for (Register PhysReg : PhysDefs)
550 if (II->definesRegister(PhysReg, nullptr) ||
551 II->readsRegister(PhysReg, nullptr))
552 return false;
553 if (II->mayStore()) {
554 SawStore = true;
555 break;
556 }
557 }
558 return From.isSafeToMove(SawStore);
559}
560
561/// Given A and B are in the same MBB, returns true if A comes before B.
// NOTE(review): original lines 562-563 (the function signature) are missing
// from this extracted view — presumably something like
// static bool dominates(MachineBasicBlock::const_iterator A,
//                       MachineBasicBlock::const_iterator B) {
// confirm against upstream.
564 assert(A->getParent() == B->getParent());
565 const MachineBasicBlock *MBB = A->getParent();
566 auto MBBEnd = MBB->end();
// end() acts as a sentinel: everything precedes it.
567 if (B == MBBEnd)
568 return true;
569
// NOTE(review): original line 570 (the declaration/initialization of the
// iterator I, presumably MBB->begin()) is missing from this view.
// Linear scan from the block start: whichever of A/B is reached first wins.
571 for (; &*I != A && &*I != B; ++I)
572 ;
573
574 return &*I == A;
575}
576
577/// If the register in \p MO doesn't dominate \p Src, try to move \p Src so it
578/// does. Returns false if doesn't dominate and we can't move. \p MO must be in
579/// the same basic block as \Src.
580bool RISCVVectorPeephole::ensureDominates(const MachineOperand &MO,
581 MachineInstr &Src) const {
582 assert(MO.getParent()->getParent() == Src.getParent());
583 if (!MO.isReg() || !MO.getReg().isValid())
584 return true;
585
586 MachineInstr *Def = MRI->getVRegDef(MO.getReg());
587 if (Def->getParent() == Src.getParent() && !dominates(Def, Src)) {
588 if (!isSafeToMove(Src, *Def->getNextNode()))
589 return false;
590 Src.moveBefore(Def->getNextNode());
591 }
592
593 return true;
594}
595
596/// If a PseudoVMV_V_V's passthru is undef then we can replace it with its input
597bool RISCVVectorPeephole::foldUndefPassthruVMV_V_V(MachineInstr &MI) {
598 if (RISCV::getRVVMCOpcode(MI.getOpcode()) != RISCV::VMV_V_V)
599 return false;
600 if (MI.getOperand(1).getReg().isValid())
601 return false;
602
603 // If the input was a pseudo with a policy operand, we can give it a tail
604 // agnostic policy if MI's undef tail subsumes the input's.
605 MachineInstr *Src = MRI->getVRegDef(MI.getOperand(2).getReg());
606 if (Src && !Src->hasUnmodeledSideEffects() &&
607 MRI->hasOneUse(MI.getOperand(2).getReg()) &&
608 RISCVII::hasVLOp(Src->getDesc().TSFlags) &&
609 RISCVII::hasVecPolicyOp(Src->getDesc().TSFlags) && hasSameEEW(MI, *Src)) {
610 const MachineOperand &MIVL = MI.getOperand(3);
611 const MachineOperand &SrcVL =
612 Src->getOperand(RISCVII::getVLOpNum(Src->getDesc()));
613
614 MachineOperand &SrcPolicy =
615 Src->getOperand(RISCVII::getVecPolicyOpNum(Src->getDesc()));
616
617 if (RISCV::isVLKnownLE(MIVL, SrcVL))
618 SrcPolicy.setImm(SrcPolicy.getImm() | RISCVVType::TAIL_AGNOSTIC);
619 }
620
621 MRI->constrainRegClass(MI.getOperand(2).getReg(),
622 MRI->getRegClass(MI.getOperand(0).getReg()));
623 MRI->replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(2).getReg());
624 MRI->clearKillFlags(MI.getOperand(2).getReg());
625 MI.eraseFromParent();
626 return true;
627}
628
629/// If a PseudoVMV_V_V is the only user of its input, fold its passthru and VL
630/// into it.
631///
632/// %x = PseudoVADD_V_V_M1 %passthru, %a, %b, %vl1, sew, policy
633/// %y = PseudoVMV_V_V_M1 %passthru, %x, %vl2, sew, policy
634/// (where %vl1 <= %vl2, see related tryToReduceVL)
635///
636/// ->
637///
638/// %y = PseudoVADD_V_V_M1 %passthru, %a, %b, vl1, sew, policy
639bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
640 if (RISCV::getRVVMCOpcode(MI.getOpcode()) != RISCV::VMV_V_V)
641 return false;
642
643 MachineOperand &Passthru = MI.getOperand(1);
644
// The folded-into instruction's result must have no other users.
645 if (!MRI->hasOneUse(MI.getOperand(2).getReg()))
646 return false;
647
648 MachineInstr *Src = MRI->getVRegDef(MI.getOperand(2).getReg());
649 if (!Src || Src->hasUnmodeledSideEffects() ||
650 Src->getParent() != MI.getParent() ||
651 !RISCVII::isFirstDefTiedToFirstUse(Src->getDesc()) ||
652 !RISCVII::hasVLOp(Src->getDesc().TSFlags))
653 return false;
654
655 // Src's dest needs to have the same EEW as MI's input.
656 if (!hasSameEEW(MI, *Src))
657 return false;
658
659 std::optional<std::pair<unsigned, unsigned>> NeedsCommute;
660
661 // Src needs to have the same passthru as VMV_V_V
662 MachineOperand &SrcPassthru = Src->getOperand(Src->getNumExplicitDefs());
663 if (SrcPassthru.getReg().isValid() &&
664 SrcPassthru.getReg() != Passthru.getReg()) {
665 // If Src's passthru != Passthru, check if it uses Passthru in another
666 // operand and try to commute it.
667 int OtherIdx = Src->findRegisterUseOperandIdx(Passthru.getReg(), TRI);
668 if (OtherIdx == -1)
669 return false;
670 unsigned OpIdx1 = OtherIdx;
671 unsigned OpIdx2 = Src->getNumExplicitDefs();
672 if (!TII->findCommutedOpIndices(*Src, OpIdx1, OpIdx2))
673 return false;
674 NeedsCommute = {OpIdx1, OpIdx2};
675 }
676
677 // Src VL will have already been reduced if legal (see tryToReduceVL),
678 // so we don't need to handle a smaller source VL here. However, the
679 // user's VL may be larger
680 MachineOperand &SrcVL = Src->getOperand(RISCVII::getVLOpNum(Src->getDesc()));
681 if (!RISCV::isVLKnownLE(SrcVL, MI.getOperand(3)))
682 return false;
683
684 // If the new passthru doesn't dominate Src, try to move Src so it does.
685 if (!ensureDominates(Passthru, *Src))
686 return false;
687
// Commute only after all bail-out checks, so failure paths leave Src intact.
688 if (NeedsCommute) {
689 auto [OpIdx1, OpIdx2] = *NeedsCommute;
690 [[maybe_unused]] bool Commuted =
691 TII->commuteInstruction(*Src, /*NewMI=*/false, OpIdx1, OpIdx2);
692 assert(Commuted && "Failed to commute Src?");
693 }
694
695 if (SrcPassthru.getReg() != Passthru.getReg()) {
696 SrcPassthru.setReg(Passthru.getReg());
697 // If Src is masked then its passthru needs to be in VRNoV0.
698 if (Passthru.getReg().isValid())
699 MRI->constrainRegClass(
700 Passthru.getReg(),
701 TII->getRegClass(Src->getDesc(), SrcPassthru.getOperandNo()));
702 }
703
704 if (RISCVII::hasVecPolicyOp(Src->getDesc().TSFlags)) {
// NOTE(review): original line 705 is missing from this extracted view —
// presumably the declaration of Policy initialized to tumu, e.g.
// int64_t Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
706 // If MI was tail agnostic and the VL didn't increase, preserve it.
707 if ((MI.getOperand(5).getImm() & RISCVVType::TAIL_AGNOSTIC) &&
708 RISCV::isVLKnownLE(MI.getOperand(3), SrcVL))
// NOTE(review): original line 709 is missing from this view — presumably
// Policy |= RISCVVType::TAIL_AGNOSTIC; — confirm against upstream.
710 Src->getOperand(RISCVII::getVecPolicyOpNum(Src->getDesc())).setImm(Policy);
711 }
712
// Redirect MI's users to Src's def and delete the now-redundant vmv.v.v.
713 MRI->constrainRegClass(Src->getOperand(0).getReg(),
714 MRI->getRegClass(MI.getOperand(0).getReg()));
715 MRI->replaceRegWith(MI.getOperand(0).getReg(), Src->getOperand(0).getReg());
716 MI.eraseFromParent();
717
718 return true;
719}
720
721/// Try to fold away VMERGE_VVM instructions into their operands:
722///
723/// %true = PseudoVADD_VV ...
724/// %x = PseudoVMERGE_VVM_M1 %false, %false, %true, %mask
725/// ->
726/// %x = PseudoVADD_VV_M1_MASK %false, ..., %mask
727///
728/// We can only fold if vmerge's passthru operand, vmerge's false operand and
729/// %true's passthru operand (if it has one) are the same. This is because we
730/// have to consolidate them into one passthru operand in the result.
731///
732/// If %true is masked, then we can use its mask instead of vmerge's if vmerge's
733/// mask is all ones.
734///
735/// The resulting VL is the minimum of the two VLs.
736///
737/// The resulting policy is the effective policy the vmerge would have had,
738/// i.e. whether or not it's passthru operand was implicit-def.
739bool RISCVVectorPeephole::foldVMergeToMask(MachineInstr &MI) const {
740 if (RISCV::getRVVMCOpcode(MI.getOpcode()) != RISCV::VMERGE_VVM)
741 return false;
742
743 // Collect chain of COPYs on True's result for later cleanup.
744 SmallVector<MachineInstr *, 4> TrueCopies;
745 Register PassthruReg = lookThruCopies(MI.getOperand(1).getReg());
746 Register FalseReg = lookThruCopies(MI.getOperand(2).getReg());
747 Register TrueReg = lookThruCopies(MI.getOperand(3).getReg(),
748 /*OneUseOnly=*/true, &TrueCopies);
// The vmerge must be the sole user of True's value for the fold to be legal.
749 if (!TrueReg.isVirtual() || !MRI->hasOneUse(TrueReg))
750 return false;
751 MachineInstr &True = *MRI->getUniqueVRegDef(TrueReg);
752 if (True.getParent() != MI.getParent())
753 return false;
754 const MachineOperand &MaskOp = MI.getOperand(4);
755 MachineInstr *Mask = MRI->getUniqueVRegDef(MaskOp.getReg());
756 assert(Mask);
757
// True must have a masked variant in the pseudo table.
758 const RISCV::RISCVMaskedPseudoInfo *Info =
759 RISCV::lookupMaskedIntrinsicByUnmasked(True.getOpcode());
760 if (!Info)
761 return false;
762
763 // If the EEW of True is different from vmerge's SEW, then we can't fold.
764 if (!hasSameEEW(MI, True))
765 return false;
766
767 // We require that either passthru and false are the same, or that passthru
768 // is undefined.
769 if (PassthruReg && !(PassthruReg.isVirtual() && PassthruReg == FalseReg))
770 return false;
771
772 std::optional<std::pair<unsigned, unsigned>> NeedsCommute;
773
774 // If True has a passthru operand then it needs to be the same as vmerge's
775 // False, since False will be used for the result's passthru operand.
776 Register TruePassthru =
777 lookThruCopies(True.getOperand(True.getNumExplicitDefs()).getReg());
778 if (RISCVII::isFirstDefTiedToFirstUse(True.getDesc()) && TruePassthru &&
779 !(TruePassthru.isVirtual() && TruePassthru == FalseReg)) {
780 // If True's passthru != False, check if it uses False in another operand
781 // and try to commute it.
782 int OtherIdx = True.findRegisterUseOperandIdx(FalseReg, TRI);
783 if (OtherIdx == -1)
784 return false;
785 unsigned OpIdx1 = OtherIdx;
786 unsigned OpIdx2 = True.getNumExplicitDefs();
787 if (!TII->findCommutedOpIndices(True, OpIdx1, OpIdx2))
788 return false;
789 NeedsCommute = {OpIdx1, OpIdx2};
790 }
791
792 // Make sure it doesn't raise any observable fp exceptions, since changing the
793 // active elements will affect how fflags is set.
794 if (True.hasUnmodeledSideEffects() || True.mayRaiseFPException())
795 return false;
796
797 const MachineOperand &VMergeVL =
798 MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
799 const MachineOperand &TrueVL =
// NOTE(review): original line 800 (the initializer of TrueVL) is missing
// from this extracted view — presumably
// True.getOperand(RISCVII::getVLOpNum(True.getDesc())); — confirm upstream.
801
// The folded instruction runs at the smaller of the two VLs; bail out if
// neither VL is provably <= the other.
802 MachineOperand MinVL = MachineOperand::CreateImm(0);
803 if (RISCV::isVLKnownLE(TrueVL, VMergeVL))
804 MinVL = TrueVL;
805 else if (RISCV::isVLKnownLE(VMergeVL, TrueVL))
806 MinVL = VMergeVL;
807 else
808 return false;
809
// Changing VL or active mask must not alter True's lane values.
810 unsigned RVVTSFlags =
811 TII->get(RISCV::getRVVMCOpcode(True.getOpcode())).TSFlags;
812 if (RISCVII::elementsDependOnVL(RVVTSFlags) && !TrueVL.isIdenticalTo(MinVL))
813 return false;
814 if (RISCVII::elementsDependOnMask(RVVTSFlags) && !isAllOnesMask(Mask))
815 return false;
816
817 // Use a tumu policy, relaxing it to tail agnostic provided that the passthru
818 // operand is undefined.
819 //
820 // However, if the VL became smaller than what the vmerge had originally, then
821 // elements past VL that were previously in the vmerge's body will have moved
822 // to the tail. In that case we always need to use tail undisturbed to
823 // preserve them.
// NOTE(review): original line 824 is missing from this view — presumably the
// declaration of Policy initialized to tumu.
825 if (!PassthruReg && RISCV::isVLKnownLE(VMergeVL, MinVL))
// NOTE(review): original line 826 is missing from this view — presumably
// Policy |= RISCVVType::TAIL_AGNOSTIC;
827
// NOTE(review): original line 828 (the assert condition for the message on
// the next line, presumably RISCVII::hasVecPolicyOp on True) is missing.
829 "Foldable unmasked pseudo should have a policy op already");
830
831 // Make sure the mask dominates True and its copies, otherwise move down True
832 // so it does. VL will always dominate since if it's a register they need to
833 // be the same.
834 if (!ensureDominates(MaskOp, True))
835 return false;
836
837 if (NeedsCommute) {
838 auto [OpIdx1, OpIdx2] = *NeedsCommute;
839 [[maybe_unused]] bool Commuted =
840 TII->commuteInstruction(True, /*NewMI=*/false, OpIdx1, OpIdx2);
841 assert(Commuted && "Failed to commute True?");
// Commuting may change the opcode, so refresh the masked-pseudo info.
842 Info = RISCV::lookupMaskedIntrinsicByUnmasked(True.getOpcode());
843 }
844
845 True.setDesc(TII->get(Info->MaskedPseudo));
846
847 // Insert the mask operand.
848 // TODO: Increment MaskOpIdx by number of explicit defs?
849 True.insert(True.operands_begin() + Info->MaskOpIdx +
850 True.getNumExplicitDefs(),
851 MachineOperand::CreateReg(MaskOp.getReg(), false));
852
853 // Update the passthru, AVL and policy.
854 True.getOperand(True.getNumExplicitDefs()).setReg(FalseReg);
// NOTE(review): original line 855 is missing from this view — presumably the
// removal of True's old VL operand before inserting MinVL below.
856 True.insert(True.operands_begin() + RISCVII::getVLOpNum(True.getDesc()),
857 MinVL);
// NOTE(review): original line 858 is missing from this view — presumably the
// setImm(Policy) on True's policy operand.
859
860 MRI->replaceRegWith(True.getOperand(0).getReg(), MI.getOperand(0).getReg());
861 // Now that True is masked, constrain its operands from vr -> vrnov0.
862 for (MachineOperand &MO : True.explicit_operands()) {
863 if (!MO.isReg() || !MO.getReg().isVirtual())
864 continue;
865 MRI->constrainRegClass(
866 MO.getReg(), True.getRegClassConstraint(MO.getOperandNo(), TII, TRI));
867 }
868 // We should clear the IsKill flag since we have a new use now.
869 MRI->clearKillFlags(FalseReg);
870 MI.eraseFromParent();
871
872 // Cleanup all the COPYs on True's value. We have to manually do this because
873 // sometimes sinking True causes these COPY to be invalid (use before define).
874 for (MachineInstr *TrueCopy : TrueCopies)
875 TrueCopy->eraseFromParent();
876
877 return true;
878}
879
880bool RISCVVectorPeephole::runOnMachineFunction(MachineFunction &MF) {
881 if (skipFunction(MF.getFunction()))
882 return false;
883
884 // Skip if the vector extension is not enabled.
885 ST = &MF.getSubtarget<RISCVSubtarget>();
886 if (!ST->hasVInstructions())
887 return false;
888
889 TII = ST->getInstrInfo();
890 MRI = &MF.getRegInfo();
891 TRI = MRI->getTargetRegisterInfo();
892
893 bool Changed = false;
894
895 for (MachineBasicBlock &MBB : MF) {
896 for (MachineInstr &MI : make_early_inc_range(MBB))
897 Changed |= foldVMergeToMask(MI);
898
899 for (MachineInstr &MI : make_early_inc_range(MBB)) {
900 Changed |= convertToVLMAX(MI);
901 Changed |= tryToReduceVL(MI);
902 Changed |= convertToUnmasked(MI);
903 Changed |= convertToWholeRegister(MI);
904 Changed |= convertAllOnesVMergeToVMv(MI);
905 Changed |= convertSameMaskVMergeToVMv(MI);
906 if (foldUndefPassthruVMV_V_V(MI)) {
907 Changed |= true;
908 continue; // MI is erased
909 }
910 Changed |= foldVMV_V_V(MI);
911 }
912 }
913
914 return Changed;
915}
916
// Factory hook used by the RISC-V target's pass pipeline.
// NOTE(review): original line 917 (the function signature, presumably
// FunctionPass *llvm::createRISCVVectorPeepholePass()) is missing from this
// extracted view — confirm against upstream.
918 return new RISCVVectorPeephole();
919}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
MachineBasicBlock & MBB
static uint64_t getConstant(const Value *IndexValue)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
uint64_t IntrinsicInst * II
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
#define CASE_WHOLE_REGISTER_LMUL(lmul)
static bool isSafeToMove(const MachineInstr &From, const MachineInstr &To)
Check if it's safe to move From down to To, checking that no physical registers are clobbered.
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
SI Lower i1 Copies
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
MachineInstrBundleIterator< const MachineInstr > const_iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
mop_iterator operands_begin()
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool isCopy() const
const MachineBasicBlock * getParent() const
filtered_mop_range all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
LLVM_ABI bool isSafeToMove(bool &SawStore) const
Return true if it is safe to move this instruction.
LLVM_ABI int findRegisterUseOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false) const
Returns the operand index that is a use of the specific register or -1 if it is not found.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
LLVM_ABI void insert(mop_iterator InsertBefore, ArrayRef< MachineOperand > Ops)
Inserts Ops BEFORE It. Can untie/retie tied operands.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI unsigned getNumExplicitDefs() const
Returns the number of non-implicit definitions.
mop_range explicit_operands()
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
filtered_mop_range all_uses()
Returns an iterator range over all operands that are (explicit or implicit) register uses.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI const TargetRegisterClass * getRegClassConstraint(unsigned OpIdx, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const
Compute the static register class constraint for operand OpIdx.
MachineOperand class - Representation of each machine instruction operand.
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool hasVInstructions() const
std::optional< unsigned > getRealVLen() const
const RISCVInstrInfo * getInstrInfo() const override
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
Value * getOperand(unsigned i) const
Definition User.h:207
self_iterator getIterator()
Definition ilist_node.h:123
Changed
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
static unsigned getVecPolicyOpNum(const MCInstrDesc &Desc)
static RISCVVType::VLMUL getLMul(uint64_t TSFlags)
static unsigned getVLOpNum(const MCInstrDesc &Desc)
static bool hasVLOp(uint64_t TSFlags)
static bool elementsDependOnMask(uint64_t TSFlags)
static bool hasVecPolicyOp(uint64_t TSFlags)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
static bool elementsDependOnVL(uint64_t TSFlags)
static bool hasSEWOp(uint64_t TSFlags)
static bool isFirstDefTiedToFirstUse(const MCInstrDesc &Desc)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
static bool isValidSEW(unsigned SEW)
bool isVLKnownLE(const MachineOperand &LHS, const MachineOperand &RHS)
Given two VL operands, do we know that LHS <= RHS?
unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode)
unsigned getDestLog2EEW(const MCInstrDesc &Desc, unsigned Log2SEW)
static constexpr int64_t VLMaxSentinel
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
FunctionPass * createRISCVVectorPeepholePass()