//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
//===----------------------------------------------------------------------===//

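// Illustrative example (not part of the original source): when every vector
// instruction in a region agrees on its VL/VTYPE requirements, a single
// inserted configuration suffices, e.g.
//
//   vsetvli t0, a0, e32, m1, ta, ma   ; inserted by this pass
//   vle32.v v8, (a1)
//   vadd.vv v8, v8, v8
//   vse32.v v8, (a2)
//
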
#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"

38 "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
39 cl::desc("Disable looking through phis when inserting vsetvlis."));
40
42 "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
43 cl::desc("Enable strict assertion checking for the dataflow algorithm"));
44
namespace {

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}

/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}

static uint16_t getRVVMCOpcode(uint16_t RVVPseudoOpcode) {
  const RISCVVPseudosTable::PseudoInfo *RVV =
      RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode);
  if (!RVV)
    return 0;
  return RVV->BaseInstr;
}

static bool isScalarMoveInstr(const MachineInstr &MI) {
  switch (getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_S_X:
  case RISCV::VFMV_S_F:
    return true;
  }
}

static bool isVSlideInstr(const MachineInstr &MI) {
  switch (getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VSLIDEDOWN_VX:
  case RISCV::VSLIDEDOWN_VI:
  case RISCV::VSLIDEUP_VX:
  case RISCV::VSLIDEUP_VI:
    return true;
  }
}

/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
/// not a load or store which ignores SEW.
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (getRVVMCOpcode(MI.getOpcode())) {
  default:
    return std::nullopt;
  case RISCV::VLE8_V:
  case RISCV::VLSE8_V:
  case RISCV::VSE8_V:
  case RISCV::VSSE8_V:
    return 8;
  case RISCV::VLE16_V:
  case RISCV::VLSE16_V:
  case RISCV::VSE16_V:
  case RISCV::VSSE16_V:
    return 16;
  case RISCV::VLE32_V:
  case RISCV::VLSE32_V:
  case RISCV::VSE32_V:
  case RISCV::VSSE32_V:
    return 32;
  case RISCV::VLE64_V:
  case RISCV::VLSE64_V:
  case RISCV::VSE64_V:
  case RISCV::VSSE64_V:
    return 64;
  }
}

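// Note (illustrative, not from the original source): vle32.v, for example,
// always moves 32-bit elements (EEW=32) regardless of the SEW currently held
// in VTYPE; only the SEW/LMUL ratio (which fixes EMUL and VLMAX) matters for
// these opcodes, which is the flexibility getDemanded exploits below.
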
/// Return true if this is an operation on mask registers. Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return false;
  const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  return Log2SEW == 0;
}

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  // Some unknown property of VL is used. If demanded, must preserve entire
  // value.
  bool VLAny = false;
  // Only zero vs non-zero is used. If demanded, can change non-zero values.
  bool VLZeroness = false;
  bool SEW = false;
  bool LMUL = false;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;

  // Return true if any part of VTYPE was used
  bool usedVTYPE() const {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }

  // Return true if any property of VL was used
  bool usedVL() {
    return VLAny || VLZeroness;
  }

  // Mark all VTYPE subfields and properties as demanded
  void demandVTYPE() {
    SEW = true;
    LMUL = true;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
  }

  // Mark all VL properties as demanded
  void demandVL() {
    VLAny = true;
    VLZeroness = true;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  void print(raw_ostream &OS) const {
    OS << "{";
    OS << "VLAny=" << VLAny << ", ";
    OS << "VLZeroness=" << VLZeroness << ", ";
    OS << "SEW=" << SEW << ", ";
    OS << "LMUL=" << LMUL << ", ";
    OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
    OS << "TailPolicy=" << TailPolicy << ", ";
    OS << "MaskPolicy=" << MaskPolicy;
    OS << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
  DF.print(OS);
  return OS;
}
#endif

/// Return true if the two values of the VTYPE register provided are
/// indistinguishable from the perspective of an instruction (or set of
/// instructions) which use only the Used subfields and properties.
static bool areCompatibleVTYPEs(uint64_t VType1,
                                uint64_t VType2,
                                const DemandedFields &Used) {
  if (Used.SEW &&
      RISCVVType::getSEW(VType1) != RISCVVType::getSEW(VType2))
    return false;

  if (Used.LMUL &&
      RISCVVType::getVLMUL(VType1) != RISCVVType::getVLMUL(VType2))
    return false;

  if (Used.SEWLMULRatio) {
    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(VType1),
                                              RISCVVType::getVLMUL(VType1));
    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(VType2),
                                              RISCVVType::getVLMUL(VType2));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy &&
      RISCVVType::isTailAgnostic(VType1) != RISCVVType::isTailAgnostic(VType2))
    return false;
  if (Used.MaskPolicy &&
      RISCVVType::isMaskAgnostic(VType1) != RISCVVType::isMaskAgnostic(VType2))
    return false;
  return true;
}
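
// Example (illustrative, not from the original source): e8/mf2 and e16/m1
// disagree on SEW and LMUL but share the SEW/LMUL ratio of 16, so they are
// compatible for a user which demands only SEWLMULRatio (e.g. a mask register
// operation).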

/// Return the fields and properties demanded by the provided instruction.
static DemandedFields getDemanded(const MachineInstr &MI) {
  // Warning: This function has to work on both the lowered (i.e. post
  // emitVSETVLIs) and pre-lowering forms. The main implication of this is
  // that it can't use the value of a SEW, VL, or Policy operand as they might
  // be stale after lowering.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL))
    Res.demandVL();
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      Res.demandVL();

    // Behavior is independent of mask policy.
    if (!RISCVII::usesMaskPolicy(TSFlags))
      Res.MaskPolicy = false;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode. This is asserted when constructing the VSETVLIInfo.
  if (getEEWForLoadStore(MI)) {
    Res.SEW = false;
    Res.LMUL = false;
  }

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored.
  if (isMaskRegOp(MI)) {
    Res.SEW = false;
    Res.LMUL = false;
  }

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  if (isScalarMoveInstr(MI)) {
    Res.LMUL = false;
    Res.SEWLMULRatio = false;
    Res.VLAny = false;
  }

  return Res;
}
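
// Worked example (illustrative, not from the original source): for vse32.v,
// getDemanded leaves VLAny, VLZeroness and SEWLMULRatio set, but clears SEW
// and LMUL (implicit EEW) as well as TailPolicy and MaskPolicy (stores have
// no tail or merge value to preserve).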

/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  union {
    Register AVLReg;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    Unknown,
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLReg(Register Reg) {
    AVLReg = Reg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  Register getAVLReg() const {
    assert(hasAVLReg());
    return AVLReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }

  bool hasNonZeroAVL(const MachineRegisterInfo &MRI) const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg()) {
      if (getAVLReg() == RISCV::X0)
        return true;
      if (MachineInstr *MI = MRI.getVRegDef(getAVLReg());
          MI && MI->getOpcode() == RISCV::ADDI &&
          MI->getOperand(1).isReg() && MI->getOperand(2).isImm() &&
          MI->getOperand(1).getReg() == RISCV::X0 &&
          MI->getOperand(2).getImm() != 0)
        return true;
      return false;
    }
    return false;
  }

  bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
                         const MachineRegisterInfo &MRI) const {
    if (hasSameAVL(Other))
      return true;
    return (hasNonZeroAVL(MRI) && Other.hasNonZeroAVL(MRI));
  }

  bool hasSameAVL(const VSETVLIInfo &Other) const {
    if (hasAVLReg() && Other.hasAVLReg())
      return getAVLReg() == Other.getAVLReg();

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    return false;
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return RISCVVType::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasCompatibleVTYPE(const DemandedFields &Used,
                          const VSETVLIInfo &Require) const {
    return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this. MI is the instruction whose requirements we're considering.
  bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
                    const MachineRegisterInfo &MRI) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!Require.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly)
      return false;

    // If the instruction doesn't need an AVLReg and the SEW matches, consider
    // it compatible.
    if (Require.hasAVLReg() && Require.AVLReg == RISCV::NoRegister)
      if (SEW == Require.SEW)
        return true;

    if (Used.VLAny && !hasSameAVL(Require))
      return false;

    if (Used.VLZeroness && !hasEquallyZeroAVL(Require, MRI))
      return false;

    return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVL(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }
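
  // Example (illustrative, not from the original source): intersecting
  // predecessor exits {AVLReg, e32, m1} and {AVLReg, e16, mf2} with the same
  // AVL register keeps the common AVL and SEW/LMUL ratio (32) but sets
  // SEWLMULRatioOnly, since the two full VTYPEs differ.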

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << (unsigned)AVLReg;
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif

struct BlockData {
  // The VSETVLIInfo that represents the net changes to the VL/VTYPE registers
  // made by this block. Calculated in Phase 1.
  VSETVLIInfo Change;

  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {
    initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry());
  }
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI);
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI);
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB);
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doLocalPostpass(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
                                       const MachineRegisterInfo *MRI) {
  VSETVLIInfo InstrInfo;

  bool TailAgnostic, MaskAgnostic;
  unsigned UseOpIdx;
  if (MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
    // Start with undisturbed.
    TailAgnostic = false;
    MaskAgnostic = false;

    // If there is a policy operand, use it.
    if (RISCVII::hasVecPolicyOp(TSFlags)) {
      const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
      uint64_t Policy = Op.getImm();
      assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
             "Invalid Policy Value");
      TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
      MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
    }

    // If the tied operand is an IMPLICIT_DEF we can use TailAgnostic and
    // MaskAgnostic.
    const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
    MachineInstr *UseMI = MRI->getVRegDef(UseMO.getReg());
    if (UseMI && UseMI->isImplicitDef()) {
      TailAgnostic = true;
      MaskAgnostic = true;
    }
    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;

    if (!RISCVII::usesMaskPolicy(TSFlags))
      MaskAgnostic = true;
  } else {
    // If there is no tied operand, there shouldn't be a policy operand.
    assert(!RISCVII::hasVecPolicyOp(TSFlags) && "Unexpected policy operand");
    // No tied operand use agnostic policies.
    TailAgnostic = true;
    MaskAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to X0 register.
      if (Imm == RISCV::VLMaxSentinel)
        InstrInfo.setAVLReg(RISCV::X0);
      else
        InstrInfo.setAVLImm(Imm);
    } else {
      InstrInfo.setAVLReg(VLOp.getReg());
    }
  } else {
    InstrInfo.setAVLReg(RISCV::NoRegister);
  }
#ifndef NDEBUG
  if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  return InstrInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                                       const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  DebugLoc DL = MI.getDebugLoc();
  insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    NewInfo.setAVLReg(AVLReg);
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  return NewInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {

  if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
    // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
    // VLMAX.
    if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }

    // If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If
    // it has the same VLMAX we want and the last VL/VTYPE we observed is the
    // same, we can use the X0, X0 form.
    if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg() &&
        Info.getAVLReg().isVirtual()) {
      if (MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg())) {
        if (isVectorConfigInstr(*DefMI)) {
          VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
          if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
            BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                .addReg(RISCV::X0, RegState::Kill)
                .addImm(Info.encodeVTYPE())
                .addReg(RISCV::VL, RegState::Implicit);
            return;
          }
        }
      }
    }
  }

  if (Info.hasAVLImm()) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(Info.getAVLImm())
        .addImm(Info.encodeVTYPE());
    return;
  }

  Register AVLReg = Info.getAVLReg();
  if (AVLReg == RISCV::NoRegister) {
    // We can only use x0, x0 if there's no chance of the vtype change causing
    // the previous vl to become invalid.
    if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
        Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }
    // Otherwise use an AVL of 0 to avoid depending on previous vl.
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(0)
        .addImm(Info.encodeVTYPE());
    return;
  }

  if (AVLReg.isVirtual())
    MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);

  // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
  // opcode if the AVLReg is X0 as they have different register classes for
  // the AVL operand.
  Register DestReg = RISCV::X0;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (AVLReg == RISCV::X0) {
    DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    Opcode = RISCV::PseudoVSETVLIX0;
  }
  BuildMI(MBB, InsertPt, DL, TII->get(Opcode))
      .addReg(DestReg, RegState::Define | RegState::Dead)
      .addReg(AVLReg)
      .addImm(Info.encodeVTYPE());
}

static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
  return Fractional || LMul == 1;
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// before MI.
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, MRI));

  if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
    return true;

  DemandedFields Used = getDemanded(MI);

  if (isScalarMoveInstr(MI)) {
    // For vmv.s.x and vfmv.s.f, if writing to an implicit_def operand, we don't
    // need to preserve any other bits and are thus compatible with any larger
    // etype and can disregard policy bits. Warning: It's tempting to try doing
    // this for any tail agnostic operation, but we can't as TA requires
    // tail lanes to either be the original value or -1. We are writing
    // unknown bits to the lanes here.
    auto *VRegDef = MRI->getVRegDef(MI.getOperand(1).getReg());
    if (VRegDef && VRegDef->isImplicitDef() &&
        CurInfo.getSEW() >= Require.getSEW()) {
      Used.SEW = false;
      Used.TailPolicy = false;
    }
  }

  // A slidedown/slideup with an IMPLICIT_DEF merge op can freely clobber
  // elements not copied from the source vector (e.g. masked off, tail, or
  // slideup's prefix). Notes:
  // * We can't modify SEW here since the slide amount is in units of SEW.
  // * VL=1 is special only because we have existing support for zero vs
  //   non-zero VL. We could generalize this if we had a VL > C predicate.
  // * The LMUL1 restriction is for machines whose latency may depend on VL.
  // * As above, this is only legal for IMPLICIT_DEF, not TA.
  if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
      isLMUL1OrSmaller(CurInfo.getVLMUL())) {
    auto *VRegDef = MRI->getVRegDef(MI.getOperand(1).getReg());
    if (VRegDef && VRegDef->isImplicitDef()) {
      Used.VLAny = false;
      Used.VLZeroness = true;
      Used.LMUL = false;
      Used.TailPolicy = false;
    }
  }

  if (CurInfo.isCompatible(Used, Require, *MRI))
    return false;

  // We didn't find a compatible value. If our AVL is a virtual register,
  // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
  // and the last VL/VTYPE we observed is the same, we don't need a
  // VSETVLI here.
  if (Require.hasAVLReg() && Require.getAVLReg().isVirtual() &&
      CurInfo.hasCompatibleVTYPE(Used, Require)) {
    if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
      if (isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
          return false;
      }
    }
  }

  return true;
}

// Given an incoming state reaching MI, modifies that state so that it is
// minimally compatible with MI. The resulting state is guaranteed to be
// semantically legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) {
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (!RISCVII::hasSEWOp(TSFlags))
    return;

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
  if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  Info = NewInfo;

  if (!RISCVII::hasVLOp(TSFlags))
    return;

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
  // VL > 0. We can discard the user requested AVL and just use the last
  // one if we can prove it equally zero. This removes a vsetvli entirely
  // if the types match or allows use of cheaper avl preserving variant
  // if VLMAX doesn't change. If VLMAX might change, we couldn't use
  // the 'vsetvli x0, x0, vtype' variant, so we avoid the transform to
  // prevent extending live range of an avl register operand.
  // TODO: We can probably relax this for immediates.
  if (isScalarMoveInstr(MI) && PrevInfo.isValid() &&
      PrevInfo.hasEquallyZeroAVL(Info, *MRI) &&
      Info.hasSameVLMAX(PrevInfo)) {
    if (PrevInfo.hasAVLImm())
      Info.setAVLImm(PrevInfo.getAVLImm());
    else
      Info.setAVLReg(PrevInfo.getAVLReg());
    return;
  }

  // If AVL is defined by a vsetvli with the same VLMAX, we can
  // replace the AVL operand with the AVL of the defining vsetvli.
  // We avoid general register AVLs to avoid extending live ranges
  // without being sure we can kill the original source reg entirely.
  if (!Info.hasAVLReg() || !Info.getAVLReg().isVirtual())
    return;
  MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg());
  if (!DefMI || !isVectorConfigInstr(*DefMI))
    return;

  VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
  if (DefInfo.hasSameVLMAX(Info) &&
      (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) {
    if (DefInfo.hasAVLImm())
      Info.setAVLImm(DefInfo.getAVLImm());
    else
      Info.setAVLReg(DefInfo.getAVLReg());
    return;
  }
}

// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) {
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to vl-output of the fault first load.
    Info.setAVLReg(MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
      MI.modifiesRegister(RISCV::VTYPE))
    Info = VSETVLIInfo::getUnknown();
}

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
  bool HadVectorOp = false;

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
  BBInfo.Change = BBInfo.Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(BBInfo.Change, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
      HadVectorOp = true;

    transferAfter(BBInfo.Change, MI);
  }

  return HadVectorOp;
}

void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  // Start with the previous entry so that we keep the most conservative state
  // we have ever found.
  VSETVLIInfo InInfo = BBInfo.Pred;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun block
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed a block's output state can change based on
  // the input state. To cache, we'd have to add logic for finding
  // never-compatible state changes.
  computeVLVTYPEChanges(MBB);
  VSETVLIInfo TmpStatus = BBInfo.Change;

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue) {
      BlockInfo[S->getNumber()].InQueue = true;
      WorkList.push(S);
    }
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL is a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (DisableInsertVSETVLPHIOpt)
    return true;

  if (!Require.hasAVLReg())
    return true;

  Register AVLReg = Require.getAVLReg();
  if (!AVLReg.isVirtual())
    return true;

  // We need the AVL to be produced by a PHI node in this basic block.
  MachineInstr *PHI = MRI->getVRegDef(AVLReg);
  if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
    return true;

  for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
       PHIOp += 2) {
    Register InReg = PHI->getOperand(PHIOp).getReg();
    MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
    const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
    // If the exit from the predecessor has the VTYPE we are looking for
    // we might be able to avoid a VSETVLI.
    if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require))
      return true;

    // We need the PHI input to be the output of a VSET(I)VLI.
    MachineInstr *DefMI = MRI->getVRegDef(InReg);
    if (!DefMI || !isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
        !DefInfo.hasSameVTYPE(PBBInfo.Exit))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}

void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (PrevInfo != CurInfo) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had since we know the GPR result of the implicit state change
        // wouldn't be used and VL/VTYPE registers are correct. Note that
        // we *do* need to model the state as if it changed as while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
        MI.modifiesRegister(RISCV::VTYPE))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  // If we reach the end of the block and our current info doesn't match the
  // expected info, insert a vsetvli to correct.
  if (!UseStrictAsserts) {
    const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit;
    if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
        CurInfo != ExitInfo) {
      // Note there's an implicit assumption here that terminators never use
      // or modify VL or VTYPE. Also, fallthrough will return end().
      auto InsertPt = MBB.getFirstInstrTerminator();
      insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo,
                    CurInfo);
      CurInfo = ExitInfo;
    }
  }

  if (UseStrictAsserts && CurInfo.isValid()) {
    const auto &Info = BlockInfo[MBB.getNumber()];
    if (CurInfo != Info.Exit) {
      LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
      LLVM_DEBUG(dbgs() << "  begin        state: " << Info.Pred << "\n");
      LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
      LLVM_DEBUG(dbgs() << "  actual   end state: " << CurInfo << "\n");
    }
    assert(CurInfo == Info.Exit &&
           "InsertVSETVLI dataflow invariant violated");
  }
}

/// Return true if the VL value configured must be equal to the requested one.
static bool hasFixedResult(const VSETVLIInfo &Info, const RISCVSubtarget &ST) {
  if (!Info.hasAVLImm())
    // VLMAX is always the same value.
    // TODO: Could extend to other registers by looking at the associated vreg
    // def placement.
    return RISCV::X0 == Info.getAVLReg();

  unsigned AVL = Info.getAVLImm();
  unsigned SEW = Info.getSEW();
  unsigned AVLInBits = AVL * SEW;

  unsigned LMul;
  bool Fractional;
  std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(Info.getVLMUL());

  if (Fractional)
    return ST.getRealMinVLen() / LMul >= AVLInBits;
  return ST.getRealMinVLen() * LMul >= AVLInBits;
}

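// Worked example (illustrative, not from the original source): with AVL=4,
// SEW=32, LMUL=1 we need 4*32 = 128 bits, so any implementation with
// VLEN >= 128 yields VL == AVL and the result is fixed; with AVL=64 under the
// same VTYPE, 64*32 = 2048 bits would require VLEN >= 2048, which
// getRealMinVLen() typically cannot guarantee.
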
/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors. Specifically,
/// this is geared to catch the common case of a fixed length vsetvl in a single
/// block loop when it could execute once in the preheader instead.
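///
/// A sketch of the shape this targets (illustrative, not from the original
/// source):
///
///   preheader:                       preheader:
///     ...                              vsetivli zero, 4, e32, m1, ta, ma
///   loop:                    ==>     loop:
///     vsetivli zero, 4, ...            vle32.v v8, (a0)
///     vle32.v v8, (a0)                 ...
///     ...                              bnez t0, loop
///     bnez t0, loop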
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  const MachineFunction &MF = *MBB.getParent();
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();

  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If VL can be less than AVL, then we can't reduce the frequency of exec.
  if (!hasFixedResult(AvailableInfo, ST))
    return;

  // Model the effect of changing the input state of the block MBB to
  // AvailableInfo. We're looking for two issues here; one legality,
  // one profitability.
  // 1) If the block doesn't use some of the fields from VL or VTYPE, we
  //    may hit the end of the block with a different end state. We can
  //    not make this change without reflowing later blocks as well.
  // 2) If we don't actually remove a transition, inserting a vsetvli
  //    into the predecessor block would be correct, but unprofitable.
  VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
  VSETVLIInfo CurInfo = AvailableInfo;
  int TransitionsRemoved = 0;
  for (const MachineInstr &MI : MBB) {
    const VSETVLIInfo LastInfo = CurInfo;
    const VSETVLIInfo LastOldInfo = OldInfo;
    transferBefore(CurInfo, MI);
    transferBefore(OldInfo, MI);
    if (CurInfo == LastInfo)
      TransitionsRemoved++;
    if (LastOldInfo == OldInfo)
      TransitionsRemoved--;
    transferAfter(CurInfo, MI);
    transferAfter(OldInfo, MI);
    if (CurInfo == OldInfo)
      // Convergence. All transitions after this must match by construction.
      break;
  }
  if (CurInfo != OldInfo || TransitionsRemoved <= 0)
    // Issues 1 and 2 above
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE. Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt),
                AvailableInfo, OldExit);
}

static void doUnion(DemandedFields &A, DemandedFields B) {
  A.VLAny |= B.VLAny;
  A.VLZeroness |= B.VLZeroness;
  A.SEW |= B.SEW;
  A.LMUL |= B.LMUL;
  A.SEWLMULRatio |= B.SEWLMULRatio;
  A.TailPolicy |= B.TailPolicy;
  A.MaskPolicy |= B.MaskPolicy;
}

static bool isNonZeroAVL(const MachineOperand &MO) {
  if (MO.isReg())
    return RISCV::X0 == MO.getReg();
  assert(MO.isImm());
  return 0 != MO.getImm();
}

// Return true if we can mutate PrevMI to match MI without changing any of the
// fields which would be observed.
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
                                 const MachineInstr &MI,
                                 const DemandedFields &Used) {
  // If the VL values aren't equal, return false if either a) the former is
  // demanded, or b) we can't rewrite the former to be the latter for
  // implementation reasons.
  if (!isVLPreservingConfig(MI)) {
    if (Used.VLAny)
      return false;

    // TODO: Requires more care in the mutation...
    if (isVLPreservingConfig(PrevMI))
      return false;

    // We don't bother to handle the equally zero case here as it's largely
    // uninteresting.
    if (Used.VLZeroness &&
        (!isNonZeroAVL(MI.getOperand(1)) ||
         !isNonZeroAVL(PrevMI.getOperand(1))))
      return false;

    // TODO: Track whether the register is defined between
    // PrevMI and MI.
    if (MI.getOperand(1).isReg() &&
        RISCV::X0 != MI.getOperand(1).getReg())
      return false;

    // TODO: We need to change the result register to allow this rewrite
    // without the result forming a vl preserving vsetvli which is not
    // a correct state merge.
    if (PrevMI.getOperand(0).getReg() == RISCV::X0 &&
        MI.getOperand(1).isReg())
      return false;
  }

  if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
    return false;

  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}

void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
  MachineInstr *NextMI = nullptr;
  // We can have arbitrary code in successors, so VL and VTYPE
  // must be considered demanded.
  DemandedFields Used;
  Used.demandVL();
  Used.demandVTYPE();
  SmallVector<MachineInstr*> ToDelete;
  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {

    if (!isVectorConfigInstr(MI)) {
      doUnion(Used, getDemanded(MI));
      continue;
    }

    Register VRegDef = MI.getOperand(0).getReg();
    if (VRegDef != RISCV::X0 &&
        !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
      Used.demandVL();

    if (NextMI) {
      if (!Used.usedVL() && !Used.usedVTYPE()) {
        ToDelete.push_back(&MI);
        // Leave NextMI unchanged
        continue;
      } else if (canMutatePriorConfig(MI, *NextMI, Used)) {
        if (!isVLPreservingConfig(*NextMI)) {
          if (NextMI->getOperand(1).isImm())
            MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
          else
            MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);
          MI.setDesc(NextMI->getDesc());
        }
        MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
        ToDelete.push_back(NextMI);
        // fallthrough
      }
    }
    NextMI = &MI;
    Used = getDemanded(MI);
  }

  for (auto *MI : ToDelete)
    MI->eraseFromParent();
}

void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
  for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineInstr &MI = *I++;
    if (RISCV::isFaultFirstLoad(MI)) {
      Register VLOutput = MI.getOperand(1).getReg();
      if (!MRI->use_nodbg_empty(VLOutput))
        BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
                VLOutput);
      // We don't use the vl output of the VLEFF/VLSEGFF anymore.
      MI.getOperand(1).setReg(RISCV::X0);
    }
  }
}

bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
  if (!ST.hasVInstructions())
    return false;

  LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");

  TII = ST.getInstrInfo();
  MRI = &MF.getRegInfo();

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by each block.
  for (const MachineBasicBlock &MBB : MF) {
    HaveVectorOp |= computeVLVTYPEChanges(MBB);
    // Initial exit state is whatever change we found in the block.
    BlockData &BBInfo = BlockInfo[MBB.getNumber()];
    BBInfo.Exit = BBInfo.Change;
    LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
                      << " is " << BBInfo.Exit << "\n");
  }

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (!HaveVectorOp) {
    BlockInfo.clear();
    return false;
  }

  // Phase 2 - determine the exit VL/VTYPE from each block. We add all
  // blocks to the list here, but will also add any that need to be revisited
  // during Phase 2 processing.
  for (const MachineBasicBlock &MBB : MF) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeIncomingVLVTYPE(MBB);
  }

  // Perform partial redundancy elimination of vsetvli transitions.
  for (MachineBasicBlock &MBB : MF)
    doPRE(MBB);

  // Phase 3 - add any vsetvli instructions needed in the block. Use the
  // Phase 2 information to avoid adding vsetvlis before the first vector
  // instruction in the block if the VL/VTYPE is satisfied by its
  // predecessors.
  for (MachineBasicBlock &MBB : MF)
    emitVSETVLIs(MBB);

  // Now that all vsetvlis are explicit, go through and do block local
  // DSE and peephole based demanded fields based transforms. Note that
  // this *must* be done outside the main dataflow so long as we allow
  // any cross block analysis within the dataflow. We can't have both
  // demanded fields based mutation and non-local analysis in the
  // dataflow at the same time without introducing inconsistencies.
  for (MachineBasicBlock &MBB : MF)
    doLocalPostpass(MBB);

  // Once we're fully done rewriting all the instructions, do a final pass
  // through to check for VSETVLIs which write to an unused destination.
  // For the non X0, X0 variant, we can replace the destination register
  // with X0 to reduce register pressure. This is really a generic
  // optimization which can be applied to any dead def (TODO: generalize).
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
          MI.getOpcode() == RISCV::PseudoVSETIVLI) {
        Register VRegDef = MI.getOperand(0).getReg();
        if (VRegDef != RISCV::X0 && MRI->use_nodbg_empty(VRegDef))
          MI.getOperand(0).setReg(RISCV::X0);
      }
    }
  }

  // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
  // of VLEFF/VLSEGFF.
  for (MachineBasicBlock &MBB : MF)
    insertReadVL(MBB);

  BlockInfo.clear();
  return HaveVectorOp;
}

/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}