LLVM 18.0.0git
RISCVInsertVSETVLI.cpp
//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
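// As an illustrative sketch (operand order simplified), phase 3 rewrites
//
//   %res = PseudoVADD_VV_M1 ..., %avl, 5 /*log2(e32)*/, ...
//
// into
//
//   dead $x0 = PseudoVSETVLI %avl, <vtype e32,m1,...>, implicit-def $vl,
//              implicit-def $vtype
//   %res = PseudoVADD_VV_M1 ..., $noreg, 5, ..., implicit $vl, implicit $vtype
//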
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"

static cl::opt<bool> DisableInsertVSETVLPHIOpt(
    "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
    cl::desc("Disable looking through phis when inserting vsetvlis."));

static cl::opt<bool> UseStrictAsserts(
    "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
    cl::desc("Enable strict assertion checking for the dataflow algorithm"));

namespace {

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}

/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
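/// For example, 'vsetvli x0, x0, e32,m1,ta,ma' rewrites VTYPE while keeping
/// the current VL (the spec reserves this form when the new VTYPE would
/// change VLMAX).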
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}

static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VFMV_S_F:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isScalarExtractInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_X_S:
  case RISCV::VFMV_F_S:
    return true;
  }
}

static bool isScalarInsertInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_S_X:
  case RISCV::VFMV_S_F:
    return true;
  }
}

static bool isScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_V_I:
  case RISCV::VMV_V_X:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isVSlideInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VSLIDEDOWN_VX:
  case RISCV::VSLIDEDOWN_VI:
  case RISCV::VSLIDEUP_VX:
  case RISCV::VSLIDEUP_VI:
    return true;
  }
}

/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
/// not a load or store which ignores SEW.
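/// For example, vle32.v always loads 32-bit elements regardless of the
/// current SEW; only the SEW/LMUL ratio (which fixes EMUL) matters to it.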
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return std::nullopt;
  case RISCV::VLE8_V:
  case RISCV::VLSE8_V:
  case RISCV::VSE8_V:
  case RISCV::VSSE8_V:
    return 8;
  case RISCV::VLE16_V:
  case RISCV::VLSE16_V:
  case RISCV::VSE16_V:
  case RISCV::VSSE16_V:
    return 16;
  case RISCV::VLE32_V:
  case RISCV::VLSE32_V:
  case RISCV::VSE32_V:
  case RISCV::VSSE32_V:
    return 32;
  case RISCV::VLE64_V:
  case RISCV::VLSE64_V:
  case RISCV::VSE64_V:
  case RISCV::VSSE64_V:
    return 64;
  }
}

/// Return true if this is an operation on mask registers. Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return false;
  const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  return Log2SEW == 0;
}

/// Return true if the inactive elements in the result are entirely undefined.
/// Note that this is different from "agnostic" as defined by the vector
/// specification. Agnostic requires each lane to either be undisturbed, or
/// take the value -1; no other value is allowed.
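/// For example, a tail-agnostic vadd.vv with a real passthru may only leave
/// tail lanes undisturbed or set them to all-ones, while an IMPLICIT_DEF
/// passthru places no constraint at all on the inactive lanes.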
static bool hasUndefinedMergeOp(const MachineInstr &MI,
                                const MachineRegisterInfo &MRI) {

  unsigned UseOpIdx;
  if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
    // If there is no passthrough operand, then the pass through
    // lanes are undefined.
    return true;

  // If the tied operand is NoReg, an IMPLICIT_DEF, or a REG_SEQUENCE whose
  // operands are solely IMPLICIT_DEFs, then the pass through lanes are
  // undefined.
  const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
  if (UseMO.getReg() == RISCV::NoRegister)
    return true;

  if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) {
    if (UseMI->isImplicitDef())
      return true;

    if (UseMI->isRegSequence()) {
      for (unsigned i = 1, e = UseMI->getNumOperands(); i < e; i += 2) {
        MachineInstr *SourceMI = MRI.getVRegDef(UseMI->getOperand(i).getReg());
        if (!SourceMI || !SourceMI->isImplicitDef())
          return false;
      }
      return true;
    }
  }
  return false;
}

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  // Some unknown property of VL is used. If demanded, must preserve entire
  // value.
  bool VLAny = false;
  // Only zero vs non-zero is used. If demanded, can change non-zero values.
  bool VLZeroness = false;
  // What properties of SEW we need to preserve.
  enum : uint8_t {
    SEWEqual = 3,              // The exact value of SEW needs to be preserved.
    SEWGreaterThanOrEqual = 2, // SEW can be changed as long as it's greater
                               // than or equal to the original value.
    SEWGreaterThanOrEqualAndLessThan64 =
        1,      // SEW can be changed as long as it's greater
                // than or equal to the original value, but must be less
                // than 64.
    SEWNone = 0 // We don't need to preserve SEW at all.
  } SEW = SEWNone;
  bool LMUL = false;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;

  // Return true if any part of VTYPE was used
  bool usedVTYPE() const {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }

  // Return true if any property of VL was used
  bool usedVL() {
    return VLAny || VLZeroness;
  }

  // Mark all VTYPE subfields and properties as demanded
  void demandVTYPE() {
    SEW = SEWEqual;
    LMUL = true;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
  }

  // Mark all VL properties as demanded
  void demandVL() {
    VLAny = true;
    VLZeroness = true;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  void print(raw_ostream &OS) const {
    OS << "{";
    OS << "VLAny=" << VLAny << ", ";
    OS << "VLZeroness=" << VLZeroness << ", ";
    OS << "SEW=";
    switch (SEW) {
    case SEWEqual:
      OS << "SEWEqual";
      break;
    case SEWGreaterThanOrEqual:
      OS << "SEWGreaterThanOrEqual";
      break;
    case SEWGreaterThanOrEqualAndLessThan64:
      OS << "SEWGreaterThanOrEqualAndLessThan64";
      break;
    case SEWNone:
      OS << "SEWNone";
      break;
    };
    OS << ", ";
    OS << "LMUL=" << LMUL << ", ";
    OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
    OS << "TailPolicy=" << TailPolicy << ", ";
    OS << "MaskPolicy=" << MaskPolicy;
    OS << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD
inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
  DF.print(OS);
  return OS;
}
#endif

/// Return true if moving from CurVType to NewVType is
/// indistinguishable from the perspective of an instruction (or set
/// of instructions) which use only the Used subfields and properties.
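/// For example, e32,m1 and e64,m2 share a SEW/LMUL ratio of 32, so they are
/// interchangeable for a user which demands only SEWLMULRatio.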
static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
                                const DemandedFields &Used) {
  switch (Used.SEW) {
  case DemandedFields::SEWNone:
    break;
  case DemandedFields::SEWEqual:
    if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqual:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
        RISCVVType::getSEW(NewVType) >= 64)
      return false;
    break;
  }

  if (Used.LMUL &&
      RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
    return false;

  if (Used.SEWLMULRatio) {
    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
                                              RISCVVType::getVLMUL(CurVType));
    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
                                              RISCVVType::getVLMUL(NewVType));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
                             RISCVVType::isTailAgnostic(NewVType))
    return false;
  if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
                             RISCVVType::isMaskAgnostic(NewVType))
    return false;
  return true;
}

/// Return the fields and properties demanded by the provided instruction.
DemandedFields getDemanded(const MachineInstr &MI,
                           const MachineRegisterInfo *MRI,
                           const RISCVSubtarget *ST) {
  // Warning: This function has to work on both the lowered (i.e. post
  // emitVSETVLIs) and pre-lowering forms. The main implication of this is
  // that it can't use the value of a SEW, VL, or Policy operand as they might
  // be stale after lowering.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL))
    Res.demandVL();
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      Res.demandVL();

    // Behavior is independent of mask policy.
    if (!RISCVII::usesMaskPolicy(TSFlags))
      Res.MaskPolicy = false;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode. This is asserted when constructing the VSETVLIInfo.
  if (getEEWForLoadStore(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = false;
  }

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored.
  if (isMaskRegOp(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = false;
  }

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  if (isScalarInsertInstr(MI)) {
    Res.LMUL = false;
    Res.SEWLMULRatio = false;
    Res.VLAny = false;
    // For vmv.s.x and vfmv.s.f, if the merge operand is *undefined*, we don't
    // need to preserve any other bits and are thus compatible with any larger
    // etype and can disregard policy bits. Warning: It's tempting to try doing
    // this for any tail agnostic operation, but we can't as TA requires
    // tail lanes to either be the original value or -1. We are writing
    // unknown bits to the lanes here.
    if (hasUndefinedMergeOp(MI, *MRI)) {
      if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }

  // vmv.x.s and vfmv.f.s are unconditional and ignore everything except SEW.
  if (isScalarExtractInstr(MI)) {
    assert(!RISCVII::hasVLOp(TSFlags));
    Res.LMUL = false;
    Res.SEWLMULRatio = false;
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  return Res;
}

/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  union {
    Register AVLReg;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    Unknown,
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLReg(Register Reg) {
    AVLReg = Reg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  Register getAVLReg() const {
    assert(hasAVLReg());
    return AVLReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }

  void setAVL(VSETVLIInfo Info) {
    assert(Info.isValid());
    if (Info.isUnknown())
      setUnknown();
    else if (Info.hasAVLReg())
      setAVLReg(Info.getAVLReg());
    else {
      assert(Info.hasAVLImm());
      setAVLImm(Info.getAVLImm());
    }
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }
  bool getTailAgnostic() const { return TailAgnostic; }
  bool getMaskAgnostic() const { return MaskAgnostic; }

  bool hasNonZeroAVL(const MachineRegisterInfo &MRI) const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg()) {
      if (getAVLReg() == RISCV::X0)
        return true;
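      // A virtual AVL defined by `addi Reg, x0, imm` (i.e. li) is provably
      // non-zero iff the immediate is non-zero.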
      if (MachineInstr *MI = MRI.getVRegDef(getAVLReg());
          MI && MI->getOpcode() == RISCV::ADDI &&
          MI->getOperand(1).isReg() && MI->getOperand(2).isImm() &&
          MI->getOperand(1).getReg() == RISCV::X0 &&
          MI->getOperand(2).getImm() != 0)
        return true;
      return false;
    }
    return false;
  }

  bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
                         const MachineRegisterInfo &MRI) const {
    if (hasSameAVL(Other))
      return true;
    return (hasNonZeroAVL(MRI) && Other.hasNonZeroAVL(MRI));
  }

  bool hasSameAVL(const VSETVLIInfo &Other) const {
    if (hasAVLReg() && Other.hasAVLReg())
      return getAVLReg() == Other.getAVLReg();

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    return false;
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return RISCVVType::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasCompatibleVTYPE(const DemandedFields &Used,
                          const VSETVLIInfo &Require) const {
    return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this. MI is the instruction whose requirements we're considering.
  bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
                    const MachineRegisterInfo &MRI) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!Require.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly)
      return false;

    if (Used.VLAny && !hasSameAVL(Require))
      return false;

    if (Used.VLZeroness && !hasEquallyZeroAVL(Require, MRI))
      return false;

    return hasCompatibleVTYPE(Used, Require);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVL(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << (unsigned)AVLReg;
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif

struct BlockData {
  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const RISCVSubtarget *ST;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                             VSETVLIInfo &Info) const;
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doLocalPostpass(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    NewInfo.setAVLReg(AVLReg);
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  return NewInfo;
}

static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
                                       const MachineRegisterInfo *MRI) {
  VSETVLIInfo InstrInfo;

  bool TailAgnostic = true;
  bool MaskAgnostic = true;
  if (!hasUndefinedMergeOp(MI, *MRI)) {
    // Start with undisturbed.
    TailAgnostic = false;
    MaskAgnostic = false;

    // If there is a policy operand, use it.
    if (RISCVII::hasVecPolicyOp(TSFlags)) {
      const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
      uint64_t Policy = Op.getImm();
      assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
             "Invalid Policy Value");
      TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
      MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
    }

    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;

    if (!RISCVII::usesMaskPolicy(TSFlags))
      MaskAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to X0 register.
      if (Imm == RISCV::VLMaxSentinel)
        InstrInfo.setAVLReg(RISCV::X0);
      else
        InstrInfo.setAVLImm(Imm);
    } else {
      InstrInfo.setAVLReg(VLOp.getReg());
    }
  } else {
    assert(isScalarExtractInstr(MI));
    InstrInfo.setAVLReg(RISCV::NoRegister);
  }
#ifndef NDEBUG
  if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  // If AVL is defined by a vsetvli with the same VLMAX, we can replace the
  // AVL operand with the AVL of the defining vsetvli. We avoid general
  // register AVLs to avoid extending live ranges without being sure we can
  // kill the original source reg entirely.
  if (InstrInfo.hasAVLReg() && InstrInfo.getAVLReg().isVirtual()) {
    MachineInstr *DefMI = MRI->getVRegDef(InstrInfo.getAVLReg());
    if (DefMI && isVectorConfigInstr(*DefMI)) {
      VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI);
      if (DefInstrInfo.hasSameVLMAX(InstrInfo) &&
          (DefInstrInfo.hasAVLImm() || DefInstrInfo.getAVLReg() == RISCV::X0)) {
        InstrInfo.setAVL(DefInstrInfo);
      }
    }
  }

  return InstrInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                                       const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  DebugLoc DL = MI.getDebugLoc();
  insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {

  if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
    // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
    // VLMAX.
    if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }

    // If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If
    // it has the same VLMAX we want and the last VL/VTYPE we observed is the
    // same, we can use the X0, X0 form.
    if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg() &&
        Info.getAVLReg().isVirtual()) {
      if (MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg())) {
        if (isVectorConfigInstr(*DefMI)) {
          VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
          if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
            BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                .addReg(RISCV::X0, RegState::Kill)
                .addImm(Info.encodeVTYPE())
                .addReg(RISCV::VL, RegState::Implicit);
            return;
          }
        }
      }
    }
  }

  if (Info.hasAVLImm()) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(Info.getAVLImm())
        .addImm(Info.encodeVTYPE());
    return;
  }

  Register AVLReg = Info.getAVLReg();
  if (AVLReg == RISCV::NoRegister) {
    // We can only use x0, x0 if there's no chance of the vtype change causing
    // the previous vl to become invalid.
    if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
        Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }
    // Otherwise use an AVL of 1 to avoid depending on previous vl.
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(1)
        .addImm(Info.encodeVTYPE());
    return;
  }

  if (AVLReg.isVirtual())
    MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);

  // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
  // opcode if the AVLReg is X0 as they have different register classes for
  // the AVL operand.
  Register DestReg = RISCV::X0;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (AVLReg == RISCV::X0) {
    DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    Opcode = RISCV::PseudoVSETVLIX0;
  }
  BuildMI(MBB, InsertPt, DL, TII->get(Opcode))
      .addReg(DestReg, RegState::Define | RegState::Dead)
      .addReg(AVLReg)
      .addImm(Info.encodeVTYPE());
}

static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
  return Fractional || LMul == 1;
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// before MI.
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, MRI));

  if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
    return true;

  DemandedFields Used = getDemanded(MI, MRI, ST);

  // A slidedown/slideup with an *undefined* merge op can freely clobber
  // elements not copied from the source vector (e.g. masked off, tail, or
  // slideup's prefix). Notes:
  // * We can't modify SEW here since the slide amount is in units of SEW.
  // * VL=1 is special only because we have existing support for zero vs
  //   non-zero VL. We could generalize this if we had a VL > C predicate.
  // * The LMUL1 restriction is for machines whose latency may depend on VL.
  // * As above, this is only legal for tail "undefined" not "agnostic".
  if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
      isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
    Used.VLAny = false;
    Used.VLZeroness = true;
    Used.LMUL = false;
    Used.TailPolicy = false;
  }

  // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated the same
  // semantically as vmv.s.x. This is particularly useful since we don't have an
  // immediate form of vmv.s.x, and thus frequently use vmv.v.i in its place.
  // Since a splat is non-constant time in LMUL, we do need to be careful to not
  // increase the number of active vector registers (unlike for vmv.s.x.)
  if (isScalarSplatInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
      isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
    Used.LMUL = false;
    Used.SEWLMULRatio = false;
    Used.VLAny = false;
    if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
      Used.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
    else
      Used.SEW = DemandedFields::SEWGreaterThanOrEqual;
    Used.TailPolicy = false;
  }

  if (CurInfo.isCompatible(Used, Require, *MRI))
    return false;

  // We didn't find a compatible value. If our AVL is a virtual register,
  // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
  // and the last VL/VTYPE we observed is the same, we don't need a
  // VSETVLI here.
  if (Require.hasAVLReg() && Require.getAVLReg().isVirtual() &&
      CurInfo.hasCompatibleVTYPE(Used, Require)) {
    if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
      if (isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
          return false;
      }
    }
  }

  return true;
}

static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
                                  DemandedFields &Demanded,
                                  const MachineRegisterInfo *MRI);

// Given an incoming state reaching MI, minimally modifies that state so that it
// is compatible with MI. The resulting state is guaranteed to be semantically
// legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                        const MachineInstr &MI) const {
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (!RISCVII::hasSEWOp(TSFlags))
    return;

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
  assert(NewInfo.isValid() && !NewInfo.isUnknown());
  if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  if (Info.hasSEWLMULRatioOnly() || !Info.isValid() || Info.isUnknown())
    Info = NewInfo;

  if (!RISCVII::hasVLOp(TSFlags)) {
    Info = NewInfo;
    return;
  }

  DemandedFields Demanded = getDemanded(MI, MRI, ST);
  const VSETVLIInfo IncomingInfo =
      adjustIncoming(PrevInfo, NewInfo, Demanded, MRI);

  if (Demanded.usedVL())
    Info.setAVL(IncomingInfo);

  Info.setVTYPE(
      ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
          .getVLMUL(),
      ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
      // Prefer tail/mask agnostic since it can be relaxed to undisturbed later
      // if needed.
      (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
          IncomingInfo.getTailAgnostic(),
      (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
          IncomingInfo.getMaskAgnostic());
}

static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
                                  DemandedFields &Demanded,
                                  const MachineRegisterInfo *MRI) {
  VSETVLIInfo Info = NewInfo;

  // If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
  // maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
  // places.
  if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
      !PrevInfo.isUnknown()) {
    if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
            PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
      Info.setVLMul(*NewVLMul);
    Demanded.LMUL = true;
  }

  // If we only demand VL zeroness (i.e. vmv.s.x and vmv.x.s), then there are
  // only two behaviors, VL = 0 and VL > 0. We can discard the user requested
  // AVL and just use the last one if we can prove it equally zero. This
  // removes a vsetvli entirely if the types match or allows use of cheaper avl
  // preserving variant if VLMAX doesn't change. If VLMAX might change, we
  // couldn't use the 'vsetvli x0, x0, vtype' variant, so we avoid the transform
  // to prevent extending live range of an avl register operand.
  // TODO: We can probably relax this for immediates.
  if (Demanded.VLZeroness && !Demanded.VLAny && PrevInfo.isValid() &&
      PrevInfo.hasEquallyZeroAVL(Info, *MRI) && Info.hasSameVLMAX(PrevInfo)) {
    Info.setAVL(PrevInfo);
    Demanded.demandVL();
  }

  return Info;
}
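// For example, if the previous state is e64,m2 (ratio 32) and the new
// instruction wants e32 but demands neither LMUL nor the ratio, choosing
// e32,m1 preserves ratio 32 (and hence VLMAX) and may avoid a VL toggle.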

// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
                                       const MachineInstr &MI) const {
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to vl-output of the fault first load.
    Info.setAVLReg(MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
      MI.modifiesRegister(RISCV::VTYPE))
    Info = VSETVLIInfo::getUnknown();
}

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                                               VSETVLIInfo &Info) const {
  bool HadVectorOp = false;

  Info = BlockInfo[MBB.getNumber()].Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(Info, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
      HadVectorOp = true;

    transferAfter(Info, MI);
  }

  return HadVectorOp;
}

void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  // Start with the previous entry so that we keep the most conservative state
  // we have ever found.
  VSETVLIInfo InInfo = BBInfo.Pred;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun block
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed, a block's output state can change based on
  // the input state. To cache, we'd have to add logic for finding
  // never-compatible state changes.
  VSETVLIInfo TmpStatus;
  computeVLVTYPEChanges(MBB, TmpStatus);

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue) {
      BlockInfo[S->getNumber()].InQueue = true;
      WorkList.push(S);
    }
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL is a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
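// This catches, e.g., a loop-header PHI
//   %avl = PHI [ %vl1, %pred1 ], [ %vl2, %pred2 ]
// where each %vlN is the GPR result of the final vsetvli in predN and every
// predecessor exits with the required VTYPE, so VL/VTYPE already match.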
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (DisableInsertVSETVLPHIOpt)
    return true;

  if (!Require.hasAVLReg())
    return true;

  Register AVLReg = Require.getAVLReg();
  if (!AVLReg.isVirtual())
    return true;

  // We need the AVL to be produced by a PHI node in this basic block.
  MachineInstr *PHI = MRI->getVRegDef(AVLReg);
  if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
    return true;

  for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
       PHIOp += 2) {
    Register InReg = PHI->getOperand(PHIOp).getReg();
    MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
    const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
    // If the exit from the predecessor has the VTYPE we are looking for
    // we might be able to avoid a VSETVLI.
    if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require))
      return true;

    // We need the PHI input to be the output of a VSET(I)VLI.
    MachineInstr *DefMI = MRI->getVRegDef(InReg);
    if (!DefMI || !isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
        !DefInfo.hasSameVTYPE(PBBInfo.Exit))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}

void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (PrevInfo != CurInfo) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had since we know the GPR result of the implicit state change
        // wouldn't be used and VL/VTYPE registers are correct. Note that
        // we *do* need to model the state as if it changed as while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
        MI.modifiesRegister(RISCV::VTYPE))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  // If we reach the end of the block and our current info doesn't match the
  // expected info, insert a vsetvli to correct.
  if (!UseStrictAsserts) {
    const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit;
    if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
        CurInfo != ExitInfo) {
      // Note there's an implicit assumption here that terminators never use
      // or modify VL or VTYPE. Also, fallthrough will return end().
      auto InsertPt = MBB.getFirstInstrTerminator();
      insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo,
                    CurInfo);
      CurInfo = ExitInfo;
    }
  }

  if (UseStrictAsserts && CurInfo.isValid()) {
    const auto &Info = BlockInfo[MBB.getNumber()];
    if (CurInfo != Info.Exit) {
      LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
      LLVM_DEBUG(dbgs() << "  begin        state: " << Info.Pred << "\n");
      LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
      LLVM_DEBUG(dbgs() << "  actual   end state: " << CurInfo << "\n");
    }
    assert(CurInfo == Info.Exit &&
           "InsertVSETVLI dataflow invariant violated");
  }
}

/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors. Specifically,
/// this is geared to catch the common case of a fixed length vsetvl in a single
/// block loop when it could execute once in the preheader instead.
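/// For instance, if a single-block loop begins every iteration with
/// 'vsetivli zero, 4, e32,m1,ta,ma' while its preheader exits in an unknown
/// state, hoisting that vsetivli into the preheader removes it from the loop
/// body entirely.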
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If the AVL value is a register (other than our VLMAX sentinel),
  // we need to prove the value is available at the point we're going
  // to insert the vsetvli at.
  if (AvailableInfo.hasAVLReg() && RISCV::X0 != AvailableInfo.getAVLReg()) {
    MachineInstr *AVLDefMI = MRI->getVRegDef(AvailableInfo.getAVLReg());
    if (!AVLDefMI)
      return;
    // This is an inline dominance check which covers the case of
    // UnavailablePred being the preheader of a loop.
    if (AVLDefMI->getParent() != UnavailablePred)
      return;
    for (auto &TermMI : UnavailablePred->terminators())
      if (&TermMI == AVLDefMI)
        return;
  }

  // Model the effect of changing the input state of the block MBB to
  // AvailableInfo. We're looking for two issues here; one legality,
  // one profitability.
  // 1) If the block doesn't use some of the fields from VL or VTYPE, we
  //    may hit the end of the block with a different end state. We can
  //    not make this change without reflowing later blocks as well.
  // 2) If we don't actually remove a transition, inserting a vsetvli
  //    into the predecessor block would be correct, but unprofitable.
  VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
  VSETVLIInfo CurInfo = AvailableInfo;
  int TransitionsRemoved = 0;
  for (const MachineInstr &MI : MBB) {
    const VSETVLIInfo LastInfo = CurInfo;
    const VSETVLIInfo LastOldInfo = OldInfo;
    transferBefore(CurInfo, MI);
    transferBefore(OldInfo, MI);
    if (CurInfo == LastInfo)
      TransitionsRemoved++;
    if (LastOldInfo == OldInfo)
      TransitionsRemoved--;
    transferAfter(CurInfo, MI);
    transferAfter(OldInfo, MI);
    if (CurInfo == OldInfo)
      // Convergence. All transitions after this must match by construction.
      break;
  }
  if (CurInfo != OldInfo || TransitionsRemoved <= 0)
    // Issues 1 and 2 above
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE. Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt),
                AvailableInfo, OldExit);
}

static void doUnion(DemandedFields &A, DemandedFields B) {
  A.VLAny |= B.VLAny;
  A.VLZeroness |= B.VLZeroness;
  A.SEW = std::max(A.SEW, B.SEW);
  A.LMUL |= B.LMUL;
  A.SEWLMULRatio |= B.SEWLMULRatio;
  A.TailPolicy |= B.TailPolicy;
  A.MaskPolicy |= B.MaskPolicy;
}

static bool isNonZeroAVL(const MachineOperand &MO) {
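  // As a register AVL, only X0 (which selects VLMAX) is known non-zero here.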
  if (MO.isReg())
    return RISCV::X0 == MO.getReg();
  assert(MO.isImm());
  return 0 != MO.getImm();
}

// Return true if we can mutate PrevMI to match MI without changing any of the
// fields which would be observed.
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
                                 const MachineInstr &MI,
                                 const DemandedFields &Used) {
  // If the VL values aren't equal, return false if either a) the former is
  // demanded, or b) we can't rewrite the former to be the latter for
  // implementation reasons.
  if (!isVLPreservingConfig(MI)) {
    if (Used.VLAny)
      return false;

    // We don't bother to handle the equally zero case here as it's largely
    // uninteresting.
    if (Used.VLZeroness) {
      if (isVLPreservingConfig(PrevMI))
        return false;
      if (!isNonZeroAVL(MI.getOperand(1)) ||
          !isNonZeroAVL(PrevMI.getOperand(1)))
        return false;
    }

    // TODO: Track whether the register is defined between
    // PrevMI and MI.
    if (MI.getOperand(1).isReg() &&
        RISCV::X0 != MI.getOperand(1).getReg())
      return false;
  }

  if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
    return false;

  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}

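// Walk the block backwards, erasing vsetvlis whose effects are never observed
// and folding a vsetvli into its successor when the demanded fields allow it.
// E.g. (illustrative) back-to-back vsetvlis where nothing between them reads
// VL or VTYPE collapse to just the later one.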
void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
  MachineInstr *NextMI = nullptr;
  // We can have arbitrary code in successors, so VL and VTYPE
  // must be considered demanded.
  DemandedFields Used;
  Used.demandVL();
  Used.demandVTYPE();
  SmallVector<MachineInstr*> ToDelete;
  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {

    if (!isVectorConfigInstr(MI)) {
      doUnion(Used, getDemanded(MI, MRI, ST));
      continue;
    }

    Register VRegDef = MI.getOperand(0).getReg();
    if (VRegDef != RISCV::X0 &&
        !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
      Used.demandVL();

    if (NextMI) {
      if (!Used.usedVL() && !Used.usedVTYPE()) {
        ToDelete.push_back(&MI);
        // Leave NextMI unchanged
        continue;
      } else if (canMutatePriorConfig(MI, *NextMI, Used)) {
        if (!isVLPreservingConfig(*NextMI)) {
          MI.getOperand(0).setReg(NextMI->getOperand(0).getReg());
          MI.getOperand(0).setIsDead(false);
          if (NextMI->getOperand(1).isImm())
            MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
          else
            MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);
          MI.setDesc(NextMI->getDesc());
        }
        MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
        ToDelete.push_back(NextMI);
        // fallthrough
      }
    }
    NextMI = &MI;
    Used = getDemanded(MI, MRI, ST);
  }

  for (auto *MI : ToDelete)
    MI->eraseFromParent();
}

void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
  for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineInstr &MI = *I++;
    if (RISCV::isFaultFirstLoad(MI)) {
      Register VLOutput = MI.getOperand(1).getReg();
      if (!MRI->use_nodbg_empty(VLOutput))
        BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
                VLOutput);
      // We don't use the vl output of the VLEFF/VLSEGFF anymore.
      MI.getOperand(1).setReg(RISCV::X0);
    }
  }
}

bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  ST = &MF.getSubtarget<RISCVSubtarget>();
  if (!ST->hasVInstructions())
    return false;

  LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");

  TII = ST->getInstrInfo();
  MRI = &MF.getRegInfo();

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by each block.
  for (const MachineBasicBlock &MBB : MF) {
    VSETVLIInfo TmpStatus;
    HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
    // Initial exit state is whatever change we found in the block.
    BlockData &BBInfo = BlockInfo[MBB.getNumber()];
    BBInfo.Exit = TmpStatus;
    LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
                      << " is " << BBInfo.Exit << "\n");
  }

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (!HaveVectorOp) {
    BlockInfo.clear();
    return false;
  }

  // Phase 2 - determine the exit VL/VTYPE from each block. We add all
  // blocks to the list here, but will also add any that need to be revisited
  // during Phase 2 processing.
  for (const MachineBasicBlock &MBB : MF) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeIncomingVLVTYPE(MBB);
  }

  // Perform partial redundancy elimination of vsetvli transitions.
  for (MachineBasicBlock &MBB : MF)
    doPRE(MBB);

  // Phase 3 - add any vsetvli instructions needed in the block. Use the
  // Phase 2 information to avoid adding vsetvlis before the first vector
  // instruction in the block if the VL/VTYPE is satisfied by its
  // predecessors.
  for (MachineBasicBlock &MBB : MF)
    emitVSETVLIs(MBB);

  // Now that all vsetvlis are explicit, go through and do block local
  // DSE and peephole based demanded fields based transforms. Note that
  // this *must* be done outside the main dataflow so long as we allow
  // any cross block analysis within the dataflow. We can't have both
  // demanded fields based mutation and non-local analysis in the
  // dataflow at the same time without introducing inconsistencies.
  for (MachineBasicBlock &MBB : MF)
    doLocalPostpass(MBB);

  // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
  // of VLEFF/VLSEGFF.
  for (MachineBasicBlock &MBB : MF)
    insertReadVL(MBB);

  BlockInfo.clear();
  return HaveVectorOp;
}

/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Definition: MachineInstr.h:68
bool isImplicitDef() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:326
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:546
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:540
bool isRegSequence() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:553
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetInstrInfo - Interface to description of machine instruction set.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
static bool usesMaskPolicy(uint64_t TSFlags)
static unsigned getVLOpNum(const MCInstrDesc &Desc)
static bool doesForceTailAgnostic(uint64_t TSFlags)
static VLMUL getLMul(uint64_t TSFlags)
static bool hasVLOp(uint64_t TSFlags)
static bool hasVecPolicyOp(uint64_t TSFlags)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
static bool hasSEWOp(uint64_t TSFlags)
static bool isTailAgnostic(unsigned VType)
static RISCVII::VLMUL getVLMUL(unsigned VType)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul)
static bool isMaskAgnostic(unsigned VType)
static bool isValidSEW(unsigned SEW)
unsigned encodeVTYPE(RISCVII::VLMUL VLMUL, unsigned SEW, bool TailAgnostic, bool MaskAgnostic)
static unsigned getSEW(unsigned VType)
std::optional< RISCVII::VLMUL > getSameRatioLMUL(unsigned SEW, RISCVII::VLMUL VLMUL, unsigned EEW)
unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode)
bool isFaultFirstLoad(const MachineInstr &MI)
static constexpr int64_t VLMaxSentinel
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Define
Register definition.
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
@ Uninitialized
Definition: Threading.h:61
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2036
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
FunctionPass * createRISCVInsertVSETVLIPass()
Returns an instance of the Insert VSETVLI pass.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:292
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Status intersect(const Status &S) const