//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
#define RISCV_COALESCE_VSETVLI_NAME "RISC-V Coalesce VSETVLI pass"

STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");

static cl::opt<bool> DisableInsertVSETVLPHIOpt(
    "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
    cl::desc("Disable looking through phis when inserting vsetvlis."));

namespace {

/// Given a virtual register \p Reg, return the corresponding VNInfo for it.
/// This will return nullptr if the virtual register is an implicit_def or
/// if LiveIntervals is not available.
static VNInfo *getVNInfoFromReg(Register Reg, const MachineInstr &MI,
                                const LiveIntervals *LIS) {
  assert(Reg.isVirtual());
  if (!LIS)
    return nullptr;
  auto &LI = LIS->getInterval(Reg);
  SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI);
  return LI.getVNInfoBefore(SI);
}

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}

/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}
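// For example (illustrative): "vsetvli x0, x0, e32, m2, ta, ma" updates only
// VTYPE and leaves VL unchanged; per the specification this form is only
// usable when the new SEW/LMUL ratio keeps VLMAX unchanged.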

static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VFMV_S_F:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isScalarExtractInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_X_S:
  case RISCV::VFMV_F_S:
    return true;
  }
}

static bool isScalarInsertInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_S_X:
  case RISCV::VFMV_S_F:
    return true;
  }
}

static bool isScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_V_I:
  case RISCV::VMV_V_X:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isVSlideInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VSLIDEDOWN_VX:
  case RISCV::VSLIDEDOWN_VI:
  case RISCV::VSLIDEUP_VX:
  case RISCV::VSLIDEUP_VI:
    return true;
  }
}

/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
/// not a load or store which ignores SEW.
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return std::nullopt;
  case RISCV::VLE8_V:
  case RISCV::VLSE8_V:
  case RISCV::VSE8_V:
  case RISCV::VSSE8_V:
    return 8;
  case RISCV::VLE16_V:
  case RISCV::VLSE16_V:
  case RISCV::VSE16_V:
  case RISCV::VSSE16_V:
    return 16;
  case RISCV::VLE32_V:
  case RISCV::VLSE32_V:
  case RISCV::VSE32_V:
  case RISCV::VSSE32_V:
    return 32;
  case RISCV::VLE64_V:
  case RISCV::VLSE64_V:
  case RISCV::VSE64_V:
  case RISCV::VSSE64_V:
    return 64;
  }
}
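// For example (illustrative): vle32.v always moves 32-bit elements, so its
// EEW is 32 no matter what SEW is currently in VTYPE; with SEW=8 and LMUL=1/2
// the hardware computes EMUL = (EEW/SEW)*LMUL = 4*(1/2) = 2 for it.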

static bool isNonZeroLoadImmediate(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::ADDI &&
         MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
         MI.getOperand(1).getReg() == RISCV::X0 &&
         MI.getOperand(2).getImm() != 0;
}

/// Return true if this is an operation on mask registers. Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return false;
  const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  return Log2SEW == 0;
}

/// Return true if the inactive elements in the result are entirely undefined.
/// Note that this is different from "agnostic" as defined by the vector
/// specification. Agnostic requires each lane to either be undisturbed, or
/// take the value -1; no other value is allowed.
static bool hasUndefinedMergeOp(const MachineInstr &MI) {
  unsigned UseOpIdx;
  if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
    // If there is no passthrough operand, then the passthrough
    // lanes are undefined.
    return true;

  // All undefined passthrus should be $noreg: see
  // RISCVDAGToDAGISel::doPeepholeNoRegPassThru
  const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
  return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
}

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  // Some unknown property of VL is used. If demanded, must preserve entire
  // value.
  bool VLAny = false;
  // Only zero vs non-zero is used. If demanded, can change non-zero values.
  bool VLZeroness = false;
  // What properties of SEW we need to preserve.
  enum : uint8_t {
    SEWEqual = 3, // The exact value of SEW needs to be preserved.
    SEWGreaterThanOrEqual = 2, // SEW can be changed as long as it's greater
                               // than or equal to the original value.
    SEWGreaterThanOrEqualAndLessThan64 =
        1, // SEW can be changed as long as it's greater
           // than or equal to the original value, but must be less
           // than 64.
    SEWNone = 0 // We don't need to preserve SEW at all.
  } SEW = SEWNone;
  enum : uint8_t {
    LMULEqual = 2, // The exact value of LMUL needs to be preserved.
    LMULLessThanOrEqualToM1 = 1, // We can use any LMUL <= M1.
    LMULNone = 0 // We don't need to preserve LMUL at all.
  } LMUL = LMULNone;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;

  // Return true if any part of VTYPE was used.
  bool usedVTYPE() const {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }

  // Return true if any property of VL was used.
  bool usedVL() {
    return VLAny || VLZeroness;
  }

  // Mark all VTYPE subfields and properties as demanded.
  void demandVTYPE() {
    SEW = SEWEqual;
    LMUL = LMULEqual;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
  }

  // Mark all VL properties as demanded.
  void demandVL() {
    VLAny = true;
    VLZeroness = true;
  }

  static DemandedFields all() {
    DemandedFields DF;
    DF.demandVTYPE();
    DF.demandVL();
    return DF;
  }

  // Make this the result of demanding both the fields in this and B.
  void doUnion(const DemandedFields &B) {
    VLAny |= B.VLAny;
    VLZeroness |= B.VLZeroness;
    SEW = std::max(SEW, B.SEW);
    LMUL = std::max(LMUL, B.LMUL);
    SEWLMULRatio |= B.SEWLMULRatio;
    TailPolicy |= B.TailPolicy;
    MaskPolicy |= B.MaskPolicy;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  void print(raw_ostream &OS) const {
    OS << "{";
    OS << "VLAny=" << VLAny << ", ";
    OS << "VLZeroness=" << VLZeroness << ", ";
    OS << "SEW=";
    switch (SEW) {
    case SEWEqual:
      OS << "SEWEqual";
      break;
    case SEWGreaterThanOrEqual:
      OS << "SEWGreaterThanOrEqual";
      break;
    case SEWGreaterThanOrEqualAndLessThan64:
      OS << "SEWGreaterThanOrEqualAndLessThan64";
      break;
    case SEWNone:
      OS << "SEWNone";
      break;
    }
    OS << ", ";
    OS << "LMUL=";
    switch (LMUL) {
    case LMULEqual:
      OS << "LMULEqual";
      break;
    case LMULLessThanOrEqualToM1:
      OS << "LMULLessThanOrEqualToM1";
      break;
    case LMULNone:
      OS << "LMULNone";
      break;
    }
    OS << ", ";
    OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
    OS << "TailPolicy=" << TailPolicy << ", ";
    OS << "MaskPolicy=" << MaskPolicy;
    OS << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
  DF.print(OS);
  return OS;
}
#endif

static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
  return Fractional || LMul == 1;
}

/// Return true if moving from CurVType to NewVType is
/// indistinguishable from the perspective of an instruction (or set
/// of instructions) which use only the Used subfields and properties.
static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
                                const DemandedFields &Used) {
  switch (Used.SEW) {
  case DemandedFields::SEWNone:
    break;
  case DemandedFields::SEWEqual:
    if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqual:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
        RISCVVType::getSEW(NewVType) >= 64)
      return false;
    break;
  }

  switch (Used.LMUL) {
  case DemandedFields::LMULNone:
    break;
  case DemandedFields::LMULEqual:
    if (RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
      return false;
    break;
  case DemandedFields::LMULLessThanOrEqualToM1:
    if (!isLMUL1OrSmaller(RISCVVType::getVLMUL(NewVType)))
      return false;
    break;
  }

  if (Used.SEWLMULRatio) {
    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
                                              RISCVVType::getVLMUL(CurVType));
    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
                                              RISCVVType::getVLMUL(NewVType));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
                             RISCVVType::isTailAgnostic(NewVType))
    return false;
  if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
                             RISCVVType::isMaskAgnostic(NewVType))
    return false;
  return true;
}
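// For example (illustrative): if Used demands only SEWLMULRatio, then e32/m1
// and e64/m2 are compatible VTYPEs, since both have SEW/LMUL = 32 and thus
// the same VLMAX (and therefore the same AVL-to-VL mapping).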

/// Return the fields and properties demanded by the provided instruction.
DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
  // This function works in coalesceVSETVLI too. We can still use the value of a
  // SEW, VL, or Policy operand even though it might not be the exact value in
  // the VL or VTYPE, since we only care about what the instruction originally
  // demanded.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used.
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VL, /*TRI=*/nullptr))
    Res.demandVL();
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too.
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      if (const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
          !VLOp.isReg() || !VLOp.isUndef())
        Res.demandVL();

    // Behavior is independent of mask policy.
    if (!RISCVII::usesMaskPolicy(TSFlags))
      Res.MaskPolicy = false;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode. This is asserted when constructing the VSETVLIInfo.
  if (getEEWForLoadStore(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = DemandedFields::LMULNone;
  }

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored.
  if (isMaskRegOp(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = DemandedFields::LMULNone;
  }

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  if (isScalarInsertInstr(MI)) {
    Res.LMUL = DemandedFields::LMULNone;
    Res.SEWLMULRatio = false;
    Res.VLAny = false;
    // For vmv.s.x and vfmv.s.f, if the merge operand is *undefined*, we don't
    // need to preserve any other bits and are thus compatible with any larger
    // etype, and can disregard policy bits. Warning: It's tempting to try doing
    // this for any tail agnostic operation, but we can't as TA requires
    // tail lanes to either be the original value or -1. We are writing
    // unknown bits to the lanes here.
    if (hasUndefinedMergeOp(MI)) {
      if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }
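
  // For example (illustrative): vmv.s.x writes only element 0, so with an
  // undefined passthru it behaves identically under e8/m1 and e8/m8, and
  // under any SEW >= its original SEW; only VL == 0 vs VL > 0 is observable.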

  // vmv.x.s and vfmv.f.s are unconditional and ignore everything except SEW.
  if (isScalarExtractInstr(MI)) {
    assert(!RISCVII::hasVLOp(TSFlags));
    Res.LMUL = DemandedFields::LMULNone;
    Res.SEWLMULRatio = false;
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  if (RISCVII::hasVLOp(MI.getDesc().TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    // A slidedown/slideup with an *undefined* merge op can freely clobber
    // elements not copied from the source vector (e.g. masked off, tail, or
    // slideup's prefix). Notes:
    // * We can't modify SEW here since the slide amount is in units of SEW.
    // * VL=1 is special only because we have existing support for zero vs
    //   non-zero VL. We could generalize this if we had a VL > C predicate.
    // * The LMUL1 restriction is for machines whose latency may depend on VL.
    // * As above, this is only legal for tail "undefined" not "agnostic".
    if (isVSlideInstr(MI) && VLOp.isImm() && VLOp.getImm() == 1 &&
        hasUndefinedMergeOp(MI)) {
      Res.VLAny = false;
      Res.VLZeroness = true;
      Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
      Res.TailPolicy = false;
    }

    // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated the same
    // semantically as vmv.s.x. This is particularly useful since we don't
    // have an immediate form of vmv.s.x, and thus frequently use vmv.v.i in
    // its place. Since a splat is non-constant time in LMUL, we do need to be
    // careful to not increase the number of active vector registers (unlike
    // for vmv.s.x.)
    if (isScalarSplatInstr(MI) && VLOp.isImm() && VLOp.getImm() == 1 &&
        hasUndefinedMergeOp(MI)) {
      Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
      Res.SEWLMULRatio = false;
      Res.VLAny = false;
      if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }

  return Res;
}
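// For example (illustrative): "vmv.v.i v8, 3" run with AVL=1 and an undefined
// passthru writes only element 0, matching what vmv.s.x of the same value
// would do; the relaxations above let it reuse a prior vsetvli as long as the
// configured LMUL stays <= m1 and SEW is at least its original SEW.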

/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  struct AVLDef {
    // Every AVLDef should have a VNInfo, unless we're running without
    // LiveIntervals in which case this will be nullptr.
    const VNInfo *ValNo;
    Register DefReg;
  };
  union {
    AVLDef AVLRegDef;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    AVLIsVLMAX,
    Unknown, // AVL and VTYPE are fully unknown
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLRegDef(const VNInfo *VNInfo, Register AVLReg) {
    assert(AVLReg.isVirtual());
    AVLRegDef.ValNo = VNInfo;
    AVLRegDef.DefReg = AVLReg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  void setAVLVLMAX() { State = AVLIsVLMAX; }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  bool hasAVLVLMAX() const { return State == AVLIsVLMAX; }
  Register getAVLReg() const {
    assert(hasAVLReg() && AVLRegDef.DefReg.isVirtual());
    return AVLRegDef.DefReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }
  const VNInfo *getAVLVNInfo() const {
    assert(hasAVLReg());
    return AVLRegDef.ValNo;
  }
  // Most AVLIsReg infos will have a single defining MachineInstr, unless it was
  // a PHI node. In that case getAVLVNInfo()->def will point to the block
  // boundary slot. If LiveIntervals isn't available, then nullptr is returned.
  const MachineInstr *getAVLDefMI(const LiveIntervals *LIS) const {
    assert(hasAVLReg());
    if (!LIS)
      return nullptr;
    auto *MI = LIS->getInstructionFromIndex(getAVLVNInfo()->def);
    assert(!(getAVLVNInfo()->isPHIDef() && MI));
    return MI;
  }

  void setAVL(VSETVLIInfo Info) {
    assert(Info.isValid());
    if (Info.isUnknown())
      setUnknown();
    else if (Info.hasAVLReg())
      setAVLRegDef(Info.getAVLVNInfo(), Info.getAVLReg());
    else if (Info.hasAVLVLMAX())
      setAVLVLMAX();
    else {
      assert(Info.hasAVLImm());
      setAVLImm(Info.getAVLImm());
    }
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }
  bool getTailAgnostic() const { return TailAgnostic; }
  bool getMaskAgnostic() const { return MaskAgnostic; }

  bool hasNonZeroAVL(const LiveIntervals *LIS) const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg()) {
      if (auto *DefMI = getAVLDefMI(LIS))
        return isNonZeroLoadImmediate(*DefMI);
    }
    if (hasAVLVLMAX())
      return true;
    return false;
  }

  bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
                         const LiveIntervals *LIS) const {
    if (hasSameAVL(Other))
      return true;
    return (hasNonZeroAVL(LIS) && Other.hasNonZeroAVL(LIS));
  }

  bool hasSameAVLLatticeValue(const VSETVLIInfo &Other) const {
    if (hasAVLReg() && Other.hasAVLReg()) {
      assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
             "we either have intervals or we don't");
      if (!getAVLVNInfo())
        return getAVLReg() == Other.getAVLReg();
      return getAVLVNInfo()->id == Other.getAVLVNInfo()->id &&
             getAVLReg() == Other.getAVLReg();
    }

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    if (hasAVLVLMAX())
      return Other.hasAVLVLMAX() && hasSameVLMAX(Other);

    return false;
  }

  // Return true if the two lattice values are guaranteed to have
  // the same AVL value at runtime.
  bool hasSameAVL(const VSETVLIInfo &Other) const {
    // Without LiveIntervals, we don't know which instruction defines a
    // register. Since a register may be redefined, this means all AVLIsReg
    // states must be treated as possibly distinct.
    if (hasAVLReg() && Other.hasAVLReg()) {
      assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
             "we either have intervals or we don't");
      if (!getAVLVNInfo())
        return false;
    }
    return hasSameAVLLatticeValue(Other);
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return RISCVVType::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPEs for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasCompatibleVTYPE(const DemandedFields &Used,
                          const VSETVLIInfo &Require) const {
    return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this. MI is the instruction whose requirements we're considering.
  bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
                    const LiveIntervals *LIS) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly || Require.SEWLMULRatioOnly)
      return false;

    if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require)))
      return false;

    if (Used.VLZeroness && !hasEquallyZeroAVL(Require, LIS))
      return false;

    return hasCompatibleVTYPE(Used, Require);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVLLatticeValue(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << llvm::printReg(getAVLReg());
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    if (hasAVLVLMAX())
      OS << "AVLVLMAX";
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif

struct BlockData {
  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const RISCVSubtarget *ST;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;
  // Possibly null!
  LiveIntervals *LIS;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();

    AU.addUsedIfAvailable<LiveIntervals>();
    AU.addPreserved<LiveIntervals>();
    AU.addPreserved<SlotIndexes>();
    AU.addPreserved<LiveDebugVariables>();
    AU.addPreserved<LiveStacks>();

    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const DemandedFields &Used, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                             VSETVLIInfo &Info) const;
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);

  bool canMutatePriorConfig(const MachineInstr &PrevMI, const MachineInstr &MI,
                            const DemandedFields &Used) const;
  void coalesceVSETVLIs(MachineBasicBlock &MBB) const;

  VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) const;
  VSETVLIInfo computeInfoForInstr(const MachineInstr &MI) const;
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;
char &llvm::RISCVInsertVSETVLIID = RISCVInsertVSETVLI::ID;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
VSETVLIInfo
RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    if (AVLReg == RISCV::X0)
      NewInfo.setAVLVLMAX();
    else if (MI.getOperand(1).isUndef())
      // Otherwise use an AVL of 1 to avoid depending on previous vl.
      NewInfo.setAVLImm(1);
    else {
      VNInfo *VNI = getVNInfoFromReg(AVLReg, MI, LIS);
      NewInfo.setAVLRegDef(VNI, AVLReg);
    }
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  // If AVL is defined by a vsetvli with the same VLMAX, we can replace the
  // AVL operand with the AVL of the defining vsetvli.
  if (NewInfo.hasAVLReg()) {
    if (const MachineInstr *DefMI = NewInfo.getAVLDefMI(LIS);
        DefMI && isVectorConfigInstr(*DefMI)) {
      VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI);
      if (DefInstrInfo.hasSameVLMAX(NewInfo))
        NewInfo.setAVL(DefInstrInfo);
    }
  }

  return NewInfo;
}

static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
                             RISCVII::VLMUL VLMul) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
  if (Fractional)
    VLEN = VLEN / LMul;
  else
    VLEN = VLEN * LMul;
  return VLEN / SEW;
}
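// For example (illustrative): with VLEN=256, SEW=32, and LMUL=1/2 this
// returns (256 / 2) / 32 = 4, matching the architectural
// VLMAX = LMUL * VLEN / SEW.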

VSETVLIInfo
RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
  VSETVLIInfo InstrInfo;
  const uint64_t TSFlags = MI.getDesc().TSFlags;

  bool TailAgnostic = true;
  bool MaskAgnostic = true;
  if (!hasUndefinedMergeOp(MI)) {
    // Start with undisturbed.
    TailAgnostic = false;
    MaskAgnostic = false;

    // If there is a policy operand, use it.
    if (RISCVII::hasVecPolicyOp(TSFlags)) {
      const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
      uint64_t Policy = Op.getImm();
      assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
             "Invalid Policy Value");
      TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
      MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
    }

    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;

    if (!RISCVII::usesMaskPolicy(TSFlags))
      MaskAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to X0 register.
      if (Imm == RISCV::VLMaxSentinel) {
        // If we know the exact VLEN, see if we can use the constant encoding
        // for the VLMAX instead. This reduces register pressure slightly.
        const unsigned VLMAX = computeVLMAX(ST->getRealMaxVLen(), SEW, VLMul);
        if (ST->getRealMinVLen() == ST->getRealMaxVLen() && VLMAX <= 31)
          InstrInfo.setAVLImm(VLMAX);
        else
          InstrInfo.setAVLVLMAX();
      } else
        InstrInfo.setAVLImm(Imm);
    } else if (VLOp.isUndef()) {
      // Otherwise use an AVL of 1 to avoid depending on previous vl.
      InstrInfo.setAVLImm(1);
    } else {
      VNInfo *VNI = getVNInfoFromReg(VLOp.getReg(), MI, LIS);
      InstrInfo.setAVLRegDef(VNI, VLOp.getReg());
    }
  } else {
    assert(isScalarExtractInstr(MI));
    // Pick a random value for state tracking purposes, will be ignored via
    // the demanded fields mechanism.
    InstrInfo.setAVLImm(1);
  }
#ifndef NDEBUG
  if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  // If AVL is defined by a vsetvli with the same VLMAX, we can replace the
  // AVL operand with the AVL of the defining vsetvli.
  if (InstrInfo.hasAVLReg()) {
    if (const MachineInstr *DefMI = InstrInfo.getAVLDefMI(LIS);
        DefMI && isVectorConfigInstr(*DefMI)) {
      VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI);
      if (DefInstrInfo.hasSameVLMAX(InstrInfo))
        InstrInfo.setAVL(DefInstrInfo);
    }
  }

  return InstrInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                                       const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  DebugLoc DL = MI.getDebugLoc();
  insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator InsertPt,
                                       DebugLoc DL, const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {

  ++NumInsertedVSETVL;
  if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
    // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
    // VLMAX.
    if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
      auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                    .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                    .addReg(RISCV::X0, RegState::Kill)
                    .addImm(Info.encodeVTYPE())
                    .addReg(RISCV::VL, RegState::Implicit);
      if (LIS)
        LIS->InsertMachineInstrInMaps(*MI);
      return;
    }

    // If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If
    // it has the same VLMAX we want and the last VL/VTYPE we observed is the
    // same, we can use the X0, X0 form.
    if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg()) {
      if (const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
          DefMI && isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
          auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                        .addReg(RISCV::X0, RegState::Kill)
                        .addImm(Info.encodeVTYPE())
                        .addReg(RISCV::VL, RegState::Implicit);
          if (LIS)
            LIS->InsertMachineInstrInMaps(*MI);
          return;
        }
      }
    }
  }

  if (Info.hasAVLImm()) {
    auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
                  .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                  .addImm(Info.getAVLImm())
                  .addImm(Info.encodeVTYPE());
    if (LIS)
      LIS->InsertMachineInstrInMaps(*MI);
    return;
  }

  if (Info.hasAVLVLMAX()) {
    Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                  .addReg(DestReg, RegState::Define | RegState::Dead)
                  .addReg(RISCV::X0, RegState::Kill)
                  .addImm(Info.encodeVTYPE());
    if (LIS) {
      LIS->InsertMachineInstrInMaps(*MI);
      LIS->createAndComputeVirtRegInterval(DestReg);
    }
    return;
  }

  Register AVLReg = Info.getAVLReg();
  MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
  auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLI))
                .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                .addReg(AVLReg)
                .addImm(Info.encodeVTYPE());
  if (LIS) {
    LIS->InsertMachineInstrInMaps(*MI);
    // Normally the AVL's live range will already extend past the inserted
    // vsetvli because the pseudos below will already use the AVL. But this
    // isn't always the case, e.g. PseudoVMV_X_S doesn't have an AVL operand or
    // we've taken the AVL from the VL output of another vsetvli.
    LiveInterval &LI = LIS->getInterval(AVLReg);
    // Need to get non-const VNInfo
    VNInfo *VNI = LI.getValNumInfo(Info.getAVLVNInfo()->id);
    LI.addSegment(LiveInterval::Segment(
        VNI->def, LIS->getInstructionIndex(*MI).getRegSlot(), VNI));
  }
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// given a set of DemandedFields \p Used.
bool RISCVInsertVSETVLI::needVSETVLI(const DemandedFields &Used,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  if (!CurInfo.isValid() || CurInfo.isUnknown() ||
      CurInfo.hasSEWLMULRatioOnly())
    return true;

  if (CurInfo.isCompatible(Used, Require, LIS))
    return false;

  return true;
}

// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
// places.
static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
                                  DemandedFields &Demanded) {
  VSETVLIInfo Info = NewInfo;

  if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
      !PrevInfo.isUnknown()) {
    if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
            PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
      Info.setVLMul(*NewVLMul);
    Demanded.LMUL = DemandedFields::LMULEqual;
  }

  return Info;
}
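// For example (illustrative): if the previous state is e32/m2 (SEW/LMUL ratio
// 16) and the next instruction wants e16 but doesn't care about LMUL,
// choosing m1 keeps the ratio at 16/1 = 16, so VLMAX and VL are unchanged and
// the cheaper "vsetvli x0, x0, vtype" form can be used.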

// Given an incoming state reaching MI, minimally modifies that state so that it
// is compatible with MI. The resulting state is guaranteed to be semantically
// legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                        const MachineInstr &MI) const {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return;

  DemandedFields Demanded = getDemanded(MI, ST);

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI);
  assert(NewInfo.isValid() && !NewInfo.isUnknown());
  if (Info.isValid() && !needVSETVLI(Demanded, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  if (!Info.isValid() || Info.isUnknown())
    Info = NewInfo;

  const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);

  // If MI only demands that VL has the same zeroness, we only need to set the
  // AVL if the zeroness differs. This removes a vsetvli entirely if the types
  // match or allows use of cheaper avl preserving variant if VLMAX doesn't
  // change. If VLMAX might change, we couldn't use the 'vsetvli x0, x0, vtype'
  // variant, so we avoid the transform to prevent extending live range of an
  // avl register operand.
  // TODO: We can probably relax this for immediates.
  bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, LIS) &&
                     IncomingInfo.hasSameVLMAX(PrevInfo);
  if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
    Info.setAVL(IncomingInfo);

  Info.setVTYPE(
      ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
          .getVLMUL(),
      ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
      // Prefer tail/mask agnostic since it can be relaxed to undisturbed later
      // if needed.
      (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
          IncomingInfo.getTailAgnostic(),
      (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
          IncomingInfo.getMaskAgnostic());

  // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
  // the AVL.
  if (Info.hasSEWLMULRatioOnly()) {
    VSETVLIInfo RatiolessInfo = IncomingInfo;
    RatiolessInfo.setAVL(Info);
    Info = RatiolessInfo;
  }
}

// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
                                       const MachineInstr &MI) const {
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to vl-output of the fault first load.
    assert(MI.getOperand(1).getReg().isVirtual());
    if (LIS) {
      auto &LI = LIS->getInterval(MI.getOperand(1).getReg());
      SlotIndex SI =
          LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
      VNInfo *VNI = LI.getVNInfoAt(SI);
      Info.setAVLRegDef(VNI, MI.getOperand(1).getReg());
    } else
      Info.setAVLRegDef(nullptr, MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
      MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Info = VSETVLIInfo::getUnknown();
}

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                                               VSETVLIInfo &Info) const {
  bool HadVectorOp = false;

  Info = BlockInfo[MBB.getNumber()].Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(Info, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
      HadVectorOp = true;

    transferAfter(Info, MI);
  }

  return HadVectorOp;
}

void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  // Start with the previous entry so that we keep the most conservative state
  // we have ever found.
  VSETVLIInfo InInfo = BBInfo.Pred;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun block.
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed a block's output state can change based on
  // the input state. To cache, we'd have to add logic for finding
  // never-compatible state changes.
  VSETVLIInfo TmpStatus;
  computeVLVTYPEChanges(MBB, TmpStatus);

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue) {
      BlockInfo[S->getNumber()].InQueue = true;
      WorkList.push(S);
    }
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL was a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
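//
// For example (illustrative):
//   %avl = PHI [ %vl.a, %bb.a ], [ %vl.b, %bb.b ]
// where %vl.a and %vl.b are the VL outputs of the trailing vsetvlis in %bb.a
// and %bb.b, and both vsetvlis established the VTYPE that Require wants: the
// PHI'd AVL then already equals the VL a new vsetvli would produce, so none
// is needed.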
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (DisableInsertVSETVLPHIOpt)
    return true;

  if (!Require.hasAVLReg())
    return true;

  if (!LIS)
    return true;

  // We need the AVL to have been produced by a PHI node in this basic block.
  const VNInfo *Valno = Require.getAVLVNInfo();
  if (!Valno->isPHIDef() || LIS->getMBBFromIndex(Valno->def) != &MBB)
    return true;

  const LiveRange &LR = LIS->getInterval(Require.getAVLReg());

  for (auto *PBB : MBB.predecessors()) {
    const VSETVLIInfo &PBBExit = BlockInfo[PBB->getNumber()].Exit;

    // We need the PHI input to be the output of a VSET(I)VLI.
    const VNInfo *Value = LR.getVNInfoBefore(LIS->getMBBEndIdx(PBB));
    if (!Value)
      return true;
    MachineInstr *DefMI = LIS->getInstructionFromIndex(Value->def);
    if (!DefMI || !isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI; make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (DefInfo != PBBExit)
      return true;

    // Require has the same VL as PBBExit, so if the exit from the
    // predecessor has the VTYPE we are looking for we might be able
    // to avoid a VSETVLI.
    if (PBBExit.isUnknown() || !PBBExit.hasSameVTYPE(Require))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}

void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had since we know the GPR result of the implicit state change
        // wouldn't be used and VL/VTYPE registers are correct. Note that
        // we *do* need to model the state as if it changed as while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          Register Reg = VLOp.getReg();

          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);
          if (LIS) {
            LiveInterval &LI = LIS->getInterval(Reg);
            SmallVector<MachineInstr *> DeadMIs;
            LIS->shrinkToUses(&LI, &DeadMIs);
            // We might have separate components that need split due to
            // needVSETVLIPHI causing us to skip inserting a new VL def.
            SmallVector<LiveInterval *> SplitLIs;
            LIS->splitSeparateComponents(LI, SplitLIs);

            // If the AVL was an immediate > 31, then it would have been emitted
            // as an ADDI. However, the ADDI might not have been used in the
            // vsetvli, or a vsetvli might not have been emitted, so it may be
            // dead now.
            for (MachineInstr *DeadMI : DeadMIs) {
              if (!TII->isAddImmediate(*DeadMI, Reg))
                continue;
              LIS->RemoveMachineInstrFromMaps(*DeadMI);
              DeadMI->eraseFromParent();
            }
          }
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() ||
        MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
        MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  const auto &Info = BlockInfo[MBB.getNumber()];
  if (CurInfo != Info.Exit) {
    LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
    LLVM_DEBUG(dbgs() << "  begin        state: " << Info.Pred << "\n");
    LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
    LLVM_DEBUG(dbgs() << "  actual   end state: " << CurInfo << "\n");
  }
  assert(CurInfo == Info.Exit && "InsertVSETVLI dataflow invariant violated");
}

/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors. Specifically,
/// this is geared to catch the common case of a fixed length vsetvl in a single
/// block loop when it could execute once in the preheader instead.
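///
/// For example (illustrative): a single-block loop that begins every iteration
/// with "vsetivli zero, 4, e32, m1, ta, ma" and never changes VL/VTYPE can
/// have that configuration hoisted into the preheader, leaving the loop body
/// vsetvli-free.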
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  if (!LIS)
    return;

  // If we don't know the exact VTYPE, we can't copy the vsetvli to the exit of
  // the unavailable pred.
  if (AvailableInfo.hasSEWLMULRatioOnly())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If the AVL value is a register (other than our VLMAX sentinel),
  // we need to prove the value is available at the point we're going
  // to insert the vsetvli at.
  if (AvailableInfo.hasAVLReg()) {
    SlotIndex SI = AvailableInfo.getAVLVNInfo()->def;
    // This is an inline dominance check which covers the case of
    // UnavailablePred being the preheader of a loop.
    if (LIS->getMBBFromIndex(SI) != UnavailablePred)
      return;
    if (!UnavailablePred->terminators().empty() &&
        SI >= LIS->getInstructionIndex(*UnavailablePred->getFirstTerminator()))
      return;
  }

  // Model the effect of changing the input state of the block MBB to
  // AvailableInfo. We're looking for two issues here; one legality,
  // one profitability.
  // 1) If the block doesn't use some of the fields from VL or VTYPE, we
  //    may hit the end of the block with a different end state. We can
  //    not make this change without reflowing later blocks as well.
  // 2) If we don't actually remove a transition, inserting a vsetvli
  //    into the predecessor block would be correct, but unprofitable.
  VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
  VSETVLIInfo CurInfo = AvailableInfo;
  int TransitionsRemoved = 0;
  for (const MachineInstr &MI : MBB) {
    const VSETVLIInfo LastInfo = CurInfo;
    const VSETVLIInfo LastOldInfo = OldInfo;
    transferBefore(CurInfo, MI);
    transferBefore(OldInfo, MI);
    if (CurInfo == LastInfo)
      TransitionsRemoved++;
    if (LastOldInfo == OldInfo)
      TransitionsRemoved--;
    transferAfter(CurInfo, MI);
    transferAfter(OldInfo, MI);
    if (CurInfo == OldInfo)
      // Convergence. All transitions after this must match by construction.
      break;
  }
  if (CurInfo != OldInfo || TransitionsRemoved <= 0)
    // Issues 1 and 2 above
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE. Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt),
                AvailableInfo, OldExit);
}

// Return true if we can mutate PrevMI to match MI without changing any of the
// fields which would be observed.
bool RISCVInsertVSETVLI::canMutatePriorConfig(
    const MachineInstr &PrevMI, const MachineInstr &MI,
    const DemandedFields &Used) const {
  // If the VL values aren't equal, return false if either a) the former is
  // demanded, or b) we can't rewrite the former to be the latter for
  // implementation reasons.
  if (!isVLPreservingConfig(MI)) {
    if (Used.VLAny)
      return false;

    if (Used.VLZeroness) {
      if (isVLPreservingConfig(PrevMI))
        return false;
      if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI),
                                                       LIS))
        return false;
    }

    auto &AVL = MI.getOperand(1);
    auto &PrevAVL = PrevMI.getOperand(1);

    // If the AVL is a register, we need to make sure MI's AVL dominates PrevMI.
    // For now just check that PrevMI uses the same virtual register.
    if (AVL.isReg() && AVL.getReg() != RISCV::X0 &&
        (!MRI->hasOneDef(AVL.getReg()) || !PrevAVL.isReg() ||
         PrevAVL.getReg() != AVL.getReg()))
      return false;
  }

  assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm());
  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}

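// For example (illustrative): in
//   vsetivli zero, 4, e32, m1, ta, ma
//   vsetivli zero, 4, e16, mf2, ta, ma
// with no vector instruction in between, nothing observes the first
// configuration, so the pair can be coalesced into the second vsetivli alone.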
1634void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
1635 MachineInstr *NextMI = nullptr;
1636 // We can have arbitrary code in successors, so VL and VTYPE
1637 // must be considered demanded.
1638 DemandedFields Used;
1639 Used.demandVL();
1640 Used.demandVTYPE();
1642 for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
1643
1644 if (!isVectorConfigInstr(MI)) {
1645 Used.doUnion(getDemanded(MI, ST));
1646 if (MI.isCall() || MI.isInlineAsm() ||
1647 MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
1648 MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
1649 NextMI = nullptr;
1650 continue;
1651 }
1652
1653 if (!MI.getOperand(0).isDead())
1654 Used.demandVL();
1655
1656 if (NextMI) {
1657 if (!Used.usedVL() && !Used.usedVTYPE()) {
1658 ToDelete.push_back(&MI);
1659 // Leave NextMI unchanged
1660 continue;
1661 }
1662
1663 if (canMutatePriorConfig(MI, *NextMI, Used)) {
1664 if (!isVLPreservingConfig(*NextMI)) {
1665 Register DefReg = NextMI->getOperand(0).getReg();
1666
1667 MI.getOperand(0).setReg(DefReg);
1668 MI.getOperand(0).setIsDead(false);
1669
1670 // The def of DefReg moved to MI, so extend the LiveInterval up to
1671 // it.
1672 if (DefReg.isVirtual() && LIS) {
1673 LiveInterval &DefLI = LIS->getInterval(DefReg);
1674 SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
1675 VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
1676 LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
1677 DefLI.addSegment(S);
1678 DefVNI->def = MISlot;
1679 // Mark DefLI as spillable if it was previously unspillable
1680 DefLI.setWeight(0);
1681
1682 // DefReg may have had no uses, in which case we need to shrink
1683 // the LiveInterval up to MI.
1684 LIS->shrinkToUses(&DefLI);
1685 }
1686
1687 Register OldVLReg;
1688 if (MI.getOperand(1).isReg())
1689 OldVLReg = MI.getOperand(1).getReg();
1690 if (NextMI->getOperand(1).isImm())
1691 MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
1692 else
1693 MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);
1694
1695 // Clear NextMI's AVL early so we're not counting it as a use.
1696 if (NextMI->getOperand(1).isReg())
1697 NextMI->getOperand(1).setReg(RISCV::NoRegister);
1698
1699 if (OldVLReg && OldVLReg.isVirtual()) {
1700 // NextMI no longer uses OldVLReg so shrink its LiveInterval.
1701 if (LIS)
1702 LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
1703
1704 MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
1705 if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
1706 MRI->use_nodbg_empty(OldVLReg)) {
1707 VLOpDef->eraseFromParent();
1708 if (LIS)
1709 LIS->removeInterval(OldVLReg);
1710 }
1711 }
1712 MI.setDesc(NextMI->getDesc());
1713 }
1714 MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
1715 ToDelete.push_back(NextMI);
1716 // fallthrough
1717 }
1718 }
1719 NextMI = &MI;
1720 Used = getDemanded(MI, ST);
1721 }
1722
1723 NumCoalescedVSETVL += ToDelete.size();
1724 for (auto *MI : ToDelete) {
1725 if (LIS)
1726 LIS->RemoveMachineInstrFromMaps(*MI);
1727 MI->eraseFromParent();
1728 }
1729}
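// Illustrative sketch (not from the source): the backward walk also drops
// configs whose state nothing observes. In
//
//   vsetvli a2, a0, e32, m1, ta, ma    ; a2 dead, and the next config
//   vsetvli zero, a1, e64, m2, ta, ma  ; overwrites both VL and VTYPE
//   vadd.vv v8, v8, v9
//
// no field of the first vsetvli is used before it is clobbered, so it is
// pushed onto ToDelete and erased after the walk.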
1730
1731void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
1732 for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
1733 MachineInstr &MI = *I++;
1734 if (RISCV::isFaultFirstLoad(MI)) {
1735 Register VLOutput = MI.getOperand(1).getReg();
1736 assert(VLOutput.isVirtual());
1737 if (!MI.getOperand(1).isDead()) {
1738 auto ReadVLMI = BuildMI(MBB, I, MI.getDebugLoc(),
1739 TII->get(RISCV::PseudoReadVL), VLOutput);
1740 // Move the LiveInterval's definition down to PseudoReadVL.
1741 if (LIS) {
1742 SlotIndex NewDefSI =
1743 LIS->InsertMachineInstrInMaps(*ReadVLMI).getRegSlot();
1744 LiveInterval &DefLI = LIS->getInterval(VLOutput);
1745 VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
1746 DefLI.removeSegment(DefLI.beginIndex(), NewDefSI);
1747 DefVNI->def = NewDefSI;
1748 }
1749 }
1750 // We don't use the vl output of the VLEFF/VLSEGFF anymore.
1751 MI.getOperand(1).setReg(RISCV::X0);
1752 }
1753 }
1754}
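// Illustrative sketch (not from the source; exact pseudo names may differ):
// for a fault-only-first load whose VL result is live, e.g.
//
//   %v:vr, %outvl:gpr = PseudoVLE32FF_V_M1 ...
//
// the VL def on the load is retargeted to $x0 and the value is instead
// produced by a trailing
//
//   %outvl:gpr = PseudoReadVL
//
// with %outvl's LiveInterval definition moved down to the new instruction.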
1755
1756bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
1757 // Skip if the vector extension is not enabled.
1758 ST = &MF.getSubtarget<RISCVSubtarget>();
1759 if (!ST->hasVInstructions())
1760 return false;
1761
1762 LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");
1763
1764 TII = ST->getInstrInfo();
1765 MRI = &MF.getRegInfo();
1766 LIS = getAnalysisIfAvailable<LiveIntervals>();
1767
1768 assert(BlockInfo.empty() && "Expect empty block infos");
1769 BlockInfo.resize(MF.getNumBlockIDs());
1770
1771 bool HaveVectorOp = false;
1772
1773 // Phase 1 - determine how VL/VTYPE are affected by each block.
1774 for (const MachineBasicBlock &MBB : MF) {
1775 VSETVLIInfo TmpStatus;
1776 HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
1777 // Initial exit state is whatever change we found in the block.
1778 BlockData &BBInfo = BlockInfo[MBB.getNumber()];
1779 BBInfo.Exit = TmpStatus;
1780 LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
1781 << " is " << BBInfo.Exit << "\n");
1782
1783 }
1784
1785 // If we didn't find any instructions that need VSETVLI, we're done.
1786 if (!HaveVectorOp) {
1787 BlockInfo.clear();
1788 return false;
1789 }
1790
1791 // Phase 2 - determine the exit VL/VTYPE from each block. We add all
1792 // blocks to the list here, but will also add any that need to be revisited
1793 // during Phase 2 processing.
1794 for (const MachineBasicBlock &MBB : MF) {
1795 WorkList.push(&MBB);
1796 BlockInfo[MBB.getNumber()].InQueue = true;
1797 }
1798 while (!WorkList.empty()) {
1799 const MachineBasicBlock &MBB = *WorkList.front();
1800 WorkList.pop();
1801 computeIncomingVLVTYPE(MBB);
1802 }
1803
1804 // Perform partial redundancy elimination of vsetvli transitions.
1805 for (MachineBasicBlock &MBB : MF)
1806 doPRE(MBB);
1807
1808 // Phase 3 - add any vsetvli instructions needed in the block. Use the
1809 // Phase 2 information to avoid adding vsetvlis before the first vector
1810 // instruction in the block if the VL/VTYPE is satisfied by its
1811 // predecessors.
1812 for (MachineBasicBlock &MBB : MF)
1813 emitVSETVLIs(MBB);
1814
1815 // Now that all vsetvlis are explicit, go through and do block local
1816 // DSE and peephole transforms based on demanded fields. Note that
1817 // this *must* be done outside the main dataflow so long as we allow
1818 // any cross block analysis within the dataflow. We can't have both
1819 // demanded fields based mutation and non-local analysis in the
1820 // dataflow at the same time without introducing inconsistencies.
1821 for (MachineBasicBlock &MBB : MF)
1822 coalesceVSETVLIs(MBB);
1823
1824 // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
1825 // of VLEFF/VLSEGFF.
1826 for (MachineBasicBlock &MBB : MF)
1827 insertReadVL(MBB);
1828
1829 BlockInfo.clear();
1830 return HaveVectorOp;
1831}
1832
1833 /// Returns an instance of the Insert VSETVLI pass.
1834 FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
1835 return new RISCVInsertVSETVLI();
1836}