LLVM 20.0.0git
RISCVInsertVSETVLI.cpp
Go to the documentation of this file.
1//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a function pass that inserts VSETVLI instructions where
10// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
11// instructions.
12//
13// This pass consists of 3 phases:
14//
15// Phase 1 collects how each basic block affects VL/VTYPE.
16//
17// Phase 2 uses the information from phase 1 to do a data flow analysis to
18// propagate the VL/VTYPE changes through the function. This gives us the
19// VL/VTYPE at the start of each basic block.
20//
21// Phase 3 inserts VSETVLI instructions in each basic block. Information from
22// phase 2 is used to prevent inserting a VSETVLI before the first vector
23// instruction in the block if possible.
24//
25//===----------------------------------------------------------------------===//
26
27#include "RISCV.h"
28#include "RISCVSubtarget.h"
29#include "llvm/ADT/Statistic.h"
34#include <queue>
35using namespace llvm;
36
37#define DEBUG_TYPE "riscv-insert-vsetvli"
38#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
39
40STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
41STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");
42
44 DEBUG_TYPE "-whole-vector-register-move-valid-vtype", cl::Hidden,
45 cl::desc("Insert vsetvlis before vmvNr.vs to ensure vtype is valid and "
46 "vill is cleared"),
47 cl::init(true));
48
49namespace {
50
51/// Given a virtual register \p Reg, return the corresponding VNInfo for it.
52/// This will return nullptr if the virtual register is an implicit_def or
53/// if LiveIntervals is not available.
54static VNInfo *getVNInfoFromReg(Register Reg, const MachineInstr &MI,
55 const LiveIntervals *LIS) {
56 assert(Reg.isVirtual());
57 if (!LIS)
58 return nullptr;
59 auto &LI = LIS->getInterval(Reg);
61 return LI.getVNInfoBefore(SI);
62}
63
64static unsigned getVLOpNum(const MachineInstr &MI) {
65 return RISCVII::getVLOpNum(MI.getDesc());
66}
67
68static unsigned getSEWOpNum(const MachineInstr &MI) {
69 return RISCVII::getSEWOpNum(MI.getDesc());
70}
71
72static bool isVectorConfigInstr(const MachineInstr &MI) {
73 return MI.getOpcode() == RISCV::PseudoVSETVLI ||
74 MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
75 MI.getOpcode() == RISCV::PseudoVSETIVLI;
76}
77
78/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
79/// VL and only sets VTYPE.
80static bool isVLPreservingConfig(const MachineInstr &MI) {
81 if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
82 return false;
83 assert(RISCV::X0 == MI.getOperand(1).getReg());
84 return RISCV::X0 == MI.getOperand(0).getReg();
85}
86
87static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
88 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
89 default:
90 return false;
91 case RISCV::VFMV_S_F:
92 case RISCV::VFMV_V_F:
93 return true;
94 }
95}
96
97static bool isScalarExtractInstr(const MachineInstr &MI) {
98 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
99 default:
100 return false;
101 case RISCV::VMV_X_S:
102 case RISCV::VFMV_F_S:
103 return true;
104 }
105}
106
107static bool isScalarInsertInstr(const MachineInstr &MI) {
108 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
109 default:
110 return false;
111 case RISCV::VMV_S_X:
112 case RISCV::VFMV_S_F:
113 return true;
114 }
115}
116
117static bool isScalarSplatInstr(const MachineInstr &MI) {
118 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
119 default:
120 return false;
121 case RISCV::VMV_V_I:
122 case RISCV::VMV_V_X:
123 case RISCV::VFMV_V_F:
124 return true;
125 }
126}
127
128static bool isVSlideInstr(const MachineInstr &MI) {
129 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
130 default:
131 return false;
132 case RISCV::VSLIDEDOWN_VX:
133 case RISCV::VSLIDEDOWN_VI:
134 case RISCV::VSLIDEUP_VX:
135 case RISCV::VSLIDEUP_VI:
136 return true;
137 }
138}
139
140/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
141/// not a load or store which ignores SEW.
142static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
143 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
144 default:
145 return std::nullopt;
146 case RISCV::VLE8_V:
147 case RISCV::VLSE8_V:
148 case RISCV::VSE8_V:
149 case RISCV::VSSE8_V:
150 return 8;
151 case RISCV::VLE16_V:
152 case RISCV::VLSE16_V:
153 case RISCV::VSE16_V:
154 case RISCV::VSSE16_V:
155 return 16;
156 case RISCV::VLE32_V:
157 case RISCV::VLSE32_V:
158 case RISCV::VSE32_V:
159 case RISCV::VSSE32_V:
160 return 32;
161 case RISCV::VLE64_V:
162 case RISCV::VLSE64_V:
163 case RISCV::VSE64_V:
164 case RISCV::VSSE64_V:
165 return 64;
166 }
167}
168
169static bool isNonZeroLoadImmediate(const MachineInstr &MI) {
170 return MI.getOpcode() == RISCV::ADDI &&
171 MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
172 MI.getOperand(1).getReg() == RISCV::X0 &&
173 MI.getOperand(2).getImm() != 0;
174}
175
176/// Return true if this is an operation on mask registers. Note that
177/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
178static bool isMaskRegOp(const MachineInstr &MI) {
179 if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
180 return false;
181 const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
182 // A Log2SEW of 0 is an operation on mask registers only.
183 return Log2SEW == 0;
184}
185
186/// Return true if the inactive elements in the result are entirely undefined.
187/// Note that this is different from "agnostic" as defined by the vector
188/// specification. Agnostic requires each lane to either be undisturbed, or
189/// take the value -1; no other value is allowed.
190static bool hasUndefinedPassthru(const MachineInstr &MI) {
191
192 unsigned UseOpIdx;
193 if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
194 // If there is no passthrough operand, then the pass through
195 // lanes are undefined.
196 return true;
197
198 // All undefined passthrus should be $noreg: see
199 // RISCVDAGToDAGISel::doPeepholeNoRegPassThru
200 const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
201 return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
202}
203
204/// Return true if \p MI is a copy that will be lowered to one or more vmvNr.vs.
205static bool isVectorCopy(const TargetRegisterInfo *TRI,
206 const MachineInstr &MI) {
207 return MI.isCopy() && MI.getOperand(0).getReg().isPhysical() &&
209 TRI->getMinimalPhysRegClass(MI.getOperand(0).getReg()));
210}
211
212/// Which subfields of VL or VTYPE have values we need to preserve?
213struct DemandedFields {
214 // Some unknown property of VL is used. If demanded, must preserve entire
215 // value.
216 bool VLAny = false;
217 // Only zero vs non-zero is used. If demanded, can change non-zero values.
218 bool VLZeroness = false;
219 // What properties of SEW we need to preserve.
220 enum : uint8_t {
221 SEWEqual = 3, // The exact value of SEW needs to be preserved.
222 SEWGreaterThanOrEqualAndLessThan64 =
223 2, // SEW can be changed as long as it's greater
224 // than or equal to the original value, but must be less
225 // than 64.
226 SEWGreaterThanOrEqual = 1, // SEW can be changed as long as it's greater
227 // than or equal to the original value.
228 SEWNone = 0 // We don't need to preserve SEW at all.
229 } SEW = SEWNone;
230 enum : uint8_t {
231 LMULEqual = 2, // The exact value of LMUL needs to be preserved.
232 LMULLessThanOrEqualToM1 = 1, // We can use any LMUL <= M1.
233 LMULNone = 0 // We don't need to preserve LMUL at all.
234 } LMUL = LMULNone;
235 bool SEWLMULRatio = false;
236 bool TailPolicy = false;
237 bool MaskPolicy = false;
238 // If this is true, we demand that VTYPE is set to some legal state, i.e. that
239 // vill is unset.
240 bool VILL = false;
241
242 // Return true if any part of VTYPE was used
243 bool usedVTYPE() const {
244 return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL;
245 }
246
247 // Return true if any property of VL was used
248 bool usedVL() {
249 return VLAny || VLZeroness;
250 }
251
252 // Mark all VTYPE subfields and properties as demanded
253 void demandVTYPE() {
254 SEW = SEWEqual;
255 LMUL = LMULEqual;
256 SEWLMULRatio = true;
257 TailPolicy = true;
258 MaskPolicy = true;
259 VILL = true;
260 }
261
262 // Mark all VL properties as demanded
263 void demandVL() {
264 VLAny = true;
265 VLZeroness = true;
266 }
267
268 static DemandedFields all() {
269 DemandedFields DF;
270 DF.demandVTYPE();
271 DF.demandVL();
272 return DF;
273 }
274
275 // Make this the result of demanding both the fields in this and B.
276 void doUnion(const DemandedFields &B) {
277 VLAny |= B.VLAny;
278 VLZeroness |= B.VLZeroness;
279 SEW = std::max(SEW, B.SEW);
280 LMUL = std::max(LMUL, B.LMUL);
281 SEWLMULRatio |= B.SEWLMULRatio;
282 TailPolicy |= B.TailPolicy;
283 MaskPolicy |= B.MaskPolicy;
284 VILL |= B.VILL;
285 }
286
287#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
288 /// Support for debugging, callable in GDB: V->dump()
289 LLVM_DUMP_METHOD void dump() const {
290 print(dbgs());
291 dbgs() << "\n";
292 }
293
294 /// Implement operator<<.
295 void print(raw_ostream &OS) const {
296 OS << "{";
297 OS << "VLAny=" << VLAny << ", ";
298 OS << "VLZeroness=" << VLZeroness << ", ";
299 OS << "SEW=";
300 switch (SEW) {
301 case SEWEqual:
302 OS << "SEWEqual";
303 break;
304 case SEWGreaterThanOrEqual:
305 OS << "SEWGreaterThanOrEqual";
306 break;
307 case SEWGreaterThanOrEqualAndLessThan64:
308 OS << "SEWGreaterThanOrEqualAndLessThan64";
309 break;
310 case SEWNone:
311 OS << "SEWNone";
312 break;
313 };
314 OS << ", ";
315 OS << "LMUL=";
316 switch (LMUL) {
317 case LMULEqual:
318 OS << "LMULEqual";
319 break;
320 case LMULLessThanOrEqualToM1:
321 OS << "LMULLessThanOrEqualToM1";
322 break;
323 case LMULNone:
324 OS << "LMULNone";
325 break;
326 };
327 OS << ", ";
328 OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
329 OS << "TailPolicy=" << TailPolicy << ", ";
330 OS << "MaskPolicy=" << MaskPolicy << ", ";
331 OS << "VILL=" << VILL;
332 OS << "}";
333 }
334#endif
335};
336
337#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
339inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
340 DF.print(OS);
341 return OS;
342}
343#endif
344
345static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
346 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
347 return Fractional || LMul == 1;
348}
349
350/// Return true if moving from CurVType to NewVType is
351/// indistinguishable from the perspective of an instruction (or set
352/// of instructions) which use only the Used subfields and properties.
353static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
354 const DemandedFields &Used) {
355 switch (Used.SEW) {
356 case DemandedFields::SEWNone:
357 break;
358 case DemandedFields::SEWEqual:
359 if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
360 return false;
361 break;
362 case DemandedFields::SEWGreaterThanOrEqual:
363 if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
364 return false;
365 break;
366 case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
367 if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
368 RISCVVType::getSEW(NewVType) >= 64)
369 return false;
370 break;
371 }
372
373 switch (Used.LMUL) {
374 case DemandedFields::LMULNone:
375 break;
376 case DemandedFields::LMULEqual:
377 if (RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
378 return false;
379 break;
380 case DemandedFields::LMULLessThanOrEqualToM1:
381 if (!isLMUL1OrSmaller(RISCVVType::getVLMUL(NewVType)))
382 return false;
383 break;
384 }
385
386 if (Used.SEWLMULRatio) {
387 auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
388 RISCVVType::getVLMUL(CurVType));
389 auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
390 RISCVVType::getVLMUL(NewVType));
391 if (Ratio1 != Ratio2)
392 return false;
393 }
394
395 if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
397 return false;
398 if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
400 return false;
401 return true;
402}
403
404/// Return the fields and properties demanded by the provided instruction.
405DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
406 // This function works in coalesceVSETVLI too. We can still use the value of a
407 // SEW, VL, or Policy operand even though it might not be the exact value in
408 // the VL or VTYPE, since we only care about what the instruction originally
409 // demanded.
410
411 // Most instructions don't use any of these subfeilds.
412 DemandedFields Res;
413 // Start conservative if registers are used
414 if (MI.isCall() || MI.isInlineAsm() ||
415 MI.readsRegister(RISCV::VL, /*TRI=*/nullptr))
416 Res.demandVL();
417 if (MI.isCall() || MI.isInlineAsm() ||
418 MI.readsRegister(RISCV::VTYPE, /*TRI=*/nullptr))
419 Res.demandVTYPE();
420 // Start conservative on the unlowered form too
421 uint64_t TSFlags = MI.getDesc().TSFlags;
422 if (RISCVII::hasSEWOp(TSFlags)) {
423 Res.demandVTYPE();
424 if (RISCVII::hasVLOp(TSFlags))
425 if (const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
426 !VLOp.isReg() || !VLOp.isUndef())
427 Res.demandVL();
428
429 // Behavior is independent of mask policy.
430 if (!RISCVII::usesMaskPolicy(TSFlags))
431 Res.MaskPolicy = false;
432 }
433
434 // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
435 // They instead demand the ratio of the two which is used in computing
436 // EMUL, but which allows us the flexibility to change SEW and LMUL
437 // provided we don't change the ratio.
438 // Note: We assume that the instructions initial SEW is the EEW encoded
439 // in the opcode. This is asserted when constructing the VSETVLIInfo.
440 if (getEEWForLoadStore(MI)) {
441 Res.SEW = DemandedFields::SEWNone;
442 Res.LMUL = DemandedFields::LMULNone;
443 }
444
445 // Store instructions don't use the policy fields.
446 if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
447 Res.TailPolicy = false;
448 Res.MaskPolicy = false;
449 }
450
451 // If this is a mask reg operation, it only cares about VLMAX.
452 // TODO: Possible extensions to this logic
453 // * Probably ok if available VLMax is larger than demanded
454 // * The policy bits can probably be ignored..
455 if (isMaskRegOp(MI)) {
456 Res.SEW = DemandedFields::SEWNone;
457 Res.LMUL = DemandedFields::LMULNone;
458 }
459
460 // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
461 if (isScalarInsertInstr(MI)) {
462 Res.LMUL = DemandedFields::LMULNone;
463 Res.SEWLMULRatio = false;
464 Res.VLAny = false;
465 // For vmv.s.x and vfmv.s.f, if the passthru is *undefined*, we don't
466 // need to preserve any other bits and are thus compatible with any larger,
467 // etype and can disregard policy bits. Warning: It's tempting to try doing
468 // this for any tail agnostic operation, but we can't as TA requires
469 // tail lanes to either be the original value or -1. We are writing
470 // unknown bits to the lanes here.
471 if (hasUndefinedPassthru(MI)) {
472 if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
473 Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
474 else
475 Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
476 Res.TailPolicy = false;
477 }
478 }
479
480 // vmv.x.s, and vfmv.f.s are unconditional and ignore everything except SEW.
481 if (isScalarExtractInstr(MI)) {
482 assert(!RISCVII::hasVLOp(TSFlags));
483 Res.LMUL = DemandedFields::LMULNone;
484 Res.SEWLMULRatio = false;
485 Res.TailPolicy = false;
486 Res.MaskPolicy = false;
487 }
488
489 if (RISCVII::hasVLOp(MI.getDesc().TSFlags)) {
490 const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
491 // A slidedown/slideup with an *undefined* passthru can freely clobber
492 // elements not copied from the source vector (e.g. masked off, tail, or
493 // slideup's prefix). Notes:
494 // * We can't modify SEW here since the slide amount is in units of SEW.
495 // * VL=1 is special only because we have existing support for zero vs
496 // non-zero VL. We could generalize this if we had a VL > C predicate.
497 // * The LMUL1 restriction is for machines whose latency may depend on VL.
498 // * As above, this is only legal for tail "undefined" not "agnostic".
499 if (isVSlideInstr(MI) && VLOp.isImm() && VLOp.getImm() == 1 &&
500 hasUndefinedPassthru(MI)) {
501 Res.VLAny = false;
502 Res.VLZeroness = true;
503 Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
504 Res.TailPolicy = false;
505 }
506
507 // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated in the
508 // same semantically as vmv.s.x. This is particularly useful since we don't
509 // have an immediate form of vmv.s.x, and thus frequently use vmv.v.i in
510 // it's place. Since a splat is non-constant time in LMUL, we do need to be
511 // careful to not increase the number of active vector registers (unlike for
512 // vmv.s.x.)
513 if (isScalarSplatInstr(MI) && VLOp.isImm() && VLOp.getImm() == 1 &&
514 hasUndefinedPassthru(MI)) {
515 Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
516 Res.SEWLMULRatio = false;
517 Res.VLAny = false;
518 if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
519 Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
520 else
521 Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
522 Res.TailPolicy = false;
523 }
524 }
525
526 // In §32.16.6, whole vector register moves have a dependency on SEW. At the
527 // MIR level though we don't encode the element type, and it gives the same
528 // result whatever the SEW may be.
529 //
530 // However it does need valid SEW, i.e. vill must be cleared. The entry to a
531 // function, calls and inline assembly may all set it, so make sure we clear
532 // it for whole register copies. Do this by leaving VILL demanded.
533 if (isVectorCopy(ST->getRegisterInfo(), MI)) {
534 Res.LMUL = DemandedFields::LMULNone;
535 Res.SEW = DemandedFields::SEWNone;
536 Res.SEWLMULRatio = false;
537 Res.TailPolicy = false;
538 Res.MaskPolicy = false;
539 }
540
541 return Res;
542}
543
544/// Defines the abstract state with which the forward dataflow models the
545/// values of the VL and VTYPE registers after insertion.
546class VSETVLIInfo {
547 struct AVLDef {
548 // Every AVLDef should have a VNInfo, unless we're running without
549 // LiveIntervals in which case this will be nullptr.
550 const VNInfo *ValNo;
551 Register DefReg;
552 };
553 union {
554 AVLDef AVLRegDef;
555 unsigned AVLImm;
556 };
557
558 enum : uint8_t {
560 AVLIsReg,
561 AVLIsImm,
562 AVLIsVLMAX,
563 Unknown, // AVL and VTYPE are fully unknown
564 } State = Uninitialized;
565
566 // Fields from VTYPE.
568 uint8_t SEW = 0;
569 uint8_t TailAgnostic : 1;
570 uint8_t MaskAgnostic : 1;
571 uint8_t SEWLMULRatioOnly : 1;
572
573public:
574 VSETVLIInfo()
575 : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
576 SEWLMULRatioOnly(false) {}
577
578 static VSETVLIInfo getUnknown() {
579 VSETVLIInfo Info;
580 Info.setUnknown();
581 return Info;
582 }
583
584 bool isValid() const { return State != Uninitialized; }
585 void setUnknown() { State = Unknown; }
586 bool isUnknown() const { return State == Unknown; }
587
588 void setAVLRegDef(const VNInfo *VNInfo, Register AVLReg) {
589 assert(AVLReg.isVirtual());
590 AVLRegDef.ValNo = VNInfo;
591 AVLRegDef.DefReg = AVLReg;
592 State = AVLIsReg;
593 }
594
595 void setAVLImm(unsigned Imm) {
596 AVLImm = Imm;
597 State = AVLIsImm;
598 }
599
600 void setAVLVLMAX() { State = AVLIsVLMAX; }
601
602 bool hasAVLImm() const { return State == AVLIsImm; }
603 bool hasAVLReg() const { return State == AVLIsReg; }
604 bool hasAVLVLMAX() const { return State == AVLIsVLMAX; }
605 Register getAVLReg() const {
606 assert(hasAVLReg() && AVLRegDef.DefReg.isVirtual());
607 return AVLRegDef.DefReg;
608 }
609 unsigned getAVLImm() const {
610 assert(hasAVLImm());
611 return AVLImm;
612 }
613 const VNInfo *getAVLVNInfo() const {
614 assert(hasAVLReg());
615 return AVLRegDef.ValNo;
616 }
617 // Most AVLIsReg infos will have a single defining MachineInstr, unless it was
618 // a PHI node. In that case getAVLVNInfo()->def will point to the block
619 // boundary slot and this will return nullptr. If LiveIntervals isn't
620 // available, nullptr is also returned.
621 const MachineInstr *getAVLDefMI(const LiveIntervals *LIS) const {
622 assert(hasAVLReg());
623 if (!LIS || getAVLVNInfo()->isPHIDef())
624 return nullptr;
625 auto *MI = LIS->getInstructionFromIndex(getAVLVNInfo()->def);
626 assert(MI);
627 return MI;
628 }
629
630 void setAVL(VSETVLIInfo Info) {
631 assert(Info.isValid());
632 if (Info.isUnknown())
633 setUnknown();
634 else if (Info.hasAVLReg())
635 setAVLRegDef(Info.getAVLVNInfo(), Info.getAVLReg());
636 else if (Info.hasAVLVLMAX())
637 setAVLVLMAX();
638 else {
639 assert(Info.hasAVLImm());
640 setAVLImm(Info.getAVLImm());
641 }
642 }
643
644 unsigned getSEW() const { return SEW; }
645 RISCVII::VLMUL getVLMUL() const { return VLMul; }
646 bool getTailAgnostic() const { return TailAgnostic; }
647 bool getMaskAgnostic() const { return MaskAgnostic; }
648
649 bool hasNonZeroAVL(const LiveIntervals *LIS) const {
650 if (hasAVLImm())
651 return getAVLImm() > 0;
652 if (hasAVLReg()) {
653 if (auto *DefMI = getAVLDefMI(LIS))
654 return isNonZeroLoadImmediate(*DefMI);
655 }
656 if (hasAVLVLMAX())
657 return true;
658 return false;
659 }
660
661 bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
662 const LiveIntervals *LIS) const {
663 if (hasSameAVL(Other))
664 return true;
665 return (hasNonZeroAVL(LIS) && Other.hasNonZeroAVL(LIS));
666 }
667
668 bool hasSameAVLLatticeValue(const VSETVLIInfo &Other) const {
669 if (hasAVLReg() && Other.hasAVLReg()) {
670 assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
671 "we either have intervals or we don't");
672 if (!getAVLVNInfo())
673 return getAVLReg() == Other.getAVLReg();
674 return getAVLVNInfo()->id == Other.getAVLVNInfo()->id &&
675 getAVLReg() == Other.getAVLReg();
676 }
677
678 if (hasAVLImm() && Other.hasAVLImm())
679 return getAVLImm() == Other.getAVLImm();
680
681 if (hasAVLVLMAX())
682 return Other.hasAVLVLMAX() && hasSameVLMAX(Other);
683
684 return false;
685 }
686
687 // Return true if the two lattice values are guaranteed to have
688 // the same AVL value at runtime.
689 bool hasSameAVL(const VSETVLIInfo &Other) const {
690 // Without LiveIntervals, we don't know which instruction defines a
691 // register. Since a register may be redefined, this means all AVLIsReg
692 // states must be treated as possibly distinct.
693 if (hasAVLReg() && Other.hasAVLReg()) {
694 assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
695 "we either have intervals or we don't");
696 if (!getAVLVNInfo())
697 return false;
698 }
699 return hasSameAVLLatticeValue(Other);
700 }
701
702 void setVTYPE(unsigned VType) {
703 assert(isValid() && !isUnknown() &&
704 "Can't set VTYPE for uninitialized or unknown");
705 VLMul = RISCVVType::getVLMUL(VType);
706 SEW = RISCVVType::getSEW(VType);
707 TailAgnostic = RISCVVType::isTailAgnostic(VType);
708 MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
709 }
710 void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
711 assert(isValid() && !isUnknown() &&
712 "Can't set VTYPE for uninitialized or unknown");
713 VLMul = L;
714 SEW = S;
715 TailAgnostic = TA;
716 MaskAgnostic = MA;
717 }
718
719 void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; }
720
721 unsigned encodeVTYPE() const {
722 assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
723 "Can't encode VTYPE for uninitialized or unknown");
724 return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
725 }
726
727 bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }
728
729 bool hasSameVTYPE(const VSETVLIInfo &Other) const {
730 assert(isValid() && Other.isValid() &&
731 "Can't compare invalid VSETVLIInfos");
732 assert(!isUnknown() && !Other.isUnknown() &&
733 "Can't compare VTYPE in unknown state");
734 assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
735 "Can't compare when only LMUL/SEW ratio is valid.");
736 return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
737 std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
738 Other.MaskAgnostic);
739 }
740
741 unsigned getSEWLMULRatio() const {
742 assert(isValid() && !isUnknown() &&
743 "Can't use VTYPE for uninitialized or unknown");
744 return RISCVVType::getSEWLMULRatio(SEW, VLMul);
745 }
746
747 // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
748 // Note that having the same VLMAX ensures that both share the same
749 // function from AVL to VL; that is, they must produce the same VL value
750 // for any given AVL value.
751 bool hasSameVLMAX(const VSETVLIInfo &Other) const {
752 assert(isValid() && Other.isValid() &&
753 "Can't compare invalid VSETVLIInfos");
754 assert(!isUnknown() && !Other.isUnknown() &&
755 "Can't compare VTYPE in unknown state");
756 return getSEWLMULRatio() == Other.getSEWLMULRatio();
757 }
758
759 bool hasCompatibleVTYPE(const DemandedFields &Used,
760 const VSETVLIInfo &Require) const {
761 return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
762 }
763
764 // Determine whether the vector instructions requirements represented by
765 // Require are compatible with the previous vsetvli instruction represented
766 // by this. MI is the instruction whose requirements we're considering.
767 bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
768 const LiveIntervals *LIS) const {
769 assert(isValid() && Require.isValid() &&
770 "Can't compare invalid VSETVLIInfos");
771 // Nothing is compatible with Unknown.
772 if (isUnknown() || Require.isUnknown())
773 return false;
774
775 // If only our VLMAX ratio is valid, then this isn't compatible.
776 if (SEWLMULRatioOnly || Require.SEWLMULRatioOnly)
777 return false;
778
779 if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require)))
780 return false;
781
782 if (Used.VLZeroness && !hasEquallyZeroAVL(Require, LIS))
783 return false;
784
785 return hasCompatibleVTYPE(Used, Require);
786 }
787
788 bool operator==(const VSETVLIInfo &Other) const {
789 // Uninitialized is only equal to another Uninitialized.
790 if (!isValid())
791 return !Other.isValid();
792 if (!Other.isValid())
793 return !isValid();
794
795 // Unknown is only equal to another Unknown.
796 if (isUnknown())
797 return Other.isUnknown();
798 if (Other.isUnknown())
799 return isUnknown();
800
801 if (!hasSameAVLLatticeValue(Other))
802 return false;
803
804 // If the SEWLMULRatioOnly bits are different, then they aren't equal.
805 if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
806 return false;
807
808 // If only the VLMAX is valid, check that it is the same.
809 if (SEWLMULRatioOnly)
810 return hasSameVLMAX(Other);
811
812 // If the full VTYPE is valid, check that it is the same.
813 return hasSameVTYPE(Other);
814 }
815
816 bool operator!=(const VSETVLIInfo &Other) const {
817 return !(*this == Other);
818 }
819
820 // Calculate the VSETVLIInfo visible to a block assuming this and Other are
821 // both predecessors.
822 VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
823 // If the new value isn't valid, ignore it.
824 if (!Other.isValid())
825 return *this;
826
827 // If this value isn't valid, this must be the first predecessor, use it.
828 if (!isValid())
829 return Other;
830
831 // If either is unknown, the result is unknown.
832 if (isUnknown() || Other.isUnknown())
833 return VSETVLIInfo::getUnknown();
834
835 // If we have an exact, match return this.
836 if (*this == Other)
837 return *this;
838
839 // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
840 // return an SEW/LMUL ratio only value.
841 if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
842 VSETVLIInfo MergeInfo = *this;
843 MergeInfo.SEWLMULRatioOnly = true;
844 return MergeInfo;
845 }
846
847 // Otherwise the result is unknown.
848 return VSETVLIInfo::getUnknown();
849 }
850
851#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
852 /// Support for debugging, callable in GDB: V->dump()
853 LLVM_DUMP_METHOD void dump() const {
854 print(dbgs());
855 dbgs() << "\n";
856 }
857
858 /// Implement operator<<.
859 /// @{
860 void print(raw_ostream &OS) const {
861 OS << "{";
862 if (!isValid())
863 OS << "Uninitialized";
864 if (isUnknown())
865 OS << "unknown";
866 if (hasAVLReg())
867 OS << "AVLReg=" << llvm::printReg(getAVLReg());
868 if (hasAVLImm())
869 OS << "AVLImm=" << (unsigned)AVLImm;
870 if (hasAVLVLMAX())
871 OS << "AVLVLMAX";
872 OS << ", "
873 << "VLMul=" << (unsigned)VLMul << ", "
874 << "SEW=" << (unsigned)SEW << ", "
875 << "TailAgnostic=" << (bool)TailAgnostic << ", "
876 << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
877 << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
878 }
879#endif
880};
881
882#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
884inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
885 V.print(OS);
886 return OS;
887}
888#endif
889
890struct BlockData {
891 // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
892 // block. Calculated in Phase 2.
893 VSETVLIInfo Exit;
894
895 // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
896 // blocks. Calculated in Phase 2, and used by Phase 3.
897 VSETVLIInfo Pred;
898
899 // Keeps track of whether the block is already in the queue.
900 bool InQueue = false;
901
902 BlockData() = default;
903};
904
905class RISCVInsertVSETVLI : public MachineFunctionPass {
906 const RISCVSubtarget *ST;
907 const TargetInstrInfo *TII;
909 // Possibly null!
910 LiveIntervals *LIS;
911
912 std::vector<BlockData> BlockInfo;
913 std::queue<const MachineBasicBlock *> WorkList;
914
915public:
916 static char ID;
917
918 RISCVInsertVSETVLI() : MachineFunctionPass(ID) {}
919 bool runOnMachineFunction(MachineFunction &MF) override;
920
921 void getAnalysisUsage(AnalysisUsage &AU) const override {
922 AU.setPreservesCFG();
923
929
931 }
932
933 StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }
934
935private:
936 bool needVSETVLI(const DemandedFields &Used, const VSETVLIInfo &Require,
937 const VSETVLIInfo &CurInfo) const;
938 bool needVSETVLIPHI(const VSETVLIInfo &Require,
939 const MachineBasicBlock &MBB) const;
940 void insertVSETVLI(MachineBasicBlock &MBB,
942 const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
943
944 void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
945 void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
946 bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
947 VSETVLIInfo &Info) const;
948 void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
949 void emitVSETVLIs(MachineBasicBlock &MBB);
950 void doPRE(MachineBasicBlock &MBB);
951 void insertReadVL(MachineBasicBlock &MBB);
952
953 bool canMutatePriorConfig(const MachineInstr &PrevMI, const MachineInstr &MI,
954 const DemandedFields &Used) const;
955 void coalesceVSETVLIs(MachineBasicBlock &MBB) const;
956
957 VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) const;
958 VSETVLIInfo computeInfoForInstr(const MachineInstr &MI) const;
959 void forwardVSETVLIAVL(VSETVLIInfo &Info) const;
960};
961
962} // end anonymous namespace
963
964char RISCVInsertVSETVLI::ID = 0;
965char &llvm::RISCVInsertVSETVLIID = RISCVInsertVSETVLI::ID;
966
968 false, false)
969
970// If the AVL is defined by a vsetvli's output vl with the same VLMAX, we can
971// replace the AVL operand with the AVL of the defining vsetvli. E.g.
972//
973// %vl = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
974// $x0 = PseudoVSETVLI %vl:gpr, SEW=32, LMUL=M1
975// ->
976// %vl = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
977// $x0 = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
978void RISCVInsertVSETVLI::forwardVSETVLIAVL(VSETVLIInfo &Info) const {
979 if (!Info.hasAVLReg())
980 return;
981 const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
982 if (!DefMI || !isVectorConfigInstr(*DefMI))
983 return;
984 VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI);
985 if (!DefInstrInfo.hasSameVLMAX(Info))
986 return;
987 Info.setAVL(DefInstrInfo);
988}
989
990// Return a VSETVLIInfo representing the changes made by this VSETVLI or
991// VSETIVLI instruction.
992VSETVLIInfo
993RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const {
994 VSETVLIInfo NewInfo;
995 if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
996 NewInfo.setAVLImm(MI.getOperand(1).getImm());
997 } else {
998 assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
999 MI.getOpcode() == RISCV::PseudoVSETVLIX0);
1000 Register AVLReg = MI.getOperand(1).getReg();
1001 assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
1002 "Can't handle X0, X0 vsetvli yet");
1003 if (AVLReg == RISCV::X0)
1004 NewInfo.setAVLVLMAX();
1005 else if (MI.getOperand(1).isUndef())
1006 // Otherwise use an AVL of 1 to avoid depending on previous vl.
1007 NewInfo.setAVLImm(1);
1008 else {
1009 VNInfo *VNI = getVNInfoFromReg(AVLReg, MI, LIS);
1010 NewInfo.setAVLRegDef(VNI, AVLReg);
1011 }
1012 }
1013 NewInfo.setVTYPE(MI.getOperand(2).getImm());
1014
1015 forwardVSETVLIAVL(NewInfo);
1016
1017 return NewInfo;
1018}
1019
1020static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
1021 RISCVII::VLMUL VLMul) {
1022 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
1023 if (Fractional)
1024 VLEN = VLEN / LMul;
1025 else
1026 VLEN = VLEN * LMul;
1027 return VLEN/SEW;
1028}
1029
1030VSETVLIInfo
1031RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
1032 VSETVLIInfo InstrInfo;
1033 const uint64_t TSFlags = MI.getDesc().TSFlags;
1034
1035 bool TailAgnostic = true;
1036 bool MaskAgnostic = true;
1037 if (!hasUndefinedPassthru(MI)) {
1038 // Start with undisturbed.
1039 TailAgnostic = false;
1040 MaskAgnostic = false;
1041
1042 // If there is a policy operand, use it.
1043 if (RISCVII::hasVecPolicyOp(TSFlags)) {
1044 const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
1045 uint64_t Policy = Op.getImm();
1047 "Invalid Policy Value");
1048 TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
1049 MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
1050 }
1051
1052 // Some pseudo instructions force a tail agnostic policy despite having a
1053 // tied def.
1054 if (RISCVII::doesForceTailAgnostic(TSFlags))
1055 TailAgnostic = true;
1056
1057 if (!RISCVII::usesMaskPolicy(TSFlags))
1058 MaskAgnostic = true;
1059 }
1060
1061 RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);
1062
1063 unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
1064 // A Log2SEW of 0 is an operation on mask registers only.
1065 unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
1066 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
1067
1068 if (RISCVII::hasVLOp(TSFlags)) {
1069 const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
1070 if (VLOp.isImm()) {
1071 int64_t Imm = VLOp.getImm();
1072 // Conver the VLMax sentintel to X0 register.
1073 if (Imm == RISCV::VLMaxSentinel) {
1074 // If we know the exact VLEN, see if we can use the constant encoding
1075 // for the VLMAX instead. This reduces register pressure slightly.
1076 const unsigned VLMAX = computeVLMAX(ST->getRealMaxVLen(), SEW, VLMul);
1077 if (ST->getRealMinVLen() == ST->getRealMaxVLen() && VLMAX <= 31)
1078 InstrInfo.setAVLImm(VLMAX);
1079 else
1080 InstrInfo.setAVLVLMAX();
1081 }
1082 else
1083 InstrInfo.setAVLImm(Imm);
1084 } else if (VLOp.isUndef()) {
1085 // Otherwise use an AVL of 1 to avoid depending on previous vl.
1086 InstrInfo.setAVLImm(1);
1087 } else {
1088 VNInfo *VNI = getVNInfoFromReg(VLOp.getReg(), MI, LIS);
1089 InstrInfo.setAVLRegDef(VNI, VLOp.getReg());
1090 }
1091 } else {
1092 assert(isScalarExtractInstr(MI));
1093 // Pick a random value for state tracking purposes, will be ignored via
1094 // the demanded fields mechanism
1095 InstrInfo.setAVLImm(1);
1096 }
1097#ifndef NDEBUG
1098 if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
1099 assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
1100 }
1101#endif
1102 InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
1103
1104 forwardVSETVLIAVL(InstrInfo);
1105
1106 return InstrInfo;
1107}
1108
1109void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
1111 const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {
1112
1113 ++NumInsertedVSETVL;
1114 if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
1115 // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
1116 // VLMAX.
1117 if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
1118 auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
1120 .addReg(RISCV::X0, RegState::Kill)
1121 .addImm(Info.encodeVTYPE())
1122 .addReg(RISCV::VL, RegState::Implicit);
1123 if (LIS)
1125 return;
1126 }
1127
1128 // If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If
1129 // it has the same VLMAX we want and the last VL/VTYPE we observed is the
1130 // same, we can use the X0, X0 form.
1131 if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg()) {
1132 if (const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
1133 DefMI && isVectorConfigInstr(*DefMI)) {
1134 VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
1135 if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
1136 auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
1138 .addReg(RISCV::X0, RegState::Kill)
1139 .addImm(Info.encodeVTYPE())
1140 .addReg(RISCV::VL, RegState::Implicit);
1141 if (LIS)
1143 return;
1144 }
1145 }
1146 }
1147 }
1148
1149 if (Info.hasAVLImm()) {
1150 auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
1152 .addImm(Info.getAVLImm())
1153 .addImm(Info.encodeVTYPE());
1154 if (LIS)
1156 return;
1157 }
1158
1159 if (Info.hasAVLVLMAX()) {
1160 Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
1161 auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
1163 .addReg(RISCV::X0, RegState::Kill)
1164 .addImm(Info.encodeVTYPE());
1165 if (LIS) {
1167 LIS->createAndComputeVirtRegInterval(DestReg);
1168 }
1169 return;
1170 }
1171
1172 Register AVLReg = Info.getAVLReg();
1173 MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
1174 auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLI))
1176 .addReg(AVLReg)
1177 .addImm(Info.encodeVTYPE());
1178 if (LIS) {
1180 LiveInterval &LI = LIS->getInterval(AVLReg);
1182 // If the AVL value isn't live at MI, do a quick check to see if it's easily
1183 // extendable. Otherwise, we need to copy it.
1184 if (LI.getVNInfoBefore(SI) != Info.getAVLVNInfo()) {
1185 if (!LI.liveAt(SI) && LI.containsOneValue())
1186 LIS->extendToIndices(LI, SI);
1187 else {
1188 Register AVLCopyReg =
1189 MRI->createVirtualRegister(&RISCV::GPRNoX0RegClass);
1191 if (Info.getAVLVNInfo()->isPHIDef())
1192 II = LIS->getMBBFromIndex(Info.getAVLVNInfo()->def)->getFirstNonPHI();
1193 else {
1194 II = LIS->getInstructionFromIndex(Info.getAVLVNInfo()->def);
1195 II = std::next(II);
1196 }
1197 assert(II.isValid());
1198 auto AVLCopy =
1199 BuildMI(*II->getParent(), II, DL, TII->get(RISCV::COPY), AVLCopyReg)
1200 .addReg(AVLReg);
1201 LIS->InsertMachineInstrInMaps(*AVLCopy);
1202 MI->getOperand(1).setReg(AVLCopyReg);
1203 LIS->createAndComputeVirtRegInterval(AVLCopyReg);
1204 }
1205 }
1206 }
1207}
1208
1209/// Return true if a VSETVLI is required to transition from CurInfo to Require
1210/// given a set of DemandedFields \p Used.
1211bool RISCVInsertVSETVLI::needVSETVLI(const DemandedFields &Used,
1212 const VSETVLIInfo &Require,
1213 const VSETVLIInfo &CurInfo) const {
1214 if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
1215 return true;
1216
1217 if (CurInfo.isCompatible(Used, Require, LIS))
1218 return false;
1219
1220 return true;
1221}
1222
1223// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
1224// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
1225// places.
1226static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
1227 DemandedFields &Demanded) {
1228 VSETVLIInfo Info = NewInfo;
1229
1230 if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
1231 !PrevInfo.isUnknown()) {
1232 if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
1233 PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
1234 Info.setVLMul(*NewVLMul);
1235 Demanded.LMUL = DemandedFields::LMULEqual;
1236 }
1237
1238 return Info;
1239}
1240
1241// Given an incoming state reaching MI, minimally modifies that state so that it
1242// is compatible with MI. The resulting state is guaranteed to be semantically
1243// legal for MI, but may not be the state requested by MI.
1244void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
1245 const MachineInstr &MI) const {
1246 if (isVectorCopy(ST->getRegisterInfo(), MI) &&
1247 (Info.isUnknown() || !Info.isValid() || Info.hasSEWLMULRatioOnly())) {
1248 // Use an arbitrary but valid AVL and VTYPE so vill will be cleared. It may
1249 // be coalesced into another vsetvli since we won't demand any fields.
1250 VSETVLIInfo NewInfo; // Need a new VSETVLIInfo to clear SEWLMULRatioOnly
1251 NewInfo.setAVLImm(1);
1252 NewInfo.setVTYPE(RISCVII::VLMUL::LMUL_1, /*sew*/ 8, /*ta*/ true,
1253 /*ma*/ true);
1254 Info = NewInfo;
1255 return;
1256 }
1257
1258 if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
1259 return;
1260
1261 DemandedFields Demanded = getDemanded(MI, ST);
1262
1263 const VSETVLIInfo NewInfo = computeInfoForInstr(MI);
1264 assert(NewInfo.isValid() && !NewInfo.isUnknown());
1265 if (Info.isValid() && !needVSETVLI(Demanded, NewInfo, Info))
1266 return;
1267
1268 const VSETVLIInfo PrevInfo = Info;
1269 if (!Info.isValid() || Info.isUnknown())
1270 Info = NewInfo;
1271
1272 const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);
1273
1274 // If MI only demands that VL has the same zeroness, we only need to set the
1275 // AVL if the zeroness differs. This removes a vsetvli entirely if the types
1276 // match or allows use of cheaper avl preserving variant if VLMAX doesn't
1277 // change. If VLMAX might change, we couldn't use the 'vsetvli x0, x0, vtype"
1278 // variant, so we avoid the transform to prevent extending live range of an
1279 // avl register operand.
1280 // TODO: We can probably relax this for immediates.
1281 bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, LIS) &&
1282 IncomingInfo.hasSameVLMAX(PrevInfo);
1283 if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
1284 Info.setAVL(IncomingInfo);
1285
1286 Info.setVTYPE(
1287 ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
1288 .getVLMUL(),
1289 ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
1290 // Prefer tail/mask agnostic since it can be relaxed to undisturbed later
1291 // if needed.
1292 (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
1293 IncomingInfo.getTailAgnostic(),
1294 (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
1295 IncomingInfo.getMaskAgnostic());
1296
1297 // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
1298 // the AVL.
1299 if (Info.hasSEWLMULRatioOnly()) {
1300 VSETVLIInfo RatiolessInfo = IncomingInfo;
1301 RatiolessInfo.setAVL(Info);
1302 Info = RatiolessInfo;
1303 }
1304}
1305
1306// Given a state with which we evaluated MI (see transferBefore above for why
1307// this might be different that the state MI requested), modify the state to
1308// reflect the changes MI might make.
1309void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
1310 const MachineInstr &MI) const {
1311 if (isVectorConfigInstr(MI)) {
1312 Info = getInfoForVSETVLI(MI);
1313 return;
1314 }
1315
1317 // Update AVL to vl-output of the fault first load.
1318 assert(MI.getOperand(1).getReg().isVirtual());
1319 if (LIS) {
1320 auto &LI = LIS->getInterval(MI.getOperand(1).getReg());
1321 SlotIndex SI =
1323 VNInfo *VNI = LI.getVNInfoAt(SI);
1324 Info.setAVLRegDef(VNI, MI.getOperand(1).getReg());
1325 } else
1326 Info.setAVLRegDef(nullptr, MI.getOperand(1).getReg());
1327 return;
1328 }
1329
1330 // If this is something that updates VL/VTYPE that we don't know about, set
1331 // the state to unknown.
1332 if (MI.isCall() || MI.isInlineAsm() ||
1333 MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
1334 MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
1335 Info = VSETVLIInfo::getUnknown();
1336}
1337
1338bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
1339 VSETVLIInfo &Info) const {
1340 bool HadVectorOp = false;
1341
1342 Info = BlockInfo[MBB.getNumber()].Pred;
1343 for (const MachineInstr &MI : MBB) {
1344 transferBefore(Info, MI);
1345
1346 if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags) ||
1347 isVectorCopy(ST->getRegisterInfo(), MI))
1348 HadVectorOp = true;
1349
1350 transferAfter(Info, MI);
1351 }
1352
1353 return HadVectorOp;
1354}
1355
1356void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
1357
1358 BlockData &BBInfo = BlockInfo[MBB.getNumber()];
1359
1360 BBInfo.InQueue = false;
1361
1362 // Start with the previous entry so that we keep the most conservative state
1363 // we have ever found.
1364 VSETVLIInfo InInfo = BBInfo.Pred;
1365 if (MBB.pred_empty()) {
1366 // There are no predecessors, so use the default starting status.
1367 InInfo.setUnknown();
1368 } else {
1370 InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
1371 }
1372
1373 // If we don't have any valid predecessor value, wait until we do.
1374 if (!InInfo.isValid())
1375 return;
1376
1377 // If no change, no need to rerun block
1378 if (InInfo == BBInfo.Pred)
1379 return;
1380
1381 BBInfo.Pred = InInfo;
1382 LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
1383 << " changed to " << BBInfo.Pred << "\n");
1384
1385 // Note: It's tempting to cache the state changes here, but due to the
1386 // compatibility checks performed a blocks output state can change based on
1387 // the input state. To cache, we'd have to add logic for finding
1388 // never-compatible state changes.
1389 VSETVLIInfo TmpStatus;
1390 computeVLVTYPEChanges(MBB, TmpStatus);
1391
1392 // If the new exit value matches the old exit value, we don't need to revisit
1393 // any blocks.
1394 if (BBInfo.Exit == TmpStatus)
1395 return;
1396
1397 BBInfo.Exit = TmpStatus;
1398 LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
1399 << " changed to " << BBInfo.Exit << "\n");
1400
1401 // Add the successors to the work list so we can propagate the changed exit
1402 // status.
1403 for (MachineBasicBlock *S : MBB.successors())
1404 if (!BlockInfo[S->getNumber()].InQueue) {
1405 BlockInfo[S->getNumber()].InQueue = true;
1406 WorkList.push(S);
1407 }
1408}
1409
1410// If we weren't able to prove a vsetvli was directly unneeded, it might still
1411// be unneeded if the AVL was a phi node where all incoming values are VL
1412// outputs from the last VSETVLI in their respective basic blocks.
1413bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
1414 const MachineBasicBlock &MBB) const {
1415 if (!Require.hasAVLReg())
1416 return true;
1417
1418 if (!LIS)
1419 return true;
1420
1421 // We need the AVL to have been produced by a PHI node in this basic block.
1422 const VNInfo *Valno = Require.getAVLVNInfo();
1423 if (!Valno->isPHIDef() || LIS->getMBBFromIndex(Valno->def) != &MBB)
1424 return true;
1425
1426 const LiveRange &LR = LIS->getInterval(Require.getAVLReg());
1427
1428 for (auto *PBB : MBB.predecessors()) {
1429 const VSETVLIInfo &PBBExit = BlockInfo[PBB->getNumber()].Exit;
1430
1431 // We need the PHI input to the be the output of a VSET(I)VLI.
1432 const VNInfo *Value = LR.getVNInfoBefore(LIS->getMBBEndIdx(PBB));
1433 if (!Value)
1434 return true;
1436 if (!DefMI || !isVectorConfigInstr(*DefMI))
1437 return true;
1438
1439 // We found a VSET(I)VLI make sure it matches the output of the
1440 // predecessor block.
1441 VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
1442 if (DefInfo != PBBExit)
1443 return true;
1444
1445 // Require has the same VL as PBBExit, so if the exit from the
1446 // predecessor has the VTYPE we are looking for we might be able
1447 // to avoid a VSETVLI.
1448 if (PBBExit.isUnknown() || !PBBExit.hasSameVTYPE(Require))
1449 return true;
1450 }
1451
1452 // If all the incoming values to the PHI checked out, we don't need
1453 // to insert a VSETVLI.
1454 return false;
1455}
1456
1457void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
1458 VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
1459 // Track whether the prefix of the block we've scanned is transparent
1460 // (meaning has not yet changed the abstract state).
1461 bool PrefixTransparent = true;
1462 for (MachineInstr &MI : MBB) {
1463 const VSETVLIInfo PrevInfo = CurInfo;
1464 transferBefore(CurInfo, MI);
1465
1466 // If this is an explicit VSETVLI or VSETIVLI, update our state.
1467 if (isVectorConfigInstr(MI)) {
1468 // Conservatively, mark the VL and VTYPE as live.
1469 assert(MI.getOperand(3).getReg() == RISCV::VL &&
1470 MI.getOperand(4).getReg() == RISCV::VTYPE &&
1471 "Unexpected operands where VL and VTYPE should be");
1472 MI.getOperand(3).setIsDead(false);
1473 MI.getOperand(4).setIsDead(false);
1474 PrefixTransparent = false;
1475 }
1476
1478 isVectorCopy(ST->getRegisterInfo(), MI)) {
1479 if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) {
1480 insertVSETVLI(MBB, MI, MI.getDebugLoc(), CurInfo, PrevInfo);
1481 PrefixTransparent = false;
1482 }
1483 MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
1484 /*isImp*/ true));
1485 }
1486
1487 uint64_t TSFlags = MI.getDesc().TSFlags;
1488 if (RISCVII::hasSEWOp(TSFlags)) {
1489 if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) {
1490 // If this is the first implicit state change, and the state change
1491 // requested can be proven to produce the same register contents, we
1492 // can skip emitting the actual state change and continue as if we
1493 // had since we know the GPR result of the implicit state change
1494 // wouldn't be used and VL/VTYPE registers are correct. Note that
1495 // we *do* need to model the state as if it changed as while the
1496 // register contents are unchanged, the abstract model can change.
1497 if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
1498 insertVSETVLI(MBB, MI, MI.getDebugLoc(), CurInfo, PrevInfo);
1499 PrefixTransparent = false;
1500 }
1501
1502 if (RISCVII::hasVLOp(TSFlags)) {
1503 MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
1504 if (VLOp.isReg()) {
1505 Register Reg = VLOp.getReg();
1506
1507 // Erase the AVL operand from the instruction.
1508 VLOp.setReg(RISCV::NoRegister);
1509 VLOp.setIsKill(false);
1510 if (LIS) {
1511 LiveInterval &LI = LIS->getInterval(Reg);
1513 LIS->shrinkToUses(&LI, &DeadMIs);
1514 // We might have separate components that need split due to
1515 // needVSETVLIPHI causing us to skip inserting a new VL def.
1517 LIS->splitSeparateComponents(LI, SplitLIs);
1518
1519 // If the AVL was an immediate > 31, then it would have been emitted
1520 // as an ADDI. However, the ADDI might not have been used in the
1521 // vsetvli, or a vsetvli might not have been emitted, so it may be
1522 // dead now.
1523 for (MachineInstr *DeadMI : DeadMIs) {
1524 if (!TII->isAddImmediate(*DeadMI, Reg))
1525 continue;
1526 LIS->RemoveMachineInstrFromMaps(*DeadMI);
1527 DeadMI->eraseFromParent();
1528 }
1529 }
1530 }
1531 MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
1532 /*isImp*/ true));
1533 }
1534 MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
1535 /*isImp*/ true));
1536 }
1537
1538 if (MI.isCall() || MI.isInlineAsm() ||
1539 MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
1540 MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
1541 PrefixTransparent = false;
1542
1543 transferAfter(CurInfo, MI);
1544 }
1545
1546 const auto &Info = BlockInfo[MBB.getNumber()];
1547 if (CurInfo != Info.Exit) {
1548 LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
1549 LLVM_DEBUG(dbgs() << " begin state: " << Info.Pred << "\n");
1550 LLVM_DEBUG(dbgs() << " expected end state: " << Info.Exit << "\n");
1551 LLVM_DEBUG(dbgs() << " actual end state: " << CurInfo << "\n");
1552 }
1553 assert(CurInfo == Info.Exit && "InsertVSETVLI dataflow invariant violated");
1554}
1555
1556/// Perform simple partial redundancy elimination of the VSETVLI instructions
1557/// we're about to insert by looking for cases where we can PRE from the
1558/// beginning of one block to the end of one of its predecessors. Specifically,
1559/// this is geared to catch the common case of a fixed length vsetvl in a single
1560/// block loop when it could execute once in the preheader instead.
1561void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
1562 if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
1563 return;
1564
1565 MachineBasicBlock *UnavailablePred = nullptr;
1566 VSETVLIInfo AvailableInfo;
1567 for (MachineBasicBlock *P : MBB.predecessors()) {
1568 const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
1569 if (PredInfo.isUnknown()) {
1570 if (UnavailablePred)
1571 return;
1572 UnavailablePred = P;
1573 } else if (!AvailableInfo.isValid()) {
1574 AvailableInfo = PredInfo;
1575 } else if (AvailableInfo != PredInfo) {
1576 return;
1577 }
1578 }
1579
1580 // Unreachable, single pred, or full redundancy. Note that FRE is handled by
1581 // phase 3.
1582 if (!UnavailablePred || !AvailableInfo.isValid())
1583 return;
1584
1585 if (!LIS)
1586 return;
1587
1588 // If we don't know the exact VTYPE, we can't copy the vsetvli to the exit of
1589 // the unavailable pred.
1590 if (AvailableInfo.hasSEWLMULRatioOnly())
1591 return;
1592
1593 // Critical edge - TODO: consider splitting?
1594 if (UnavailablePred->succ_size() != 1)
1595 return;
1596
1597 // If the AVL value is a register (other than our VLMAX sentinel),
1598 // we need to prove the value is available at the point we're going
1599 // to insert the vsetvli at.
1600 if (AvailableInfo.hasAVLReg()) {
1601 SlotIndex SI = AvailableInfo.getAVLVNInfo()->def;
1602 // This is an inline dominance check which covers the case of
1603 // UnavailablePred being the preheader of a loop.
1604 if (LIS->getMBBFromIndex(SI) != UnavailablePred)
1605 return;
1606 if (!UnavailablePred->terminators().empty() &&
1607 SI >= LIS->getInstructionIndex(*UnavailablePred->getFirstTerminator()))
1608 return;
1609 }
1610
1611 // Model the effect of changing the input state of the block MBB to
1612 // AvailableInfo. We're looking for two issues here; one legality,
1613 // one profitability.
1614 // 1) If the block doesn't use some of the fields from VL or VTYPE, we
1615 // may hit the end of the block with a different end state. We can
1616 // not make this change without reflowing later blocks as well.
1617 // 2) If we don't actually remove a transition, inserting a vsetvli
1618 // into the predecessor block would be correct, but unprofitable.
1619 VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
1620 VSETVLIInfo CurInfo = AvailableInfo;
1621 int TransitionsRemoved = 0;
1622 for (const MachineInstr &MI : MBB) {
1623 const VSETVLIInfo LastInfo = CurInfo;
1624 const VSETVLIInfo LastOldInfo = OldInfo;
1625 transferBefore(CurInfo, MI);
1626 transferBefore(OldInfo, MI);
1627 if (CurInfo == LastInfo)
1628 TransitionsRemoved++;
1629 if (LastOldInfo == OldInfo)
1630 TransitionsRemoved--;
1631 transferAfter(CurInfo, MI);
1632 transferAfter(OldInfo, MI);
1633 if (CurInfo == OldInfo)
1634 // Convergence. All transitions after this must match by construction.
1635 break;
1636 }
1637 if (CurInfo != OldInfo || TransitionsRemoved <= 0)
1638 // Issues 1 and 2 above
1639 return;
1640
1641 // Finally, update both data flow state and insert the actual vsetvli.
1642 // Doing both keeps the code in sync with the dataflow results, which
1643 // is critical for correctness of phase 3.
1644 auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
1645 LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
1646 << UnavailablePred->getName() << " with state "
1647 << AvailableInfo << "\n");
1648 BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
1649 BlockInfo[MBB.getNumber()].Pred = AvailableInfo;
1650
1651 // Note there's an implicit assumption here that terminators never use
1652 // or modify VL or VTYPE. Also, fallthrough will return end().
1653 auto InsertPt = UnavailablePred->getFirstInstrTerminator();
1654 insertVSETVLI(*UnavailablePred, InsertPt,
1655 UnavailablePred->findDebugLoc(InsertPt),
1656 AvailableInfo, OldExit);
1657}
1658
1659// Return true if we can mutate PrevMI to match MI without changing any the
1660// fields which would be observed.
1661bool RISCVInsertVSETVLI::canMutatePriorConfig(
1662 const MachineInstr &PrevMI, const MachineInstr &MI,
1663 const DemandedFields &Used) const {
1664 // If the VL values aren't equal, return false if either a) the former is
1665 // demanded, or b) we can't rewrite the former to be the later for
1666 // implementation reasons.
1667 if (!isVLPreservingConfig(MI)) {
1668 if (Used.VLAny)
1669 return false;
1670
1671 if (Used.VLZeroness) {
1672 if (isVLPreservingConfig(PrevMI))
1673 return false;
1674 if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI),
1675 LIS))
1676 return false;
1677 }
1678
1679 auto &AVL = MI.getOperand(1);
1680
1681 // If the AVL is a register, we need to make sure its definition is the same
1682 // at PrevMI as it was at MI.
1683 if (AVL.isReg() && AVL.getReg() != RISCV::X0) {
1684 VNInfo *VNI = getVNInfoFromReg(AVL.getReg(), MI, LIS);
1685 VNInfo *PrevVNI = getVNInfoFromReg(AVL.getReg(), PrevMI, LIS);
1686 if (!VNI || !PrevVNI || VNI != PrevVNI)
1687 return false;
1688 }
1689 }
1690
1691 assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm());
1692 auto PriorVType = PrevMI.getOperand(2).getImm();
1693 auto VType = MI.getOperand(2).getImm();
1694 return areCompatibleVTYPEs(PriorVType, VType, Used);
1695}
1696
1697void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
1698 MachineInstr *NextMI = nullptr;
1699 // We can have arbitrary code in successors, so VL and VTYPE
1700 // must be considered demanded.
1701 DemandedFields Used;
1702 Used.demandVL();
1703 Used.demandVTYPE();
1705
1706 auto dropAVLUse = [&](MachineOperand &MO) {
1707 if (!MO.isReg() || !MO.getReg().isVirtual())
1708 return;
1709 Register OldVLReg = MO.getReg();
1710 MO.setReg(RISCV::NoRegister);
1711
1712 if (LIS)
1713 LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
1714
1715 MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
1716 if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
1717 MRI->use_nodbg_empty(OldVLReg))
1718 ToDelete.push_back(VLOpDef);
1719 };
1720
1721 for (MachineInstr &MI :
1723
1724 if (!isVectorConfigInstr(MI)) {
1725 Used.doUnion(getDemanded(MI, ST));
1726 if (MI.isCall() || MI.isInlineAsm() ||
1727 MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
1728 MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
1729 NextMI = nullptr;
1730 continue;
1731 }
1732
1733 if (!MI.getOperand(0).isDead())
1734 Used.demandVL();
1735
1736 if (NextMI) {
1737 if (!Used.usedVL() && !Used.usedVTYPE()) {
1738 dropAVLUse(MI.getOperand(1));
1739 if (LIS)
1741 MI.eraseFromParent();
1742 NumCoalescedVSETVL++;
1743 // Leave NextMI unchanged
1744 continue;
1745 }
1746
1747 if (canMutatePriorConfig(MI, *NextMI, Used)) {
1748 if (!isVLPreservingConfig(*NextMI)) {
1749 Register DefReg = NextMI->getOperand(0).getReg();
1750
1751 MI.getOperand(0).setReg(DefReg);
1752 MI.getOperand(0).setIsDead(false);
1753
1754 // The def of DefReg moved to MI, so extend the LiveInterval up to
1755 // it.
1756 if (DefReg.isVirtual() && LIS) {
1757 LiveInterval &DefLI = LIS->getInterval(DefReg);
1758 SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
1759 VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
1760 LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
1761 DefLI.addSegment(S);
1762 DefVNI->def = MISlot;
1763 // Mark DefLI as spillable if it was previously unspillable
1764 DefLI.setWeight(0);
1765
1766 // DefReg may have had no uses, in which case we need to shrink
1767 // the LiveInterval up to MI.
1768 LIS->shrinkToUses(&DefLI);
1769 }
1770
1771 dropAVLUse(MI.getOperand(1));
1772 if (NextMI->getOperand(1).isImm())
1773 MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
1774 else
1775 MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(),
1776 false);
1777
1778 MI.setDesc(NextMI->getDesc());
1779 }
1780 MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
1781
1782 dropAVLUse(NextMI->getOperand(1));
1783 if (LIS)
1784 LIS->RemoveMachineInstrFromMaps(*NextMI);
1785 NextMI->eraseFromParent();
1786 NumCoalescedVSETVL++;
1787 // fallthrough
1788 }
1789 }
1790 NextMI = &MI;
1791 Used = getDemanded(MI, ST);
1792 }
1793
1794 // Loop over the dead AVL values, and delete them now. This has
1795 // to be outside the above loop to avoid invalidating iterators.
1796 for (auto *MI : ToDelete) {
1797 if (LIS) {
1798 LIS->removeInterval(MI->getOperand(0).getReg());
1800 }
1801 MI->eraseFromParent();
1802 }
1803}
1804
1805void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
1806 for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
1807 MachineInstr &MI = *I++;
1809 Register VLOutput = MI.getOperand(1).getReg();
1810 assert(VLOutput.isVirtual());
1811 if (!MI.getOperand(1).isDead()) {
1812 auto ReadVLMI = BuildMI(MBB, I, MI.getDebugLoc(),
1813 TII->get(RISCV::PseudoReadVL), VLOutput);
1814 // Move the LiveInterval's definition down to PseudoReadVL.
1815 if (LIS) {
1816 SlotIndex NewDefSI =
1817 LIS->InsertMachineInstrInMaps(*ReadVLMI).getRegSlot();
1818 LiveInterval &DefLI = LIS->getInterval(VLOutput);
1819 VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
1820 DefLI.removeSegment(DefLI.beginIndex(), NewDefSI);
1821 DefVNI->def = NewDefSI;
1822 }
1823 }
1824 // We don't use the vl output of the VLEFF/VLSEGFF anymore.
1825 MI.getOperand(1).setReg(RISCV::X0);
1826 }
1827 }
1828}
1829
1830bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
1831 // Skip if the vector extension is not enabled.
1833 if (!ST->hasVInstructions())
1834 return false;
1835
1836 LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");
1837
1838 TII = ST->getInstrInfo();
1839 MRI = &MF.getRegInfo();
1840 auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
1841 LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
1842
1843 assert(BlockInfo.empty() && "Expect empty block infos");
1844 BlockInfo.resize(MF.getNumBlockIDs());
1845
1846 bool HaveVectorOp = false;
1847
1848 // Phase 1 - determine how VL/VTYPE are affected by the each block.
1849 for (const MachineBasicBlock &MBB : MF) {
1850 VSETVLIInfo TmpStatus;
1851 HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
1852 // Initial exit state is whatever change we found in the block.
1853 BlockData &BBInfo = BlockInfo[MBB.getNumber()];
1854 BBInfo.Exit = TmpStatus;
1855 LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
1856 << " is " << BBInfo.Exit << "\n");
1857
1858 }
1859
1860 // If we didn't find any instructions that need VSETVLI, we're done.
1861 if (!HaveVectorOp) {
1862 BlockInfo.clear();
1863 return false;
1864 }
1865
1866 // Phase 2 - determine the exit VL/VTYPE from each block. We add all
1867 // blocks to the list here, but will also add any that need to be revisited
1868 // during Phase 2 processing.
1869 for (const MachineBasicBlock &MBB : MF) {
1870 WorkList.push(&MBB);
1871 BlockInfo[MBB.getNumber()].InQueue = true;
1872 }
1873 while (!WorkList.empty()) {
1874 const MachineBasicBlock &MBB = *WorkList.front();
1875 WorkList.pop();
1876 computeIncomingVLVTYPE(MBB);
1877 }
1878
1879 // Perform partial redundancy elimination of vsetvli transitions.
1880 for (MachineBasicBlock &MBB : MF)
1881 doPRE(MBB);
1882
1883 // Phase 3 - add any vsetvli instructions needed in the block. Use the
1884 // Phase 2 information to avoid adding vsetvlis before the first vector
1885 // instruction in the block if the VL/VTYPE is satisfied by its
1886 // predecessors.
1887 for (MachineBasicBlock &MBB : MF)
1888 emitVSETVLIs(MBB);
1889
1890 // Now that all vsetvlis are explicit, go through and do block local
1891 // DSE and peephole based demanded fields based transforms. Note that
1892 // this *must* be done outside the main dataflow so long as we allow
1893 // any cross block analysis within the dataflow. We can't have both
1894 // demanded fields based mutation and non-local analysis in the
1895 // dataflow at the same time without introducing inconsistencies.
1896 for (MachineBasicBlock &MBB : MF)
1897 coalesceVSETVLIs(MBB);
1898
1899 // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
1900 // of VLEFF/VLSEGFF.
1901 for (MachineBasicBlock &MBB : MF)
1902 insertReadVL(MBB);
1903
1904 BlockInfo.clear();
1905 return HaveVectorOp;
1906}
1907
1908/// Returns an instance of the Insert VSETVLI pass.
1910 return new RISCVInsertVSETVLI();
1911}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define LLVM_ATTRIBUTE_USED
Definition: Compiler.h:230
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:622
#define LLVM_DEBUG(...)
Definition: Debug.h:106
static RegisterPass< DebugifyFunctionPass > DF("debugify-function", "Attach debug info to a function")
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1315
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
static Interval intersect(const Interval &I1, const Interval &I2)
uint64_t IntrinsicInst * II
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
static cl::opt< bool > EnsureWholeVectorRegisterMoveValidVTYPE(DEBUG_TYPE "-whole-vector-register-move-valid-vtype", cl::Hidden, cl::desc("Insert vsetvlis before vmvNr.vs to ensure vtype is valid and " "vill is cleared"), cl::init(true))
#define RISCV_INSERT_VSETVLI_NAME
static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo, DemandedFields &Demanded)
#define DEBUG_TYPE
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
BlockData()=default
Represent the analysis usage information of a pass.
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:687
void setWeight(float Value)
Definition: LiveInterval.h:721
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
SlotIndexes * getSlotIndexes() const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const
Return the last index in the given basic block.
LiveInterval & getInterval(Register Reg)
void removeInterval(Register Reg)
Interval removal.
bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
void extendToIndices(LiveRange &LR, ArrayRef< SlotIndex > Indices, ArrayRef< SlotIndex > Undefs)
Extend the live range LR to reach all points in Indices.
void splitSeparateComponents(LiveInterval &LI, SmallVectorImpl< LiveInterval * > &SplitLIs)
Split separate components in LiveInterval LI into separate intervals.
MachineBasicBlock * getMBBFromIndex(SlotIndex index) const
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
This class represents the liveness of a register, stack slot, etc.
Definition: LiveInterval.h:157
iterator addSegment(Segment S)
Add the specified Segment to this range, merging segments as appropriate.
bool liveAt(SlotIndex index) const
Definition: LiveInterval.h:401
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarily including Idx,...
Definition: LiveInterval.h:429
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:385
bool containsOneValue() const
Definition: LiveInterval.h:311
void removeSegment(SlotIndex Start, SlotIndex End, bool RemoveDeadValNo=false)
Remove the specified interval from this live range.
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
Definition: LiveInterval.h:421
reverse_iterator rend()
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned succ_size() const
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
reverse_iterator rbegin()
iterator_range< pred_iterator > predecessors()
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:572
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:65
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
Definition: SlotIndexes.h:237
SlotIndex getInstructionIndex(const MachineInstr &MI, bool IgnoreBundle=false) const
Returns the base index for the given instruction.
Definition: SlotIndexes.h:379
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
VNInfo - Value Number Information.
Definition: LiveInterval.h:53
SlotIndex def
The index of the defining instruction.
Definition: LiveInterval.h:61
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
Definition: LiveInterval.h:78
LLVM Value Representation.
Definition: Value.h:74
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
static bool usesMaskPolicy(uint64_t TSFlags)
static unsigned getVLOpNum(const MCInstrDesc &Desc)
static bool doesForceTailAgnostic(uint64_t TSFlags)
static VLMUL getLMul(uint64_t TSFlags)
static bool hasVLOp(uint64_t TSFlags)
static bool hasVecPolicyOp(uint64_t TSFlags)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
static bool hasSEWOp(uint64_t TSFlags)
static bool isTailAgnostic(unsigned VType)
static RISCVII::VLMUL getVLMUL(unsigned VType)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul)
static bool isMaskAgnostic(unsigned VType)
static bool isValidSEW(unsigned SEW)
unsigned encodeVTYPE(RISCVII::VLMUL VLMUL, unsigned SEW, bool TailAgnostic, bool MaskAgnostic)
static unsigned getSEW(unsigned VType)
std::optional< RISCVII::VLMUL > getSameRatioLMUL(unsigned SEW, RISCVII::VLMUL VLMUL, unsigned EEW)
unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode)
bool isFaultFirstLoad(const MachineInstr &MI)
static constexpr int64_t VLMaxSentinel
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Define
Register definition.
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
@ Uninitialized
Definition: Threading.h:61
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2082
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
FunctionPass * createRISCVInsertVSETVLIPass()
Returns an instance of the Insert VSETVLI pass.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:303
char & RISCVInsertVSETVLIID
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Status intersect(const Status &S) const
This represents a simple continuous liveness interval for a value.
Definition: LiveInterval.h:162
static bool isRVVRegClass(const TargetRegisterClass *RC)