LLVM 20.0.0git
RISCVInsertWriteVXRM.cpp
Go to the documentation of this file.
1//===-- RISCVInsertWriteVXRM.cpp - Insert Write of RISC-V VXRM CSR --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass inserts writes to the VXRM CSR as needed by vector instructions.
10// Each instruction that uses VXRM carries an operand that contains its required
11// VXRM value. This pass tries to optimize placement to avoid redundant writes
12// to VXRM.
13//
14// This is done using 2 dataflow algorithms. The first is a forward data flow
15// to calculate where a VXRM value is available. The second is a backwards
16// dataflow to determine where a VXRM value is anticipated.
17//
18// Finally, we use the results of these two dataflows to insert VXRM writes
19// where a value is anticipated, but not available.
20//
21// FIXME: This pass does not split critical edges, so there can still be some
22// redundancy.
23//
24// FIXME: If we are willing to have writes that aren't always needed, we could
25// reduce the number of VXRM writes in some cases.
26//===----------------------------------------------------------------------===//
27
29#include "RISCV.h"
30#include "RISCVSubtarget.h"
32#include <queue>
33
34using namespace llvm;
35
36#define DEBUG_TYPE "riscv-insert-write-vxrm"
37#define RISCV_INSERT_WRITE_VXRM_NAME "RISC-V Insert Write VXRM Pass"
38
39namespace {
40
41class VXRMInfo {
42 uint8_t VXRMImm = 0;
43
44 enum : uint8_t {
46 Static,
47 Unknown,
48 } State = Uninitialized;
49
50public:
51 VXRMInfo() {}
52
53 static VXRMInfo getUnknown() {
54 VXRMInfo Info;
55 Info.setUnknown();
56 return Info;
57 }
58
59 bool isValid() const { return State != Uninitialized; }
60 void setUnknown() { State = Unknown; }
61 bool isUnknown() const { return State == Unknown; }
62
63 bool isStatic() const { return State == Static; }
64
65 void setVXRMImm(unsigned Imm) {
66 assert(Imm <= 3 && "Unexpected VXRM value");
67 VXRMImm = Imm;
68 State = Static;
69 }
70 unsigned getVXRMImm() const {
71 assert(isStatic() && VXRMImm <= 3 && "Unexpected state");
72 return VXRMImm;
73 }
74
75 bool operator==(const VXRMInfo &Other) const {
76 // Uninitialized is only equal to another Uninitialized.
77 if (State != Other.State)
78 return false;
79
80 if (isStatic())
81 return VXRMImm == Other.VXRMImm;
82
83 assert((isValid() || isUnknown()) && "Unexpected state");
84 return true;
85 }
86
87 bool operator!=(const VXRMInfo &Other) const { return !(*this == Other); }
88
89 // Calculate the VXRMInfo visible to a block assuming this and Other are
90 // both predecessors.
91 VXRMInfo intersect(const VXRMInfo &Other) const {
92 // If the new value isn't valid, ignore it.
93 if (!Other.isValid())
94 return *this;
95
96 // If this value isn't valid, this must be the first predecessor, use it.
97 if (!isValid())
98 return Other;
99
100 // If either is unknown, the result is unknown.
101 if (isUnknown() || Other.isUnknown())
102 return VXRMInfo::getUnknown();
103
104 // If we have an exact match, return this.
105 if (*this == Other)
106 return *this;
107
108 // Otherwise the result is unknown.
109 return VXRMInfo::getUnknown();
110 }
111
112 // Calculate the VXRMInfo visible to a block assuming this and Other
113 // are both predecessors. To allow speculatively running WriteVXRM
114 // we will ignore Unknowns if one of this and Other have valid
115 // WriteVXRM. Rationale: WriteVXRM causes a pipeline flush in some
116 // uarchs and moving it outside loops is very important for some
117 // workloads.
118 VXRMInfo intersectAnticipated(const VXRMInfo &Other) const {
119 // If the new value isn't valid, ignore it.
120 if (!Other.isValid())
121 return *this;
122
123 // If this value isn't valid, this must be the first predecessor, use it.
124 if (!isValid())
125 return Other;
126
127 // If either is unknown, the result is the other one.
128 if (isUnknown())
129 return Other;
130 if (Other.isUnknown())
131 return *this;
132
133 // If we have an exact match, return this.
134 if (*this == Other)
135 return *this;
136
137 // Otherwise the result is unknown.
138 return VXRMInfo::getUnknown();
139 }
140
141#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
142 /// Support for debugging, callable in GDB: V->dump()
143 LLVM_DUMP_METHOD void dump() const {
144 print(dbgs());
145 dbgs() << "\n";
146 }
147
148 void print(raw_ostream &OS) const {
149 OS << '{';
150 if (!isValid())
151 OS << "Uninitialized";
152 else if (isUnknown())
153 OS << "Unknown";
154 else
155 OS << getVXRMImm();
156 OS << '}';
157 }
158#endif
159};
160
161#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
163inline raw_ostream &operator<<(raw_ostream &OS, const VXRMInfo &V) {
164 V.print(OS);
165 return OS;
166}
167#endif
168
169struct BlockData {
170 // Indicates if the block uses VXRM. Uninitialized means no use.
171 VXRMInfo VXRMUse;
172
173 // Indicates the VXRM output from the block. Unitialized means transparent.
174 VXRMInfo VXRMOut;
175
176 // Keeps track of the available VXRM value at the start of the basic bloc.
177 VXRMInfo AvailableIn;
178
179 // Keeps track of the available VXRM value at the end of the basic block.
180 VXRMInfo AvailableOut;
181
182 // Keeps track of what VXRM is anticipated at the start of the basic block.
183 VXRMInfo AnticipatedIn;
184
185 // Keeps track of what VXRM is anticipated at the end of the basic block.
186 VXRMInfo AnticipatedOut;
187
188 // Keeps track of whether the block is already in the queue.
189 bool InQueue;
190
191 BlockData() = default;
192};
193
194class RISCVInsertWriteVXRM : public MachineFunctionPass {
195 const TargetInstrInfo *TII;
196
197 std::vector<BlockData> BlockInfo;
198 std::queue<const MachineBasicBlock *> WorkList;
199
200public:
201 static char ID;
202
203 RISCVInsertWriteVXRM() : MachineFunctionPass(ID) {}
204
205 bool runOnMachineFunction(MachineFunction &MF) override;
206
207 void getAnalysisUsage(AnalysisUsage &AU) const override {
208 AU.setPreservesCFG();
210 }
211
212 StringRef getPassName() const override {
214 }
215
216private:
217 bool computeVXRMChanges(const MachineBasicBlock &MBB);
218 void computeAvailable(const MachineBasicBlock &MBB);
219 void computeAnticipated(const MachineFunction &MF, const MachineBasicBlock &MBB);
220 void emitWriteVXRM(MachineBasicBlock &MBB);
221};
222
223} // end anonymous namespace
224
225char RISCVInsertWriteVXRM::ID = 0;
226
228 false, false)
229
230static bool ignoresVXRM(const MachineInstr &MI) {
231 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
232 default:
233 return false;
234 case RISCV::VNCLIP_WI:
235 case RISCV::VNCLIPU_WI:
236 return MI.getOperand(3).getImm() == 0;
237 }
238}
239
240bool RISCVInsertWriteVXRM::computeVXRMChanges(const MachineBasicBlock &MBB) {
241 BlockData &BBInfo = BlockInfo[MBB.getNumber()];
242
243 bool NeedVXRMWrite = false;
244 for (const MachineInstr &MI : MBB) {
245 int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc());
246 if (VXRMIdx >= 0 && !ignoresVXRM(MI)) {
247 unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm();
248
249 if (!BBInfo.VXRMUse.isValid())
250 BBInfo.VXRMUse.setVXRMImm(NewVXRMImm);
251
252 BBInfo.VXRMOut.setVXRMImm(NewVXRMImm);
253 NeedVXRMWrite = true;
254 continue;
255 }
256
257 if (MI.isCall() || MI.isInlineAsm() ||
258 MI.modifiesRegister(RISCV::VXRM, /*TRI=*/nullptr)) {
259 if (!BBInfo.VXRMUse.isValid())
260 BBInfo.VXRMUse.setUnknown();
261
262 BBInfo.VXRMOut.setUnknown();
263 }
264 }
265
266 return NeedVXRMWrite;
267}
268
269void RISCVInsertWriteVXRM::computeAvailable(const MachineBasicBlock &MBB) {
270 BlockData &BBInfo = BlockInfo[MBB.getNumber()];
271
272 BBInfo.InQueue = false;
273
274 VXRMInfo Available;
275 if (MBB.pred_empty()) {
276 Available.setUnknown();
277 } else {
278 for (const MachineBasicBlock *P : MBB.predecessors())
279 Available = Available.intersect(BlockInfo[P->getNumber()].AvailableOut);
280 }
281
282 // If we don't have any valid available info, wait until we do.
283 if (!Available.isValid())
284 return;
285
286 if (Available != BBInfo.AvailableIn) {
287 BBInfo.AvailableIn = Available;
288 LLVM_DEBUG(dbgs() << "AvailableIn state of " << printMBBReference(MBB)
289 << " changed to " << BBInfo.AvailableIn << "\n");
290 }
291
292 if (BBInfo.VXRMOut.isValid())
293 Available = BBInfo.VXRMOut;
294
295 if (Available == BBInfo.AvailableOut)
296 return;
297
298 BBInfo.AvailableOut = Available;
299 LLVM_DEBUG(dbgs() << "AvailableOut state of " << printMBBReference(MBB)
300 << " changed to " << BBInfo.AvailableOut << "\n");
301
302 // Add the successors to the work list so that we can propagate.
303 for (MachineBasicBlock *S : MBB.successors()) {
304 if (!BlockInfo[S->getNumber()].InQueue) {
305 BlockInfo[S->getNumber()].InQueue = true;
306 WorkList.push(S);
307 }
308 }
309}
310
311void RISCVInsertWriteVXRM::computeAnticipated(const MachineFunction &MF, const MachineBasicBlock &MBB) {
312 BlockData &BBInfo = BlockInfo[MBB.getNumber()];
314
315 BBInfo.InQueue = false;
316
317 VXRMInfo Anticipated;
318 if (MBB.succ_empty()) {
319 Anticipated.setUnknown();
320 } else {
321 for (const MachineBasicBlock *S : MBB.successors())
322 if (ST.hasVXRMPipelineFlush())
323 Anticipated =
324 Anticipated.intersectAnticipated(BlockInfo[S->getNumber()].AnticipatedIn);
325 else
326 Anticipated =
327 Anticipated.intersect(BlockInfo[S->getNumber()].AnticipatedIn);
328 }
329
330 // If we don't have any valid anticipated info, wait until we do.
331 if (!Anticipated.isValid())
332 return;
333
334 if (Anticipated != BBInfo.AnticipatedOut) {
335 BBInfo.AnticipatedOut = Anticipated;
336 LLVM_DEBUG(dbgs() << "AnticipatedOut state of " << printMBBReference(MBB)
337 << " changed to " << BBInfo.AnticipatedOut << "\n");
338 }
339
340 // If this block reads VXRM, copy it.
341 if (BBInfo.VXRMUse.isValid())
342 Anticipated = BBInfo.VXRMUse;
343
344 if (Anticipated == BBInfo.AnticipatedIn)
345 return;
346
347 BBInfo.AnticipatedIn = Anticipated;
348 LLVM_DEBUG(dbgs() << "AnticipatedIn state of " << printMBBReference(MBB)
349 << " changed to " << BBInfo.AnticipatedIn << "\n");
350
351 // Add the predecessors to the work list so that we can propagate.
353 if (!BlockInfo[P->getNumber()].InQueue) {
354 BlockInfo[P->getNumber()].InQueue = true;
355 WorkList.push(P);
356 }
357 }
358}
359
360void RISCVInsertWriteVXRM::emitWriteVXRM(MachineBasicBlock &MBB) {
361 const BlockData &BBInfo = BlockInfo[MBB.getNumber()];
362
363 VXRMInfo Info = BBInfo.AvailableIn;
364
365 // Flag to indicates we need to insert a VXRM write. We want to delay it as
366 // late as possible in this block.
367 bool PendingInsert = false;
368
369 // Insert VXRM write if anticipated and not available.
370 if (BBInfo.AnticipatedIn.isStatic()) {
371 // If this is the entry block and the value is anticipated, insert.
372 if (MBB.isEntryBlock()) {
373 PendingInsert = true;
374 } else {
375 // Search for any predecessors that wouldn't satisfy our requirement and
376 // insert a write VXRM if needed.
377 // NOTE: If one predecessor is able to provide the requirement, but
378 // another isn't, it means we have a critical edge. The better placement
379 // would be to split the critical edge.
381 const BlockData &PInfo = BlockInfo[P->getNumber()];
382 // If it's available out of the predecessor, then we're ok.
383 if (PInfo.AvailableOut.isStatic() &&
384 PInfo.AvailableOut.getVXRMImm() ==
385 BBInfo.AnticipatedIn.getVXRMImm())
386 continue;
387 // If the predecessor anticipates this value for all its succesors,
388 // then a write to VXRM would have already occured before this block is
389 // executed.
390 if (PInfo.AnticipatedOut.isStatic() &&
391 PInfo.AnticipatedOut.getVXRMImm() ==
392 BBInfo.AnticipatedIn.getVXRMImm())
393 continue;
394 PendingInsert = true;
395 break;
396 }
397 }
398
399 Info = BBInfo.AnticipatedIn;
400 }
401
402 for (MachineInstr &MI : MBB) {
403 int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc());
404 if (VXRMIdx >= 0 && !ignoresVXRM(MI)) {
405 unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm();
406
407 if (PendingInsert || !Info.isStatic() ||
408 Info.getVXRMImm() != NewVXRMImm) {
409 assert((!PendingInsert ||
410 (Info.isStatic() && Info.getVXRMImm() == NewVXRMImm)) &&
411 "Pending VXRM insertion mismatch");
412 LLVM_DEBUG(dbgs() << "Inserting before "; MI.print(dbgs()));
413 BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteVXRMImm))
414 .addImm(NewVXRMImm);
415 PendingInsert = false;
416 }
417
418 MI.addOperand(MachineOperand::CreateReg(RISCV::VXRM, /*IsDef*/ false,
419 /*IsImp*/ true));
420 Info.setVXRMImm(NewVXRMImm);
421 continue;
422 }
423
424 if (MI.isCall() || MI.isInlineAsm() ||
425 MI.modifiesRegister(RISCV::VXRM, /*TRI=*/nullptr))
426 Info.setUnknown();
427 }
428
429 // If all our successors anticipate a value, do the insert.
430 // NOTE: It's possible that not all predecessors of our successor provide the
431 // correct value. This can occur on critical edges. If we don't split the
432 // critical edge we'll also have a write vxrm in the succesor that is
433 // redundant with this one.
434 if (PendingInsert ||
435 (BBInfo.AnticipatedOut.isStatic() &&
436 (!Info.isStatic() ||
437 Info.getVXRMImm() != BBInfo.AnticipatedOut.getVXRMImm()))) {
438 assert((!PendingInsert ||
439 (Info.isStatic() && BBInfo.AnticipatedOut.isStatic() &&
440 Info.getVXRMImm() == BBInfo.AnticipatedOut.getVXRMImm())) &&
441 "Pending VXRM insertion mismatch");
442 LLVM_DEBUG(dbgs() << "Inserting at end of " << printMBBReference(MBB)
443 << " changing to " << BBInfo.AnticipatedOut << "\n");
445 TII->get(RISCV::WriteVXRMImm))
446 .addImm(BBInfo.AnticipatedOut.getVXRMImm());
447 }
448}
449
450bool RISCVInsertWriteVXRM::runOnMachineFunction(MachineFunction &MF) {
451 // Skip if the vector extension is not enabled.
453 if (!ST.hasVInstructions())
454 return false;
455
456 TII = ST.getInstrInfo();
457
458 assert(BlockInfo.empty() && "Expect empty block infos");
459 BlockInfo.resize(MF.getNumBlockIDs());
460
461 // Phase 1 - collect block information.
462 bool NeedVXRMChange = false;
463 for (const MachineBasicBlock &MBB : MF)
464 NeedVXRMChange |= computeVXRMChanges(MBB);
465
466 if (!NeedVXRMChange) {
467 BlockInfo.clear();
468 return false;
469 }
470
471 // Phase 2 - Compute available VXRM using a forward walk.
472 for (const MachineBasicBlock &MBB : MF) {
473 WorkList.push(&MBB);
474 BlockInfo[MBB.getNumber()].InQueue = true;
475 }
476 while (!WorkList.empty()) {
477 const MachineBasicBlock &MBB = *WorkList.front();
478 WorkList.pop();
479 computeAvailable(MBB);
480 }
481
482 // Phase 3 - Compute anticipated VXRM using a backwards walk.
483 for (const MachineBasicBlock &MBB : llvm::reverse(MF)) {
484 WorkList.push(&MBB);
485 BlockInfo[MBB.getNumber()].InQueue = true;
486 }
487 while (!WorkList.empty()) {
488 const MachineBasicBlock &MBB = *WorkList.front();
489 WorkList.pop();
490 computeAnticipated(MF, MBB);
491 }
492
493 // Phase 4 - Emit VXRM writes at the earliest place possible.
494 for (MachineBasicBlock &MBB : MF)
495 emitWriteVXRM(MBB);
496
497 BlockInfo.clear();
498
499 return true;
500}
501
503 return new RISCVInsertWriteVXRM();
504}
aarch64 promote const
MachineBasicBlock & MBB
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define LLVM_ATTRIBUTE_USED
Definition: Compiler.h:230
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:622
#define LLVM_DEBUG(...)
Definition: Debug.h:106
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1313
@ Available
We know the block is fully available. This is a fixpoint.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static Interval intersect(const Interval &I1, const Interval &I2)
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
#define RISCV_INSERT_WRITE_VXRM_NAME
#define DEBUG_TYPE
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
BlockData()=default
Represent the analysis usage information of a pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
A debug info location.
Definition: DebugLoc.h:33
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
bool isEntryBlock() const
Returns true if this is the entry block of the function.
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
virtual void print(raw_ostream &OS, const Module *M) const
print - Print out the internal state of the pass.
Definition: Pass.cpp:130
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
TargetInstrInfo - Interface to description of machine instruction set.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
static int getVXRMOpNum(const MCInstrDesc &Desc)
unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
@ Uninitialized
Definition: Threading.h:61
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2082
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:303
FunctionPass * createRISCVInsertWriteVXRMPass()
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.