81 #define DEBUG_TYPE "si-wqm"
94 explicit PrintState(
int State) : State(State) {}
98 if (PS.State & StateWQM)
100 if (PS.State & StateExact) {
101 if (PS.State & StateWQM)
124 WorkItem() =
default;
143 std::vector<WorkItem> &Worklist);
144 void markUsesWQM(
const MachineInstr &MI, std::vector<WorkItem> &Worklist);
145 char scanInstructions(
MachineFunction &MF, std::vector<WorkItem> &Worklist);
146 void propagateInstruction(
MachineInstr &MI, std::vector<WorkItem> &Worklist);
150 bool requiresCorrectState(
const MachineInstr &MI)
const;
159 unsigned SaveWQM,
unsigned LiveMaskReg);
164 void lowerLiveMaskQueries(
unsigned LiveMaskReg);
174 StringRef getPassName()
const override {
return "SI Whole Quad Mode"; }
196 return new SIWholeQuadMode;
199 void SIWholeQuadMode::printInfo() {
200 for (
const auto &BII : Blocks) {
201 dbgs() <<
"\nBB#" << BII.first->getNumber() <<
":\n"
202 <<
" InNeeds = " << PrintState(BII.second.InNeeds)
203 <<
", Needs = " << PrintState(BII.second.Needs)
204 <<
", OutNeeds = " << PrintState(BII.second.OutNeeds) <<
"\n\n";
207 auto III = Instructions.find(&MI);
208 if (III == Instructions.end())
211 dbgs() <<
" " << MI <<
" Needs = " << PrintState(III->second.Needs)
212 <<
", OutNeeds = " << PrintState(III->second.OutNeeds) <<
'\n';
218 std::vector<WorkItem> &Worklist) {
219 InstrInfo &II = Instructions[&
MI];
221 assert(Flag == StateWQM || Flag == StateExact);
231 Worklist.push_back(&MI);
235 void SIWholeQuadMode::markUsesWQM(
const MachineInstr &MI,
236 std::vector<WorkItem> &Worklist) {
238 if (!
Use.isReg() || !
Use.isUse())
241 unsigned Reg =
Use.getReg();
247 if (Reg == AMDGPU::EXEC)
251 LiveRange &LR = LIS->getRegUnit(*RegUnit);
261 markInstruction(*LIS->getInstructionFromIndex(Value->
def), StateWQM,
269 markInstruction(DefMI, StateWQM, Worklist);
276 std::vector<WorkItem> &Worklist) {
277 char GlobalFlags = 0;
280 for (
auto BI = MF.
begin(), BE = MF.
end(); BI != BE; ++BI) {
283 for (
auto II = MBB.
begin(),
IE = MBB.
end(); II !=
IE; ++II) {
288 if (
TII->isDS(Opcode)) {
290 }
else if (
TII->isWQM(Opcode)) {
294 markUsesWQM(MI, Worklist);
295 GlobalFlags |= StateWQM;
297 }
else if (
TII->isDisableWQM(MI)) {
300 if (Opcode == AMDGPU::SI_PS_LIVE) {
301 LiveMaskQueries.push_back(&MI);
302 }
else if (WQMOutputs) {
310 unsigned Reg = MO.getReg();
312 if (!TRI->isVirtualRegister(Reg) &&
313 TRI->hasVGPRs(TRI->getPhysRegClass(Reg))) {
324 markInstruction(MI, Flags, Worklist);
325 GlobalFlags |=
Flags;
332 void SIWholeQuadMode::propagateInstruction(
MachineInstr &MI,
333 std::vector<WorkItem>& Worklist) {
335 InstrInfo II = Instructions[&
MI];
336 BlockInfo &BI = Blocks[
MBB];
340 if ((II.OutNeeds & StateWQM) && !II.Needs &&
342 Instructions[&
MI].Needs = StateWQM;
347 BI.Needs |= II.Needs;
348 if ((BI.InNeeds | II.Needs) != BI.InNeeds) {
349 BI.InNeeds |= II.Needs;
350 Worklist.push_back(MBB);
355 char InNeeds = II.Needs | II.OutNeeds;
356 if (!PrevMI->isPHI()) {
357 InstrInfo &PrevII = Instructions[PrevMI];
358 if ((PrevII.OutNeeds | InNeeds) != PrevII.OutNeeds) {
359 PrevII.OutNeeds |= InNeeds;
360 Worklist.push_back(PrevMI);
366 assert(II.Needs != (StateWQM | StateExact));
368 if (II.Needs == StateWQM)
369 markUsesWQM(MI, Worklist);
373 std::vector<WorkItem>& Worklist) {
374 BlockInfo BI = Blocks[&
MBB];
379 InstrInfo &LastII = Instructions[LastMI];
380 if ((LastII.OutNeeds | BI.OutNeeds) != LastII.OutNeeds) {
381 LastII.OutNeeds |= BI.OutNeeds;
382 Worklist.push_back(LastMI);
388 BlockInfo &PredBI = Blocks[Pred];
389 if ((PredBI.OutNeeds | BI.InNeeds) == PredBI.OutNeeds)
392 PredBI.OutNeeds |= BI.InNeeds;
393 PredBI.InNeeds |= BI.InNeeds;
394 Worklist.push_back(Pred);
399 BlockInfo &SuccBI = Blocks[Succ];
400 if ((SuccBI.InNeeds | BI.OutNeeds) == SuccBI.InNeeds)
403 SuccBI.InNeeds |= BI.OutNeeds;
404 Worklist.push_back(Succ);
409 std::vector<WorkItem> Worklist;
410 char GlobalFlags = scanInstructions(MF, Worklist);
412 while (!Worklist.empty()) {
413 WorkItem WI = Worklist.back();
417 propagateInstruction(*WI.MI, Worklist);
419 propagateBlock(*WI.MBB, Worklist);
430 bool SIWholeQuadMode::requiresCorrectState(
const MachineInstr &MI)
const {
435 if (
TII->isScalarUnit(MI))
458 unsigned SaveReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
462 .addReg(AMDGPU::SCC);
467 LIS->InsertMachineInstrInMaps(*Save);
468 LIS->InsertMachineInstrInMaps(*Restore);
469 LIS->createAndComputeVirtRegInterval(SaveReg);
481 return PreferLast ? Last : First;
484 auto MBBE = MBB.
end();
485 SlotIndex FirstIdx = First != MBBE ? LIS->getInstructionIndex(*First)
486 : LIS->getMBBEndIdx(&MBB);
488 Last != MBBE ? LIS->getInstructionIndex(*Last) : LIS->getMBBEndIdx(&MBB);
489 SlotIndex Idx = PreferLast ? LastIdx : FirstIdx;
512 if (
MachineInstr *MI = LIS->getInstructionFromIndex(Idx))
515 assert(Idx == LIS->getMBBEndIdx(&MBB));
520 MBBI = saveSCC(MBB, MBBI);
527 unsigned SaveWQM,
unsigned LiveMaskReg) {
533 .addReg(LiveMaskReg);
537 .addReg(AMDGPU::EXEC)
541 LIS->InsertMachineInstrInMaps(*MI);
555 .addReg(AMDGPU::EXEC);
558 LIS->InsertMachineInstrInMaps(*MI);
563 auto BII = Blocks.find(&MBB);
564 if (BII == Blocks.end())
567 const BlockInfo &BI = BII->second;
569 if (!(BI.InNeeds & StateWQM))
574 if (!isEntry && !(BI.Needs & StateExact) && BI.OutNeeds != StateExact)
579 unsigned SavedWQMReg = 0;
580 bool WQMFromExec = isEntry;
581 char State = isEntry ? StateExact : StateWQM;
599 if (requiresCorrectState(MI)) {
600 auto III = Instructions.find(&MI);
601 if (III != Instructions.end()) {
602 Needs = III->second.Needs;
603 OutNeeds = III->second.OutNeeds;
607 if (MI.
isTerminator() && !Needs && OutNeeds == StateExact)
610 if (MI.
getOpcode() == AMDGPU::SI_ELSE && BI.OutNeeds == StateExact)
616 if (BI.OutNeeds & StateWQM)
618 else if (BI.OutNeeds == StateExact)
623 if (Needs != State) {
625 prepareInsertion(MBB, First, II, Needs == StateWQM,
626 Needs == StateExact || WQMFromExec);
628 if (Needs == StateExact) {
629 if (!WQMFromExec && (OutNeeds & StateWQM))
630 SavedWQMReg =
MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
632 toExact(MBB, Before, SavedWQMReg, LiveMaskReg);
634 assert(WQMFromExec == (SavedWQMReg == 0));
636 toWQM(MBB, Before, SavedWQMReg);
639 LIS->createAndComputeVirtRegInterval(SavedWQMReg);
656 void SIWholeQuadMode::lowerLiveMaskQueries(
unsigned LiveMaskReg) {
662 .addReg(LiveMaskReg);
664 LIS->ReplaceMachineInstrInMaps(*MI, *Copy);
673 Instructions.clear();
675 LiveMaskQueries.clear();
682 LIS = &getAnalysis<LiveIntervals>();
684 char GlobalFlags = analyzeFunction(MF);
685 if (!(GlobalFlags & StateWQM)) {
686 lowerLiveMaskQueries(AMDGPU::EXEC);
687 return !LiveMaskQueries.empty();
691 unsigned LiveMaskReg = 0;
696 if (GlobalFlags & StateExact || !LiveMaskQueries.empty()) {
697 LiveMaskReg =
MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
699 TII->get(AMDGPU::COPY), LiveMaskReg)
700 .addReg(AMDGPU::EXEC);
701 LIS->InsertMachineInstrInMaps(*MI);
704 if (GlobalFlags == StateWQM) {
708 .addReg(AMDGPU::EXEC);
710 lowerLiveMaskQueries(LiveMaskReg);
718 lowerLiveMaskQueries(LiveMaskReg);
721 for (
auto BII : Blocks)
722 processBlock(*BII.first, LiveMaskReg, BII.first == &*MF.
begin());
const Segment * getSegmentContaining(SlotIndex Idx) const
Return the segment that contains the specified index, or null if there is none.
INITIALIZE_PASS_BEGIN(SIWholeQuadMode, DEBUG_TYPE,"SI Whole Quad Mode", false, false) INITIALIZE_PASS_END(SIWholeQuadMode
AMDGPU specific subclass of TargetSubtarget.
SlotIndex def
The index of the defining instruction.
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
SlotIndex getBaseIndex() const
Returns the base index associated with this index.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
const SIInstrInfo * getInstrInfo() const override
This represents a simple continuous liveness interval for a value.
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
VNInfo - Value Number Information.
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction is part of the terminator for a basic block.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
iterator_range< succ_iterator > successors()
This class represents the liveness of a register, stack slot, etc.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
struct fuzzer::@269 Flags
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
A Use represents the edge between a Value definition and its users.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Reg
All possible values of the reg field in the ModR/M byte.
const MachineBasicBlock & front() const
reverse_iterator rbegin()
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
const MachineBasicBlock * getParent() const
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
unsigned const MachineRegisterInfo * MRI
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
FunctionPass * createSIWholeQuadModePass()
const MachineOperand & getOperand(unsigned i) const
Represent the analysis usage information of a pass.
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
iterator_range< mop_iterator > defs()
Returns a range over all explicit operands that are register definitions.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
void setImm(int64_t immVal)
FunctionPass class - This class is used to implement most global optimizations.
iterator_range< pred_iterator > predecessors()
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
Calling convention used for Mesa pixel shaders.
const SIRegisterInfo * getRegisterInfo() const override
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
void setPreservesCFG()
This function should be called by the pass, iff they do not:
bool isTransient() const
Return true if this is a transient instruction that is either very likely to be eliminated during reg...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
SlotIndex getNextIndex() const
Returns the next index.
Representation of each machine instruction.
Interface definition for SIInstrInfo.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
unsigned getReg() const
getReg - Returns the register number.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
This class implements an extremely fast bulk output stream that can only output to a stream...
StringRef - Represent a constant reference to a string, i.e.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
SlotIndex - An opaque wrapper around machine indexes.