31 #define DEBUG_TYPE "packets"
// Returns this pass's name as the string literal "R600 Packetizer".
50 StringRef getPassName()
const override {
return "R600 Packetizer"; }
61 bool ConsideredInstUsesAlreadyWrittenVectorElement;
73 if (!
TII->isALUInstr(I->getOpcode()) && !I->isBundle())
81 int BISlot = getSlot(*BI);
82 if (LastDstChan >= BISlot)
88 if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
90 int DstIdx =
TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::dst);
94 unsigned Dst = BI->getOperand(DstIdx).getReg();
95 if (isTrans ||
TII->isTransOnly(*BI)) {
99 if (BI->getOpcode() == AMDGPU::DOT4_r600 ||
100 BI->getOpcode() == AMDGPU::DOT4_eg) {
101 Result[Dst] = AMDGPU::PV_X;
104 if (Dst == AMDGPU::OQAP) {
108 switch (TRI.getHWRegChan(Dst)) {
110 PVReg = AMDGPU::PV_X;
113 PVReg = AMDGPU::PV_Y;
116 PVReg = AMDGPU::PV_Z;
119 PVReg = AMDGPU::PV_W;
125 }
while ((++BI)->isBundledWithPred());
132 AMDGPU::OpName::src0,
133 AMDGPU::OpName::src1,
136 for (
unsigned i = 0;
i < 3;
i++) {
157 void initPacketizerState()
override {
158 ConsideredInstUsesAlreadyWrittenVectorElement =
false;
169 bool isSoloInstruction(
const MachineInstr &MI)
override {
170 if (
TII->isVector(MI))
174 if (MI.
getOpcode() == AMDGPU::GROUP_BARRIER)
183 bool isLegalToPacketizeTogether(
SUnit *SUI,
SUnit *SUJ)
override {
185 if (getSlot(*MII) == getSlot(*MIJ))
186 ConsideredInstUsesAlreadyWrittenVectorElement =
true;
188 int OpI =
TII->getOperandIdx(MII->
getOpcode(), AMDGPU::OpName::pred_sel),
189 OpJ =
TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel);
191 PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0;
195 for (
unsigned i = 0, e = SUJ->
Succs.size();
i < e; ++
i) {
209 TII->definesAddressRegister(*MII) ||
TII->definesAddressRegister(*MIJ);
211 TII->usesAddressRegister(*MII) ||
TII->usesAddressRegister(*MIJ);
213 return !ARDef || !ARUse;
218 bool isLegalToPruneDependencies(
SUnit *SUI,
SUnit *SUJ)
override {
223 unsigned LastOp =
TII->getOperandIdx(MI->
getOpcode(), AMDGPU::OpName::last);
229 std::vector<R600InstrInfo::BankSwizzle> &BS,
231 isTransSlot =
TII->isTransOnly(MI);
232 assert (!isTransSlot || VLIW5);
235 if (!isTransSlot && !CurrentPacketMIs.empty()) {
236 if (getSlot(MI) <= getSlot(*CurrentPacketMIs.back())) {
237 if (ConsideredInstUsesAlreadyWrittenVectorElement &&
238 !
TII->isVectorOnly(MI) && VLIW5) {
241 dbgs() <<
"Considering as Trans Inst :";
251 CurrentPacketMIs.push_back(&MI);
252 if (!
TII->fitsConstReadLimitations(CurrentPacketMIs)) {
254 dbgs() <<
"Couldn't pack :\n";
256 dbgs() <<
"with the following packets :\n";
257 for (
unsigned i = 0, e = CurrentPacketMIs.size() - 1;
i < e;
i++) {
258 CurrentPacketMIs[
i]->dump();
261 dbgs() <<
"because of Consts read limitations\n";
263 CurrentPacketMIs.pop_back();
268 if (!
TII->fitsReadPortLimitations(CurrentPacketMIs,
269 PV, BS, isTransSlot)) {
271 dbgs() <<
"Couldn't pack :\n";
273 dbgs() <<
"with the following packets :\n";
274 for (
unsigned i = 0, e = CurrentPacketMIs.size() - 1;
i < e;
i++) {
275 CurrentPacketMIs[
i]->dump();
278 dbgs() <<
"because of Read port limitations\n";
280 CurrentPacketMIs.pop_back();
285 if (isTransSlot &&
TII->readsLDSSrcReg(MI))
288 CurrentPacketMIs.pop_back();
294 CurrentPacketMIs.empty() ? &MI : CurrentPacketMIs.front();
296 getPreviousVector(FirstInBundle);
297 std::vector<R600InstrInfo::BankSwizzle> BS;
300 if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) {
301 for (
unsigned i = 0, e = CurrentPacketMIs.size();
i < e;
i++) {
304 AMDGPU::OpName::bank_swizzle);
308 TII->getOperandIdx(MI.
getOpcode(), AMDGPU::OpName::bank_swizzle);
310 if (!CurrentPacketMIs.empty())
311 setIsLastBit(CurrentPacketMIs.back(), 0);
312 substitutePV(MI, PV);
315 endPacket(std::next(It)->
getParent(), std::next(It));
320 if (
TII->isTransOnly(MI))
336 assert(Packetizer.getResourceTracker() &&
"Empty DFA table!");
338 if (Packetizer.getResourceTracker()->getInstrItins()->isEmpty())
356 if (MI->isKill() || MI->getOpcode() == AMDGPU::IMPLICIT_DEF ||
357 (MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) {
372 unsigned RemainingCount =
MBB->
size();
378 for(;I !=
MBB->
begin(); --
I, --RemainingCount) {
379 if (TII->isSchedulingBoundary(*std::prev(I), &*
MBB, Fn))
385 if (I == RegionEnd) {
386 RegionEnd = std::prev(RegionEnd);
391 if (I == std::prev(RegionEnd)) {
392 RegionEnd = std::prev(RegionEnd);
396 Packetizer.PacketizeMIs(&*
MBB, &*I, RegionEnd);
408 return new R600Packetizer(tm);
bool hasCaymanISA() const
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
AMDGPU specific subclass of TargetSubtarget.
bool isSucc(SUnit *N)
isSucc - Test if node N is a successor of this node.
Interface definition for R600InstrInfo.
unsigned getHWRegChan(unsigned reg) const
get the HW encoding for a register's channel.
MachineInstr * getInstr() const
getInstr - Return the representative MachineInstr for this SUnit.
A register anti-dependence (aka WAR).
AnalysisUsage & addRequired()
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
const HexagonInstrInfo * TII
Kind getKind() const
getKind - Return an enum value representing the kind of the dependence.
const HexagonRegisterInfo & getRegisterInfo() const
HexagonInstrInfo specifics.
A register output-dependence (aka WAW).
virtual MachineBasicBlock::iterator addToPacket(MachineInstr &MI)
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
SDep - Scheduling dependency.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const MachineOperand & getOperand(unsigned i) const
FunctionPass * createR600Packetizer(TargetMachine &tm)
Represent the analysis usage information of a pass.
const R600InstrInfo * getInstrInfo() const override
static const unsigned End
void setImm(int64_t immVal)
FunctionPass class - This class is used to implement most global optimizations.
static void write(bool isBE, void *P, T V)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isPredicated(const MachineInstr &MI) const override
Returns true if the instruction is already predicated.
Iterator for intrusive lists based on ilist_node.
instr_iterator getInstrIterator() const
void dump(const TargetInstrInfo *TII=nullptr) const
void setPreservesCFG()
This function should be called by the pass, iff they do not:
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Representation of each machine instruction.
void setReg(unsigned Reg)
Change the register this operand corresponds to.
iterator find(const KeyT &Val)
unsigned getReg() const
getReg - Returns the register number.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SmallVector< SDep, 4 > Succs
static const Function * getParent(const Value *V)
Primary interface to the complete machine description for the target machine.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
SUnit - Scheduling unit. This is a node in the scheduling DAG.