31 #define DEBUG_TYPE "packets"
50 const char *getPassName()
const override {
51 return "R600 Packetizer";
64 bool ConsideredInstUsesAlreadyWrittenVectorElement;
76 if (!
TII->isALUInstr(I->getOpcode()) && !I->isBundle())
84 int BISlot = getSlot(BI);
85 if (LastDstChan >= BISlot)
91 if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
93 int DstIdx =
TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::dst);
97 unsigned Dst = BI->getOperand(DstIdx).getReg();
98 if (isTrans ||
TII->isTransOnly(BI)) {
102 if (BI->getOpcode() == AMDGPU::DOT4_r600 ||
103 BI->getOpcode() == AMDGPU::DOT4_eg) {
104 Result[Dst] = AMDGPU::PV_X;
107 if (Dst == AMDGPU::OQAP) {
111 switch (TRI.getHWRegChan(Dst)) {
113 PVReg = AMDGPU::PV_X;
116 PVReg = AMDGPU::PV_Y;
119 PVReg = AMDGPU::PV_Z;
122 PVReg = AMDGPU::PV_W;
128 }
while ((++BI)->isBundledWithPred());
135 AMDGPU::OpName::src0,
136 AMDGPU::OpName::src1,
139 for (
unsigned i = 0; i < 3; i++) {
140 int OperandIdx =
TII->getOperandIdx(MI->
getOpcode(), Ops[i]);
153 TII(static_cast<const R600InstrInfo *>(
160 void initPacketizerState()
override {
161 ConsideredInstUsesAlreadyWrittenVectorElement =
false;
173 if (
TII->isVector(*MI))
177 if (MI->
getOpcode() == AMDGPU::GROUP_BARRIER)
188 bool isLegalToPacketizeTogether(
SUnit *SUI,
SUnit *SUJ)
override {
190 if (getSlot(MII) == getSlot(MIJ))
191 ConsideredInstUsesAlreadyWrittenVectorElement =
true;
193 int OpI =
TII->getOperandIdx(MII->
getOpcode(), AMDGPU::OpName::pred_sel),
194 OpJ =
TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel);
196 PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0;
200 for (
unsigned i = 0, e = SUJ->
Succs.size(); i < e; ++i) {
213 bool ARDef =
TII->definesAddressRegister(MII) ||
214 TII->definesAddressRegister(MIJ);
215 bool ARUse =
TII->usesAddressRegister(MII) ||
216 TII->usesAddressRegister(MIJ);
225 bool isLegalToPruneDependencies(
SUnit *SUI,
SUnit *SUJ)
override {
230 unsigned LastOp =
TII->getOperandIdx(MI->
getOpcode(), AMDGPU::OpName::last);
236 std::vector<R600InstrInfo::BankSwizzle> &BS,
238 isTransSlot =
TII->isTransOnly(MI);
239 assert (!isTransSlot || VLIW5);
242 if (!isTransSlot && !CurrentPacketMIs.empty()) {
243 if (getSlot(MI) <= getSlot(CurrentPacketMIs.back())) {
244 if (ConsideredInstUsesAlreadyWrittenVectorElement &&
245 !
TII->isVectorOnly(MI) && VLIW5) {
255 CurrentPacketMIs.push_back(MI);
256 if (!
TII->fitsConstReadLimitations(CurrentPacketMIs)) {
258 dbgs() <<
"Couldn't pack :\n";
260 dbgs() <<
"with the following packets :\n";
261 for (
unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
262 CurrentPacketMIs[i]->dump();
265 dbgs() <<
"because of Consts read limitations\n";
267 CurrentPacketMIs.pop_back();
272 if (!
TII->fitsReadPortLimitations(CurrentPacketMIs,
273 PV, BS, isTransSlot)) {
275 dbgs() <<
"Couldn't pack :\n";
277 dbgs() <<
"with the following packets :\n";
278 for (
unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
279 CurrentPacketMIs[i]->dump();
282 dbgs() <<
"because of Read port limitations\n";
284 CurrentPacketMIs.pop_back();
289 if (isTransSlot &&
TII->readsLDSSrcReg(MI))
292 CurrentPacketMIs.pop_back();
298 CurrentPacketMIs.empty() ? MI : CurrentPacketMIs.front();
300 getPreviousVector(FirstInBundle);
301 std::vector<R600InstrInfo::BankSwizzle> BS;
304 if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) {
305 for (
unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
308 AMDGPU::OpName::bank_swizzle);
312 AMDGPU::OpName::bank_swizzle);
314 if (!CurrentPacketMIs.empty())
315 setIsLastBit(CurrentPacketMIs.back(), 0);
316 substitutePV(MI, PV);
319 endPacket(std::next(It)->
getParent(), std::next(It));
324 if (
TII->isTransOnly(MI))
338 assert(Packetizer.getResourceTracker() &&
"Empty DFA table!");
351 MBB != MBBe; ++MBB) {
356 (MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) {
359 MBB->erase(DeleteMI);
369 MBB != MBBe; ++MBB) {
371 unsigned RemainingCount = MBB->size();
373 RegionEnd != MBB->begin();) {
377 for(;I != MBB->begin(); --
I, --RemainingCount) {
384 if (I == RegionEnd) {
385 RegionEnd = std::prev(RegionEnd);
390 if (I == std::prev(RegionEnd)) {
391 RegionEnd = std::prev(RegionEnd);
395 Packetizer.PacketizeMIs(MBB, I, RegionEnd);
407 return new R600Packetizer(tm);
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
AMDGPU specific subclass of TargetSubtarget.
bool isSucc(SUnit *N)
isSucc - Test if node N is a successor of this node.
Interface definition for R600InstrInfo.
unsigned getHWRegChan(unsigned reg) const
get the HW encoding for a register's channel.
MachineInstr * getInstr() const
getInstr - Return the representative MachineInstr for this SUnit.
virtual bool isSchedulingBoundary(const MachineInstr *MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
Instructions::iterator instr_iterator
A register anti-dependence (aka WAR).
AnalysisUsage & addRequired()
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
const HexagonInstrInfo * TII
bool isPredicated(const MachineInstr *MI) const override
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
const HexagonRegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
A register output-dependence (aka WAW).
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
TargetInstrInfo - Interface to description of machine instruction set.
SDep - Scheduling dependency.
IMPLICIT_DEF - This is the MachineInstr-level equivalent of undef.
bundle_iterator< MachineInstr, instr_iterator > iterator
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const MachineOperand & getOperand(unsigned i) const
FunctionPass * createR600Packetizer(TargetMachine &tm)
Represent the analysis usage information of a pass.
void setImm(int64_t immVal)
FunctionPass class - This class is used to implement most global optimizations.
void setPreservesCFG()
This function should be called by the pass iff it does not add or remove basic blocks from the function and does not modify terminator instructions in any way.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void write(void *memory, value_type value)
Write a value to memory with a particular endianness.
Representation of each machine instruction.
void setReg(unsigned Reg)
Change the register this operand corresponds to.
iterator find(const KeyT &Val)
Kind getKind() const
getKind - Return an enum value representing the kind of the dependence.
unsigned getReg() const
getReg - Returns the register number.
virtual const TargetInstrInfo * getInstrInfo() const
SmallVector< SDep, 4 > Succs
static const Function * getParent(const Value *V)
BasicBlockListType::iterator iterator
Primary interface to the complete machine description for the target machine.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
SUnit - Scheduling unit. This is a node in the scheduling DAG.
virtual MachineBasicBlock::iterator addToPacket(MachineInstr *MI)