57 #define DEBUG_TYPE "vec-merger"
63 return (*It).isImplicitDef();
78 std::vector<unsigned> UndefReg;
82 for (
unsigned i = 1, e = Instr->getNumOperands();
i < e;
i+=2) {
84 unsigned Chan = Instr->getOperand(
i + 1).
getImm();
86 UndefReg.push_back(Chan);
88 RegToChan[MO.
getReg()] = Chan;
92 RegSeqInfo() =
default;
95 return RSI.Instr == Instr;
105 bool areAllUsesSwizzeable(
unsigned Reg)
const;
107 const std::vector<std::pair<unsigned, unsigned>> &RemapChan)
const;
108 bool tryMergeVector(
const RegSeqInfo *Untouched, RegSeqInfo *ToMerge,
109 std::vector<std::pair<unsigned, unsigned>> &Remap)
const;
110 bool tryMergeUsingCommonSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI,
111 std::vector<std::pair<unsigned, unsigned>> &RemapChan);
112 bool tryMergeUsingFreeSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI,
113 std::vector<std::pair<unsigned, unsigned>> &RemapChan);
114 MachineInstr *RebuildVector(RegSeqInfo *MI,
const RegSeqInfo *BaseVec,
115 const std::vector<std::pair<unsigned, unsigned>> &RemapChan)
const;
117 void trackRSI(
const RegSeqInfo &RSI);
121 InstructionSetMap PreviousRegSeqByReg;
122 InstructionSetMap PreviousRegSeqByUndefCount;
140 return "R600 Vector Registers Merge Pass";
150 bool R600VectorRegMerger::canSwizzle(
const MachineInstr &MI)
155 case AMDGPU::R600_ExportSwz:
156 case AMDGPU::EG_ExportSwz:
163 bool R600VectorRegMerger::tryMergeVector(
const RegSeqInfo *Untouched,
164 RegSeqInfo *ToMerge, std::vector< std::pair<unsigned, unsigned>> &Remap)
166 unsigned CurrentUndexIdx = 0;
168 E = ToMerge->RegToChan.end(); It !=
E; ++It) {
170 Untouched->RegToChan.find((*It).first);
171 if (PosInUntouched != Untouched->RegToChan.end()) {
172 Remap.push_back(std::pair<unsigned, unsigned>
173 ((*It).second, (*PosInUntouched).second));
176 if (CurrentUndexIdx >= Untouched->UndefReg.size())
178 Remap.push_back(std::pair<unsigned, unsigned>
179 ((*It).second, Untouched->UndefReg[CurrentUndexIdx++]));
187 const std::vector<std::pair<unsigned, unsigned>> &RemapChan,
189 for (
unsigned j = 0, je = RemapChan.size(); j < je; j++) {
190 if (RemapChan[j].first == Chan)
191 return RemapChan[j].second;
197 RegSeqInfo *RSI,
const RegSeqInfo *BaseRSI,
198 const std::vector<std::pair<unsigned, unsigned>> &RemapChan)
const {
199 unsigned Reg = RSI->Instr->getOperand(0).getReg();
204 unsigned SrcVec = BaseRSI->Instr->getOperand(0).getReg();
206 std::vector<unsigned> UpdatedUndef = BaseRSI->UndefReg;
208 E = RSI->RegToChan.end(); It !=
E; ++It) {
210 unsigned SubReg = (*It).first;
211 unsigned Swizzle = (*It).second;
219 UpdatedRegToChan[
SubReg] = Chan;
220 std::vector<unsigned>::iterator ChanPos =
llvm::find(UpdatedUndef, Chan);
221 if (ChanPos != UpdatedUndef.end())
222 UpdatedUndef.erase(ChanPos);
224 "UpdatedUndef shouldn't contain Chan more than once!");
230 BuildMI(MBB, Pos, DL,
TII->get(AMDGPU::COPY),
Reg).addReg(SrcVec);
237 SwizzleInput(*It, RemapChan);
240 RSI->Instr->eraseFromParent();
244 RSI->RegToChan = UpdatedRegToChan;
245 RSI->UndefReg = UpdatedUndef;
251 for (InstructionSetMap::iterator It = PreviousRegSeqByReg.begin(),
252 E = PreviousRegSeqByReg.end(); It !=
E; ++It) {
253 std::vector<MachineInstr *> &MIs = (*It).second;
256 for (InstructionSetMap::iterator It = PreviousRegSeqByUndefCount.begin(),
257 E = PreviousRegSeqByUndefCount.end(); It !=
E; ++It) {
258 std::vector<MachineInstr *> &MIs = (*It).second;
263 void R600VectorRegMerger::SwizzleInput(
MachineInstr &MI,
264 const std::vector<std::pair<unsigned, unsigned>> &RemapChan)
const {
266 if (
TII->get(MI.
getOpcode()).TSFlags & R600_InstFlag::TEX_INST)
270 for (
unsigned i = 0;
i < 4;
i++) {
272 for (
unsigned j = 0, e = RemapChan.size(); j < e; j++) {
273 if (RemapChan[j].first == Swizzle) {
281 bool R600VectorRegMerger::areAllUsesSwizzeable(
unsigned Reg)
const {
284 if (!canSwizzle(*It))
290 bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI,
291 RegSeqInfo &CompatibleRSI,
292 std::vector<std::pair<unsigned, unsigned>> &RemapChan) {
294 MOE = RSI.Instr->operands_end(); MOp != MOE; ++MOp) {
297 if (PreviousRegSeqByReg[MOp->getReg()].empty())
299 for (
MachineInstr *MI : PreviousRegSeqByReg[MOp->getReg()]) {
300 CompatibleRSI = PreviousRegSeq[
MI];
301 if (RSI == CompatibleRSI)
303 if (tryMergeVector(&CompatibleRSI, &RSI, RemapChan))
310 bool R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI,
311 RegSeqInfo &CompatibleRSI,
312 std::vector<std::pair<unsigned, unsigned>> &RemapChan) {
313 unsigned NeededUndefs = 4 - RSI.UndefReg.size();
314 if (PreviousRegSeqByUndefCount[NeededUndefs].empty())
316 std::vector<MachineInstr *> &MIs =
317 PreviousRegSeqByUndefCount[NeededUndefs];
318 CompatibleRSI = PreviousRegSeq[MIs.back()];
319 tryMergeVector(&CompatibleRSI, &RSI, RemapChan);
323 void R600VectorRegMerger::trackRSI(
const RegSeqInfo &RSI) {
325 It = RSI.RegToChan.begin(),
E = RSI.RegToChan.end(); It !=
E; ++It) {
326 PreviousRegSeqByReg[(*It).first].push_back(RSI.Instr);
328 PreviousRegSeqByUndefCount[RSI.UndefReg.size()].push_back(RSI.Instr);
329 PreviousRegSeq[RSI.Instr] = RSI;
341 MBB != MBBe; ++
MBB) {
343 PreviousRegSeq.
clear();
344 PreviousRegSeqByReg.clear();
345 PreviousRegSeqByUndefCount.clear();
348 MII != MIIE; ++MII) {
350 if (MI.
getOpcode() != AMDGPU::REG_SEQUENCE) {
351 if (TII->get(MI.
getOpcode()).TSFlags & R600_InstFlag::TEX_INST) {
362 RegSeqInfo RSI(*MRI, &MI);
366 if (!areAllUsesSwizzeable(Reg))
370 dbgs() <<
"Trying to optimize ";
374 RegSeqInfo CandidateRSI;
375 std::vector<std::pair<unsigned, unsigned>> RemapChan;
376 DEBUG(
dbgs() <<
"Using common slots...\n";);
377 if (tryMergeUsingCommonSlot(RSI, CandidateRSI, RemapChan)) {
379 RemoveMI(CandidateRSI.Instr);
380 MII = RebuildVector(&RSI, &CandidateRSI, RemapChan);
384 DEBUG(
dbgs() <<
"Using free slots...\n";);
386 if (tryMergeUsingFreeSlot(RSI, CandidateRSI, RemapChan)) {
387 RemoveMI(CandidateRSI.Instr);
388 MII = RebuildVector(&RSI, &CandidateRSI, RemapChan);
400 return new R600VectorRegMerger(tm);
static bool isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg)
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
AMDGPU specific subclass of TargetSubtarget.
Interface definition for R600InstrInfo.
unsigned createVirtualRegister(const TargetRegisterClass *RegClass)
createVirtualRegister - Create and return a new virtual register in the function with the specified register class.
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
AnalysisUsage & addRequired()
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
const HexagonInstrInfo * TII
use_instr_iterator use_instr_begin(unsigned RegNo) const
Reg
All possible values of the reg field in the ModR/M byte.
FunctionPass * createR600VectorRegMerger(TargetMachine &tm)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
defusechain_iterator - This class provides iterator support for machine operands in the function that use or define a specific register.
static def_instr_iterator def_instr_end()
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned const MachineRegisterInfo * MRI
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
bool isReserved(unsigned PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
const MachineOperand & getOperand(unsigned i) const
Represent the analysis usage information of a pass.
const R600InstrInfo * getInstrInfo() const override
void setImm(int64_t immVal)
FunctionPass class - This class is used to implement most global optimizations.
static unsigned getReassignedChan(const std::vector< std::pair< unsigned, unsigned >> &RemapChan, unsigned Chan)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Iterator for intrusive lists based on ilist_node.
auto find(R &&Range, const T &Val) -> decltype(std::begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
MachineOperand class - Representation of each machine instruction operand.
void dump(const TargetInstrInfo *TII=nullptr) const
void setPreservesCFG()
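This function should be called by the pass, iff they do not: (1) add or remove basic blocks from the function, or (2) modify terminator instructions in any way.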
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Representation of each machine instruction.
def_instr_iterator def_instr_begin(unsigned RegNo) const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static std::vector< std::pair< int, unsigned > > Swizzle(std::vector< std::pair< int, unsigned > > Src, R600InstrInfo::BankSwizzle Swz)
unsigned getReg() const
getReg - Returns the register number.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static use_instr_iterator use_instr_end()
Primary interface to the complete machine description for the target machine.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
bool operator==(uint64_t V1, const APInt &V2)
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.