Line data Source code
1 : //===- CriticalAntiDepBreaker.cpp - Anti-dep breaker ----------------------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // This file implements the CriticalAntiDepBreaker class, which
11 : // implements register anti-dependence breaking along a blocks
12 : // critical path during post-RA scheduler.
13 : //
14 : //===----------------------------------------------------------------------===//
15 :
16 : #include "CriticalAntiDepBreaker.h"
17 : #include "llvm/ADT/ArrayRef.h"
18 : #include "llvm/ADT/BitVector.h"
19 : #include "llvm/ADT/DenseMap.h"
20 : #include "llvm/ADT/SmallVector.h"
21 : #include "llvm/CodeGen/MachineBasicBlock.h"
22 : #include "llvm/CodeGen/MachineFrameInfo.h"
23 : #include "llvm/CodeGen/MachineFunction.h"
24 : #include "llvm/CodeGen/MachineInstr.h"
25 : #include "llvm/CodeGen/MachineOperand.h"
26 : #include "llvm/CodeGen/MachineRegisterInfo.h"
27 : #include "llvm/CodeGen/RegisterClassInfo.h"
28 : #include "llvm/CodeGen/ScheduleDAG.h"
29 : #include "llvm/CodeGen/TargetInstrInfo.h"
30 : #include "llvm/CodeGen/TargetRegisterInfo.h"
31 : #include "llvm/CodeGen/TargetSubtargetInfo.h"
32 : #include "llvm/MC/MCInstrDesc.h"
33 : #include "llvm/MC/MCRegisterInfo.h"
34 : #include "llvm/Support/Debug.h"
35 : #include "llvm/Support/raw_ostream.h"
36 : #include <cassert>
37 : #include <map>
38 : #include <utility>
39 : #include <vector>
40 :
41 : using namespace llvm;
42 :
43 : #define DEBUG_TYPE "post-RA-sched"
44 :
45 6728 : CriticalAntiDepBreaker::CriticalAntiDepBreaker(MachineFunction &MFi,
46 6728 : const RegisterClassInfo &RCI)
47 6728 : : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()),
48 6728 : TII(MF.getSubtarget().getInstrInfo()),
49 6728 : TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI),
50 13456 : Classes(TRI->getNumRegs(), nullptr), KillIndices(TRI->getNumRegs(), 0),
51 6728 : DefIndices(TRI->getNumRegs(), 0), KeepRegs(TRI->getNumRegs(), false) {}
52 :
53 : CriticalAntiDepBreaker::~CriticalAntiDepBreaker() = default;
54 :
55 91738 : void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
56 : const unsigned BBSize = BB->size();
57 25503164 : for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) {
58 : // Clear out the register class data.
59 25411426 : Classes[i] = nullptr;
60 :
61 : // Initialize the indices to indicate that no registers are live.
62 25411426 : KillIndices[i] = ~0u;
63 50822852 : DefIndices[i] = BBSize;
64 : }
65 :
66 : // Clear "do not change" set.
67 : KeepRegs.reset();
68 :
69 91738 : bool IsReturnBlock = BB->isReturnBlock();
70 :
71 : // Examine the live-in regs of all successors.
72 : for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
73 209161 : SE = BB->succ_end(); SI != SE; ++SI)
74 594621 : for (const auto &LI : (*SI)->liveins()) {
75 4179716 : for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) {
76 : unsigned Reg = *AI;
77 3819941 : Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
78 3819941 : KillIndices[Reg] = BBSize;
79 7639882 : DefIndices[Reg] = ~0u;
80 : }
81 : }
82 :
83 : // Mark live-out callee-saved registers. In a return block this is
84 : // all callee-saved registers. In non-return this is any
85 : // callee-saved register that is not saved in the prolog.
86 91738 : const MachineFrameInfo &MFI = MF.getFrameInfo();
87 91738 : BitVector Pristine = MFI.getPristineRegs(MF);
88 466230 : for (const MCPhysReg *I = MF.getRegInfo().getCalleeSavedRegs(); *I;
89 : ++I) {
90 374492 : unsigned Reg = *I;
91 374492 : if (!IsReturnBlock && !Pristine.test(Reg))
92 : continue;
93 524560 : for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
94 : unsigned Reg = *AI;
95 480840 : Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
96 480840 : KillIndices[Reg] = BBSize;
97 961680 : DefIndices[Reg] = ~0u;
98 : }
99 : }
100 91738 : }
101 :
102 91738 : void CriticalAntiDepBreaker::FinishBlock() {
103 : RegRefs.clear();
104 : KeepRegs.reset();
105 91738 : }
106 :
107 231885 : void CriticalAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count,
108 : unsigned InsertPosIndex) {
109 : // Kill instructions can define registers but are really nops, and there might
110 : // be a real definition earlier that needs to be paired with uses dominated by
111 : // this kill.
112 :
113 : // FIXME: It may be possible to remove the isKill() restriction once PR18663
114 : // has been properly fixed. There can be value in processing kills as seen in
115 : // the AggressiveAntiDepBreaker class.
116 231885 : if (MI.isDebugInstr() || MI.isKill())
117 : return;
118 : assert(Count < InsertPosIndex && "Instruction index out of expected range!");
119 :
120 64464030 : for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) {
121 128464290 : if (KillIndices[Reg] != ~0u) {
122 : // If Reg is currently live, then mark that it can't be renamed as
123 : // we don't know the extent of its live-range anymore (now that it
124 : // has been scheduled).
125 5357385 : Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
126 5357385 : KillIndices[Reg] = Count;
127 117749520 : } else if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
128 : // Any register which was defined within the previous scheduling region
129 : // may have been rescheduled and its lifetime may overlap with registers
130 : // in ways not reflected in our current liveness state. For each such
131 : // register, adjust the liveness state to be conservatively correct.
132 222292 : Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
133 :
134 : // Move the def index to the end of the previous region, to reflect
135 : // that the def could theoretically have been scheduled at the end.
136 222292 : DefIndices[Reg] = InsertPosIndex;
137 : }
138 : }
139 :
140 231885 : PrescanInstruction(MI);
141 231885 : ScanInstruction(MI, Count);
142 : }
143 :
144 : /// CriticalPathStep - Return the next SUnit after SU on the bottom-up
145 : /// critical path.
146 351054 : static const SDep *CriticalPathStep(const SUnit *SU) {
147 : const SDep *Next = nullptr;
148 : unsigned NextDepth = 0;
149 : // Find the predecessor edge with the greatest depth.
150 734897 : for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
151 1085951 : P != PE; ++P) {
152 : const SUnit *PredSU = P->getSUnit();
153 734897 : unsigned PredLatency = P->getLatency();
154 734897 : unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
155 : // In the case of a latency tie, prefer an anti-dependency edge over
156 : // other types of edges.
157 734897 : if (NextDepth < PredTotalLatency ||
158 121328 : (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
159 : NextDepth = PredTotalLatency;
160 : Next = &*P;
161 : }
162 : }
163 351054 : return Next;
164 : }
165 :
166 826414 : void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) {
167 : // It's not safe to change register allocation for source operands of
168 : // instructions that have special allocation requirements. Also assume all
169 : // registers used in a call must not be changed (ABI).
170 : // FIXME: The issue with predicated instruction is more complex. We are being
171 : // conservative here because the kill markers cannot be trusted after
172 : // if-conversion:
173 : // %r6 = LDR %sp, %reg0, 92, 14, %reg0; mem:LD4[FixedStack14]
174 : // ...
175 : // STR %r0, killed %r6, %reg0, 0, 0, %cpsr; mem:ST4[%395]
176 : // %r6 = LDR %sp, %reg0, 100, 0, %cpsr; mem:LD4[FixedStack12]
177 : // STR %r0, killed %r6, %reg0, 0, 14, %reg0; mem:ST4[%396](align=8)
178 : //
179 : // The first R6 kill is not really a kill since it's killed by a predicated
180 : // instruction which may not be executed. The second R6 def may or may not
181 : // re-define R6 so it's not safe to change it since the last R6 use cannot be
182 : // changed.
183 : bool Special =
184 1618256 : MI.isCall() || MI.hasExtraSrcRegAllocReq() || TII->isPredicated(MI);
185 :
186 : // Scan the register operands for this instruction and update
187 : // Classes and RegRefs.
188 4957213 : for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
189 4130799 : MachineOperand &MO = MI.getOperand(i);
190 4130799 : if (!MO.isReg()) continue;
191 2670460 : unsigned Reg = MO.getReg();
192 2670460 : if (Reg == 0) continue;
193 : const TargetRegisterClass *NewRC = nullptr;
194 :
195 3385828 : if (i < MI.getDesc().getNumOperands())
196 1085770 : NewRC = TII->getRegClass(MI.getDesc(), i, TRI, MF);
197 :
198 : // For now, only allow the register to be changed if its register
199 : // class is consistent across all uses.
200 3385828 : if (!Classes[Reg] && NewRC)
201 177960 : Classes[Reg] = NewRC;
202 1514954 : else if (!NewRC || Classes[Reg] != NewRC)
203 1262897 : Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
204 :
205 : // Now check for aliases.
206 9848876 : for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) {
207 : // If an alias of the reg is used during the live range, give up.
208 : // Note that this allows us to skip checking if AntiDepReg
209 : // overlaps with any of the aliases, among other things.
210 : unsigned AliasReg = *AI;
211 16311924 : if (Classes[AliasReg]) {
212 5119591 : Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
213 5119591 : Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
214 : }
215 : }
216 :
217 : // If we're still willing to consider this register, note the reference.
218 3385828 : if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
219 396548 : RegRefs.insert(std::make_pair(Reg, &MO));
220 :
221 : // If this reg is tied and live (Classes[Reg] is set to -1), we can't change
222 : // it or any of its sub or super regs. We need to use KeepRegs to mark the
223 : // reg because not all uses of the same reg within an instruction are
224 : // necessarily tagged as tied.
225 : // Example: an x86 "xor %eax, %eax" will have one source operand tied to the
226 : // def register but not the second (see PR20020 for details).
227 : // FIXME: can this check be relaxed to account for undef uses
228 : // of a register? In the above 'xor' example, the uses of %eax are undef, so
229 : // earlier instructions could still replace %eax even though the 'xor'
230 : // itself can't be changed.
231 1692914 : if (MI.isRegTiedToUseOperand(i) &&
232 187764 : Classes[Reg] == reinterpret_cast<TargetRegisterClass *>(-1)) {
233 33525 : for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
234 140255 : SubRegs.isValid(); ++SubRegs) {
235 : KeepRegs.set(*SubRegs);
236 : }
237 33525 : for (MCSuperRegIterator SuperRegs(Reg, TRI);
238 82272 : SuperRegs.isValid(); ++SuperRegs) {
239 : KeepRegs.set(*SuperRegs);
240 : }
241 : }
242 :
243 1692914 : if (MO.isUse() && Special) {
244 107156 : if (!KeepRegs.test(Reg)) {
245 100435 : for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
246 464375 : SubRegs.isValid(); ++SubRegs)
247 : KeepRegs.set(*SubRegs);
248 : }
249 : }
250 : }
251 826414 : }
252 :
253 826414 : void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) {
254 : // Update liveness.
255 : // Proceeding upwards, registers that are defed but not used in this
256 : // instruction are now dead.
257 : assert(!MI.isKill() && "Attempting to scan a kill instruction");
258 :
259 826414 : if (!TII->isPredicated(MI)) {
260 : // Predicated defs are modeled as read + write, i.e. similar to two
261 : // address updates.
262 4957213 : for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
263 4130799 : MachineOperand &MO = MI.getOperand(i);
264 :
265 4130799 : if (MO.isRegMask())
266 9610460 : for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
267 9575890 : if (MO.clobbersPhysReg(i)) {
268 8883440 : DefIndices[i] = Count;
269 8883440 : KillIndices[i] = ~0u;
270 8883440 : KeepRegs.reset(i);
271 17766880 : Classes[i] = nullptr;
272 : RegRefs.erase(i);
273 : }
274 :
275 4130799 : if (!MO.isReg()) continue;
276 2670460 : unsigned Reg = MO.getReg();
277 2670460 : if (Reg == 0) continue;
278 1692914 : if (!MO.isDef()) continue;
279 :
280 : // Ignore two-addr defs.
281 607271 : if (MI.isRegTiedToUseOperand(i))
282 : continue;
283 :
284 : // If we've already marked this reg as unchangeable, don't remove
285 : // it or any of its subregs from KeepRegs.
286 : bool Keep = KeepRegs.test(Reg);
287 :
288 : // For the reg itself and all subregs: update the def to current;
289 : // reset the kill state, any restrictions, and references.
290 2286445 : for (MCSubRegIterator SRI(Reg, TRI, true); SRI.isValid(); ++SRI) {
291 1259667 : unsigned SubregReg = *SRI;
292 1259667 : DefIndices[SubregReg] = Count;
293 1259667 : KillIndices[SubregReg] = ~0u;
294 2519334 : Classes[SubregReg] = nullptr;
295 : RegRefs.erase(SubregReg);
296 1259667 : if (!Keep)
297 1186184 : KeepRegs.reset(SubregReg);
298 : }
299 : // Conservatively mark super-registers as unusable.
300 1320617 : for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR)
301 587678 : Classes[*SR] = reinterpret_cast<TargetRegisterClass *>(-1);
302 : }
303 : }
304 4957213 : for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
305 4130799 : MachineOperand &MO = MI.getOperand(i);
306 4130799 : if (!MO.isReg()) continue;
307 2670460 : unsigned Reg = MO.getReg();
308 2670460 : if (Reg == 0) continue;
309 1692914 : if (!MO.isUse()) continue;
310 :
311 : const TargetRegisterClass *NewRC = nullptr;
312 2171286 : if (i < MI.getDesc().getNumOperands())
313 825218 : NewRC = TII->getRegClass(MI.getDesc(), i, TRI, MF);
314 :
315 : // For now, only allow the register to be changed if its register
316 : // class is consistent across all uses.
317 2171286 : if (!Classes[Reg] && NewRC)
318 8399 : Classes[Reg] = NewRC;
319 1077244 : else if (!NewRC || Classes[Reg] != NewRC)
320 801545 : Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
321 :
322 1085643 : RegRefs.insert(std::make_pair(Reg, &MO));
323 :
324 : // It wasn't previously live but now it is, this is a kill.
325 : // Repeat for all aliases.
326 9826992 : for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
327 : unsigned AliasReg = *AI;
328 17482698 : if (KillIndices[AliasReg] == ~0u) {
329 1457268 : KillIndices[AliasReg] = Count;
330 2914536 : DefIndices[AliasReg] = ~0u;
331 : }
332 : }
333 : }
334 826414 : }
335 :
336 : // Check all machine operands that reference the antidependent register and must
337 : // be replaced by NewReg. Return true if any of their parent instructions may
338 : // clobber the new register.
339 : //
340 : // Note: AntiDepReg may be referenced by a two-address instruction such that
341 : // it's use operand is tied to a def operand. We guard against the case in which
342 : // the two-address instruction also defines NewReg, as may happen with
343 : // pre/postincrement loads. In this case, both the use and def operands are in
344 : // RegRefs because the def is inserted by PrescanInstruction and not erased
345 : // during ScanInstruction. So checking for an instruction with definitions of
346 : // both NewReg and AntiDepReg covers it.
347 : bool
348 33725 : CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
349 : RegRefIter RegRefEnd,
350 : unsigned NewReg) {
351 218000 : for (RegRefIter I = RegRefBegin; I != RegRefEnd; ++I ) {
352 184275 : MachineOperand *RefOper = I->second;
353 :
354 : // Don't allow the instruction defining AntiDepReg to earlyclobber its
355 : // operands, in case they may be assigned to NewReg. In this case antidep
356 : // breaking must fail, but it's too rare to bother optimizing.
357 184275 : if (RefOper->isDef() && RefOper->isEarlyClobber())
358 : return true;
359 :
360 : // Handle cases in which this instruction defines NewReg.
361 184275 : MachineInstr *MI = RefOper->getParent();
362 1046833 : for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
363 862558 : const MachineOperand &CheckOper = MI->getOperand(i);
364 :
365 862558 : if (CheckOper.isRegMask() && CheckOper.clobbersPhysReg(NewReg))
366 : return true;
367 :
368 862558 : if (!CheckOper.isReg() || !CheckOper.isDef() ||
369 119091 : CheckOper.getReg() != NewReg)
370 : continue;
371 :
372 : // Don't allow the instruction to define NewReg and AntiDepReg.
373 : // When AntiDepReg is renamed it will be an illegal op.
374 752 : if (RefOper->isDef())
375 : return true;
376 :
377 : // Don't allow an instruction using AntiDepReg to be earlyclobbered by
378 : // NewReg.
379 752 : if (CheckOper.isEarlyClobber())
380 : return true;
381 :
382 : // Don't allow inline asm to define NewReg at all. Who knows what it's
383 : // doing with it.
384 752 : if (MI->isInlineAsm())
385 : return true;
386 : }
387 : }
388 : return false;
389 : }
390 :
391 18418 : unsigned CriticalAntiDepBreaker::
392 : findSuitableFreeRegister(RegRefIter RegRefBegin,
393 : RegRefIter RegRefEnd,
394 : unsigned AntiDepReg,
395 : unsigned LastNewReg,
396 : const TargetRegisterClass *RC,
397 : SmallVectorImpl<unsigned> &Forbid) {
398 18418 : ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(RC);
399 58636 : for (unsigned i = 0; i != Order.size(); ++i) {
400 58421 : unsigned NewReg = Order[i];
401 : // Don't replace a register with itself.
402 58421 : if (NewReg == AntiDepReg) continue;
403 : // Don't replace a register with one that was recently used to repair
404 : // an anti-dependence with this AntiDepReg, because that would
405 : // re-introduce that anti-dependence.
406 40006 : if (NewReg == LastNewReg) continue;
407 : // If any instructions that define AntiDepReg also define the NewReg, it's
408 : // not suitable. For example, Instruction with multiple definitions can
409 : // result in this condition.
410 33725 : if (isNewRegClobberedByRefs(RegRefBegin, RegRefEnd, NewReg)) continue;
411 : // If NewReg is dead and NewReg's most recent def is not before
412 : // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
413 : assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u))
414 : && "Kill and Def maps aren't consistent for AntiDepReg!");
415 : assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u))
416 : && "Kill and Def maps aren't consistent for NewReg!");
417 33725 : if (KillIndices[NewReg] != ~0u ||
418 33725 : Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) ||
419 55398 : KillIndices[AntiDepReg] > DefIndices[NewReg])
420 : continue;
421 : // If NewReg overlaps any of the forbidden registers, we can't use it.
422 : bool Forbidden = false;
423 0 : for (SmallVectorImpl<unsigned>::iterator it = Forbid.begin(),
424 18203 : ite = Forbid.end(); it != ite; ++it)
425 0 : if (TRI->regsOverlap(NewReg, *it)) {
426 : Forbidden = true;
427 : break;
428 : }
429 18203 : if (Forbidden) continue;
430 : return NewReg;
431 : }
432 :
433 : // No registers are free and available!
434 : return 0;
435 : }
436 :
437 323623 : unsigned CriticalAntiDepBreaker::
438 : BreakAntiDependencies(const std::vector<SUnit> &SUnits,
439 : MachineBasicBlock::iterator Begin,
440 : MachineBasicBlock::iterator End,
441 : unsigned InsertPosIndex,
442 : DbgValueVector &DbgValues) {
443 : // The code below assumes that there is at least one instruction,
444 : // so just duck out immediately if the block is empty.
445 323623 : if (SUnits.empty()) return 0;
446 :
447 : // Keep a map of the MachineInstr*'s back to the SUnit representing them.
448 : // This is used for updating debug information.
449 : //
450 : // FIXME: Replace this with the existing map in ScheduleDAGInstrs::MISUnitMap
451 : DenseMap<MachineInstr *, const SUnit *> MISUnitMap;
452 :
453 : // Find the node at the bottom of the critical path.
454 : const SUnit *Max = nullptr;
455 827488 : for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
456 594996 : const SUnit *SU = &SUnits[i];
457 594996 : MISUnitMap[SU->getInstr()] = SU;
458 1096843 : if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
459 : Max = SU;
460 : }
461 :
462 : #ifndef NDEBUG
463 : {
464 : LLVM_DEBUG(dbgs() << "Critical path has total latency "
465 : << (Max->getDepth() + Max->Latency) << "\n");
466 : LLVM_DEBUG(dbgs() << "Available regs:");
467 : for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
468 : if (KillIndices[Reg] == ~0u)
469 : LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI));
470 : }
471 : LLVM_DEBUG(dbgs() << '\n');
472 : }
473 : #endif
474 :
475 : // Track progress along the critical path through the SUnit graph as we walk
476 : // the instructions.
477 : const SUnit *CriticalPathSU = Max;
478 116246 : MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();
479 :
480 : // Consider this pattern:
481 : // A = ...
482 : // ... = A
483 : // A = ...
484 : // ... = A
485 : // A = ...
486 : // ... = A
487 : // A = ...
488 : // ... = A
489 : // There are three anti-dependencies here, and without special care,
490 : // we'd break all of them using the same register:
491 : // A = ...
492 : // ... = A
493 : // B = ...
494 : // ... = B
495 : // B = ...
496 : // ... = B
497 : // B = ...
498 : // ... = B
499 : // because at each anti-dependence, B is the first register that
500 : // isn't A which is free. This re-introduces anti-dependencies
501 : // at all but one of the original anti-dependencies that we were
502 : // trying to break. To avoid this, keep track of the most recent
503 : // register that each register was replaced with, avoid
504 : // using it to repair an anti-dependence on the same register.
505 : // This lets us produce this:
506 : // A = ...
507 : // ... = A
508 : // B = ...
509 : // ... = B
510 : // C = ...
511 : // ... = C
512 : // B = ...
513 : // ... = B
514 : // This still has an anti-dependence on B, but at least it isn't on the
515 : // original critical path.
516 : //
517 : // TODO: If we tracked more than one register here, we could potentially
518 : // fix that remaining critical edge too. This is a little more involved,
519 : // because unlike the most recent register, less recent registers should
520 : // still be considered, though only if no other registers are available.
521 116246 : std::vector<unsigned> LastNewReg(TRI->getNumRegs(), 0);
522 :
523 : // Attempt to break anti-dependence edges on the critical path. Walk the
524 : // instructions from the bottom up, tracking information about liveness
525 : // as we go to help determine which registers are available.
526 : unsigned Broken = 0;
527 116246 : unsigned Count = InsertPosIndex - 1;
528 711304 : for (MachineBasicBlock::iterator I = End, E = Begin; I != E; --Count) {
529 : MachineInstr &MI = *--I;
530 : // Kill instructions can define registers but are really nops, and there
531 : // might be a real definition earlier that needs to be paired with uses
532 : // dominated by this kill.
533 :
534 : // FIXME: It may be possible to remove the isKill() restriction once PR18663
535 : // has been properly fixed. There can be value in processing kills as seen
536 : // in the AggressiveAntiDepBreaker class.
537 594996 : if (MI.isDebugInstr() || MI.isKill())
538 529 : continue;
539 :
540 : // Check if this instruction has a dependence on the critical path that
541 : // is an anti-dependence that we may be able to break. If it is, set
542 : // AntiDepReg to the non-zero register associated with the anti-dependence.
543 : //
544 : // We limit our attention to the critical path as a heuristic to avoid
545 : // breaking anti-dependence edges that aren't going to significantly
546 : // impact the overall schedule. There are a limited number of registers
547 : // and we want to save them for the important edges.
548 : //
549 : // TODO: Instructions with multiple defs could have multiple
550 : // anti-dependencies. The current code here only knows how to break one
551 : // edge per instruction. Note that we'd have to be able to break all of
552 : // the anti-dependencies in an instruction in order to be effective.
553 594529 : unsigned AntiDepReg = 0;
554 594529 : if (&MI == CriticalPathMI) {
555 351054 : if (const SDep *Edge = CriticalPathStep(CriticalPathSU)) {
556 : const SUnit *NextSU = Edge->getSUnit();
557 :
558 : // Only consider anti-dependence edges.
559 234888 : if (Edge->getKind() == SDep::Anti) {
560 55613 : AntiDepReg = Edge->getReg();
561 : assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
562 55613 : if (!MRI.isAllocatable(AntiDepReg))
563 : // Don't break anti-dependencies on non-allocatable registers.
564 32714 : AntiDepReg = 0;
565 45798 : else if (KeepRegs.test(AntiDepReg))
566 : // Don't break anti-dependencies if a use down below requires
567 : // this exact register.
568 1901 : AntiDepReg = 0;
569 : else {
570 : // If the SUnit has other dependencies on the SUnit that it
571 : // anti-depends on, don't bother breaking the anti-dependency
572 : // since those edges would prevent such units from being
573 : // scheduled past each other regardless.
574 : //
575 : // Also, if there are dependencies on other SUnits with the
576 : // same register as the anti-dependency, don't attempt to
577 : // break it.
578 45648 : for (SUnit::const_pred_iterator P = CriticalPathSU->Preds.begin(),
579 66646 : PE = CriticalPathSU->Preds.end(); P != PE; ++P)
580 45855 : if (P->getSUnit() == NextSU ?
581 20998 : (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
582 24857 : (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
583 207 : AntiDepReg = 0;
584 207 : break;
585 : }
586 : }
587 : }
588 : CriticalPathSU = NextSU;
589 234888 : CriticalPathMI = CriticalPathSU->getInstr();
590 : } else {
591 : // We've reached the end of the critical path.
592 : CriticalPathSU = nullptr;
593 : CriticalPathMI = nullptr;
594 : }
595 : }
596 :
597 594529 : PrescanInstruction(MI);
598 :
599 : SmallVector<unsigned, 2> ForbidRegs;
600 :
601 : // If MI's defs have a special allocation requirement, don't allow
602 : // any def registers to be changed. Also assume all registers
603 : // defined in a call must not be changed (ABI).
604 1189058 : if (MI.isCall() || MI.hasExtraDefRegAllocReq() || TII->isPredicated(MI))
605 : // If this instruction's defs have special allocation requirement, don't
606 : // break this anti-dependency.
607 0 : AntiDepReg = 0;
608 594529 : else if (AntiDepReg) {
609 : // If this instruction has a use of AntiDepReg, breaking it
610 : // is invalid. If the instruction defines other registers,
611 : // save a list of them so that we don't pick a new register
612 : // that overlaps any of them.
613 144287 : for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
614 123672 : MachineOperand &MO = MI.getOperand(i);
615 164706 : if (!MO.isReg()) continue;
616 82607 : unsigned Reg = MO.getReg();
617 82607 : if (Reg == 0) continue;
618 41573 : if (MO.isUse() && TRI->regsOverlap(AntiDepReg, Reg)) {
619 176 : AntiDepReg = 0;
620 176 : break;
621 : }
622 41397 : if (MO.isDef() && Reg != AntiDepReg)
623 2 : ForbidRegs.push_back(Reg);
624 : }
625 : }
626 :
627 : // Determine AntiDepReg's register class, if it is live and is
628 : // consistently used within a single class.
629 594529 : const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg]
630 : : nullptr;
631 : assert((AntiDepReg == 0 || RC != nullptr) &&
632 : "Register should be live if it's causing an anti-dependence!");
633 20615 : if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
634 2197 : AntiDepReg = 0;
635 :
636 : // Look for a suitable register to use to break the anti-dependence.
637 : //
638 : // TODO: Instead of picking the first free register, consider which might
639 : // be the best.
640 594529 : if (AntiDepReg != 0) {
641 : std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
642 : std::multimap<unsigned, MachineOperand *>::iterator>
643 : Range = RegRefs.equal_range(AntiDepReg);
644 36836 : if (unsigned NewReg = findSuitableFreeRegister(Range.first, Range.second,
645 : AntiDepReg,
646 18418 : LastNewReg[AntiDepReg],
647 : RC, ForbidRegs)) {
648 : LLVM_DEBUG(dbgs() << "Breaking anti-dependence edge on "
649 : << printReg(AntiDepReg, TRI) << " with "
650 : << RegRefs.count(AntiDepReg) << " references"
651 : << " using " << printReg(NewReg, TRI) << "!\n");
652 :
653 : // Update the references to the old register to refer to the new
654 : // register.
655 : for (std::multimap<unsigned, MachineOperand *>::iterator
656 113078 : Q = Range.first, QE = Range.second; Q != QE; ++Q) {
657 94875 : Q->second->setReg(NewReg);
658 : // If the SU for the instruction being updated has debug information
659 : // related to the anti-dependency register, make sure to update that
660 : // as well.
661 94875 : const SUnit *SU = MISUnitMap[Q->second->getParent()];
662 94875 : if (!SU) continue;
663 94875 : UpdateDbgValues(DbgValues, Q->second->getParent(),
664 : AntiDepReg, NewReg);
665 : }
666 :
667 : // We just went back in time and modified history; the
668 : // liveness information for the anti-dependence reg is now
669 : // inconsistent. Set the state as if it were dead.
670 36406 : Classes[NewReg] = Classes[AntiDepReg];
671 18203 : DefIndices[NewReg] = DefIndices[AntiDepReg];
672 18203 : KillIndices[NewReg] = KillIndices[AntiDepReg];
673 : assert(((KillIndices[NewReg] == ~0u) !=
674 : (DefIndices[NewReg] == ~0u)) &&
675 : "Kill and Def maps aren't consistent for NewReg!");
676 :
677 36406 : Classes[AntiDepReg] = nullptr;
678 18203 : DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
679 36406 : KillIndices[AntiDepReg] = ~0u;
680 : assert(((KillIndices[AntiDepReg] == ~0u) !=
681 : (DefIndices[AntiDepReg] == ~0u)) &&
682 : "Kill and Def maps aren't consistent for AntiDepReg!");
683 :
684 : RegRefs.erase(AntiDepReg);
685 18203 : LastNewReg[AntiDepReg] = NewReg;
686 18203 : ++Broken;
687 : }
688 : }
689 :
690 594529 : ScanInstruction(MI, Count);
691 : }
692 :
693 : return Broken;
694 : }
|