Line data Source code
1 : //===- ExecutionDepsFix.cpp - Fix execution dependecy issues ----*- C++ -*-===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 :
10 : #include "llvm/CodeGen/ExecutionDepsFix.h"
11 :
12 : #include "llvm/ADT/PostOrderIterator.h"
13 : #include "llvm/ADT/iterator_range.h"
14 : #include "llvm/CodeGen/LivePhysRegs.h"
15 : #include "llvm/CodeGen/MachineFunctionPass.h"
16 : #include "llvm/CodeGen/MachineRegisterInfo.h"
17 : #include "llvm/CodeGen/RegisterClassInfo.h"
18 : #include "llvm/Support/Allocator.h"
19 : #include "llvm/Support/Debug.h"
20 : #include "llvm/Support/raw_ostream.h"
21 : #include "llvm/Target/TargetInstrInfo.h"
22 : #include "llvm/Target/TargetSubtargetInfo.h"
23 :
24 : using namespace llvm;
25 :
26 : #define DEBUG_TYPE "execution-deps-fix"
27 :
28 : /// Translate TRI register number to a list of indices into our smaller tables
29 : /// of interesting registers.
30 : iterator_range<SmallVectorImpl<int>::const_iterator>
31 3717783 : ExecutionDepsFix::regIndices(unsigned Reg) const {
32 : assert(Reg < AliasMap.size() && "Invalid register");
33 7435566 : const auto &Entry = AliasMap[Reg];
34 14871132 : return make_range(Entry.begin(), Entry.end());
35 : }
36 :
37 414976 : DomainValue *ExecutionDepsFix::alloc(int domain) {
38 829952 : DomainValue *dv = Avail.empty() ?
39 517362 : new(Allocator.Allocate()) DomainValue :
40 657498 : Avail.pop_back_val();
41 414976 : if (domain >= 0)
42 321252 : dv->addDomain(domain);
43 : assert(dv->Refs == 0 && "Reference count wasn't cleared");
44 : assert(!dv->Next && "Chained DomainValue shouldn't have been recycled");
45 414976 : return dv;
46 : }
47 :
48 : /// Release a reference to DV. When the last reference is released,
49 : /// collapse if needed.
50 2116425 : void ExecutionDepsFix::release(DomainValue *DV) {
51 2946369 : while (DV) {
52 : assert(DV->Refs && "Bad DomainValue");
53 921366 : if (--DV->Refs)
54 : return;
55 :
56 : // There are no more DV references. Collapse any contained instructions.
57 829246 : if (DV->AvailableDomains && !DV->isCollapsed())
58 25018 : collapse(DV, DV->getFirstDomain());
59 :
60 414972 : DomainValue *Next = DV->Next;
61 829944 : DV->clear();
62 414972 : Avail.push_back(DV);
63 : // Also release the next DomainValue in the chain.
64 414972 : DV = Next;
65 : }
66 : }
67 :
68 : /// Follow the chain of dead DomainValues until a live DomainValue is reached.
69 : /// Update the referenced pointer when necessary.
70 7387520 : DomainValue *ExecutionDepsFix::resolve(DomainValue *&DVRef) {
71 7387520 : DomainValue *DV = DVRef;
72 7387520 : if (!DV || !DV->Next)
73 : return DV;
74 :
75 : // DV has a chain. Find the end.
76 86 : do DV = DV->Next;
77 86 : while (DV->Next);
78 :
79 : // Update DVRef to point to DV.
80 170 : retain(DV);
81 85 : release(DVRef);
82 85 : DVRef = DV;
83 85 : return DV;
84 : }
85 :
86 : /// Set LiveRegs[rx] = dv, updating reference counts.
87 920583 : void ExecutionDepsFix::setLiveReg(int rx, DomainValue *dv) {
88 : assert(unsigned(rx) < NumRegs && "Invalid index");
89 : assert(LiveRegs && "Must enter basic block first.");
90 :
91 920583 : if (LiveRegs[rx].Value == dv)
92 : return;
93 920583 : if (LiveRegs[rx].Value)
94 13597 : release(LiveRegs[rx].Value);
95 1841166 : LiveRegs[rx].Value = retain(dv);
96 : }
97 :
98 : // Kill register rx, recycle or collapse any DomainValue.
99 384560 : void ExecutionDepsFix::kill(int rx) {
100 : assert(unsigned(rx) < NumRegs && "Invalid index");
101 : assert(LiveRegs && "Must enter basic block first.");
102 384560 : if (!LiveRegs[rx].Value)
103 : return;
104 :
105 291668 : release(LiveRegs[rx].Value);
106 291668 : LiveRegs[rx].Value = nullptr;
107 : }
108 :
109 : /// Force register rx into domain.
110 720432 : void ExecutionDepsFix::force(int rx, unsigned domain) {
111 : assert(unsigned(rx) < NumRegs && "Invalid index");
112 : assert(LiveRegs && "Must enter basic block first.");
113 720432 : if (DomainValue *dv = LiveRegs[rx].Value) {
114 412026 : if (dv->isCollapsed())
115 331780 : dv->addDomain(domain);
116 160492 : else if (dv->hasDomain(domain))
117 80188 : collapse(dv, domain);
118 : else {
119 : // This is an incompatible open DomainValue. Collapse it to whatever and
120 : // force the new value into domain. This costs a domain crossing.
121 116 : collapse(dv, dv->getFirstDomain());
122 : assert(LiveRegs[rx].Value && "Not live after collapse?");
123 58 : LiveRegs[rx].Value->addDomain(domain);
124 : }
125 : } else {
126 : // Set up basic collapsed DomainValue.
127 308406 : setLiveReg(rx, alloc(domain));
128 : }
129 720432 : }
130 :
131 : /// Collapse open DomainValue into given domain. If there are multiple
132 : /// registers using dv, they each get a unique collapsed DomainValue.
133 93022 : void ExecutionDepsFix::collapse(DomainValue *dv, unsigned domain) {
134 : assert(dv->hasDomain(domain) && "Cannot collapse");
135 :
136 : // Collapse all the instructions.
137 372208 : while (!dv->Instrs.empty())
138 279186 : TII->setExecutionDomain(*dv->Instrs.pop_back_val(), domain);
139 186044 : dv->setSingleDomain(domain);
140 :
141 : // If there are multiple users, give them new, unique DomainValues.
142 93022 : if (LiveRegs && dv->Refs > 1)
143 410280 : for (unsigned rx = 0; rx != NumRegs; ++rx)
144 201984 : if (LiveRegs[rx].Value == dv)
145 12846 : setLiveReg(rx, alloc(domain));
146 93022 : }
147 :
148 : /// All instructions and registers in B are moved to A, and B is released.
149 4951 : bool ExecutionDepsFix::merge(DomainValue *A, DomainValue *B) {
150 : assert(!A->isCollapsed() && "Cannot merge into collapsed");
151 : assert(!B->isCollapsed() && "Cannot merge from collapsed");
152 4951 : if (A == B)
153 : return true;
154 : // Restrict to the domains that A and B have in common.
155 1396 : unsigned common = A->getCommonDomains(B->AvailableDomains);
156 698 : if (!common)
157 : return false;
158 698 : A->AvailableDomains = common;
159 2094 : A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
160 :
161 : // Clear the old DomainValue so we won't try to swizzle instructions twice.
162 698 : B->clear();
163 : // All uses of B are referred to A.
164 1396 : B->Next = retain(A);
165 :
166 23034 : for (unsigned rx = 0; rx != NumRegs; ++rx) {
167 : assert(LiveRegs && "no space allocated for live registers");
168 22336 : if (LiveRegs[rx].Value == B)
169 751 : setLiveReg(rx, A);
170 : }
171 : return true;
172 : }
173 :
174 : /// Set up LiveRegs by merging predecessor live-out values.
175 225958 : void ExecutionDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
176 : // Reset instruction counter in each basic block.
177 225958 : CurInstr = 0;
178 :
179 : // Set up UndefReads to track undefined register reads.
180 451916 : UndefReads.clear();
181 451916 : LiveRegSet.clear();
182 :
183 : // Set up LiveRegs to represent registers entering MBB.
184 225958 : if (!LiveRegs)
185 225958 : LiveRegs = new LiveReg[NumRegs];
186 :
187 : // Default values are 'nothing happened a long time ago'.
188 14687270 : for (unsigned rx = 0; rx != NumRegs; ++rx) {
189 7230656 : LiveRegs[rx].Value = nullptr;
190 7230656 : LiveRegs[rx].Def = -(1 << 20);
191 : }
192 :
193 : // This is the entry block.
194 225958 : if (MBB->pred_empty()) {
195 260922 : for (const auto &LI : MBB->liveins()) {
196 208840 : for (int rx : regIndices(LI.PhysReg)) {
197 : // Treat function live-ins as if they were defined just before the first
198 : // instruction. Usually, function arguments are set up immediately
199 : // before the call.
200 74294 : LiveRegs[rx].Def = -1;
201 : }
202 : }
203 : DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": entry\n");
204 : return;
205 : }
206 :
207 : // Try to coalesce live-out registers from predecessors.
208 : for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(),
209 396745 : pe = MBB->pred_end(); pi != pe; ++pi) {
210 233975 : auto fi = MBBInfos.find(*pi);
211 : assert(fi != MBBInfos.end() &&
212 : "Should have pre-allocated MBBInfos for all MBBs");
213 233975 : LiveReg *Incoming = fi->second.OutRegs;
214 : // Incoming is null if this is a backedge from a BB
215 : // we haven't processed yet
216 233975 : if (Incoming == nullptr) {
217 3115 : continue;
218 : }
219 :
220 15005900 : for (unsigned rx = 0; rx != NumRegs; ++rx) {
221 : // Use the most recent predecessor def for each register.
222 14775040 : LiveRegs[rx].Def = std::max(LiveRegs[rx].Def, Incoming[rx].Def);
223 :
224 7387520 : DomainValue *pdv = resolve(Incoming[rx].Value);
225 7387520 : if (!pdv)
226 6679867 : continue;
227 1204592 : if (!LiveRegs[rx].Value) {
228 496939 : setLiveReg(rx, pdv);
229 496939 : continue;
230 : }
231 :
232 : // We have a live DomainValue from more than one predecessor.
233 421428 : if (LiveRegs[rx].Value->isCollapsed()) {
234 : // We are already collapsed, but predecessor is not. Force it.
235 412048 : unsigned Domain = LiveRegs[rx].Value->getFirstDomain();
236 206291 : if (!pdv->isCollapsed() && pdv->hasDomain(Domain))
237 267 : collapse(pdv, Domain);
238 206024 : continue;
239 : }
240 :
241 : // Currently open, merge in predecessor.
242 4690 : if (!pdv->isCollapsed())
243 4448 : merge(LiveRegs[rx].Value, pdv);
244 : else
245 484 : force(rx, pdv->getFirstDomain());
246 : }
247 : }
248 : DEBUG(
249 : dbgs() << "BB#" << MBB->getNumber()
250 : << (!isBlockDone(MBB) ? ": incomplete\n" : ": all preds known\n"));
251 : }
252 :
253 225958 : void ExecutionDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) {
254 : assert(LiveRegs && "Must enter basic block first.");
255 451916 : LiveReg *OldOutRegs = MBBInfos[MBB].OutRegs;
256 : // Save register clearances at end of MBB - used by enterBasicBlock().
257 451916 : MBBInfos[MBB].OutRegs = LiveRegs;
258 :
259 : // While processing the basic block, we kept `Def` relative to the start
260 : // of the basic block for convenience. However, future use of this information
261 : // only cares about the clearance from the end of the block, so adjust
262 : // everything to be relative to the end of the basic block.
263 7456614 : for (unsigned i = 0, e = NumRegs; i != e; ++i)
264 7230656 : LiveRegs[i].Def -= CurInstr;
265 225958 : if (OldOutRegs) {
266 : // This must be the second pass.
267 : // Release all the DomainValues instead of keeping them.
268 1384713 : for (unsigned i = 0, e = NumRegs; i != e; ++i)
269 1342752 : release(OldOutRegs[i].Value);
270 41961 : delete[] OldOutRegs;
271 : }
272 225958 : LiveRegs = nullptr;
273 225958 : }
274 :
275 2153733 : bool ExecutionDepsFix::visitInstr(MachineInstr *MI) {
276 : // Update instructions with explicit execution domains.
277 2153733 : std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(*MI);
278 2153733 : if (DomP.first) {
279 458349 : if (DomP.second)
280 227984 : visitSoftInstr(MI, DomP.second);
281 : else
282 230365 : visitHardInstr(MI, DomP.first);
283 : }
284 :
285 2153733 : return !DomP.first;
286 : }
287 :
288 : /// \brief Helps avoid false dependencies on undef registers by updating the
289 : /// machine instructions' undef operand to use a register that the instruction
290 : /// is truly dependent on, or use a register with clearance higher than Pref.
291 : /// Returns true if it was able to find a true dependency, thus not requiring
292 : /// a dependency breaking instruction regardless of clearance.
293 1126 : bool ExecutionDepsFix::pickBestRegisterForUndef(MachineInstr *MI,
294 : unsigned OpIdx, unsigned Pref) {
295 2252 : MachineOperand &MO = MI->getOperand(OpIdx);
296 : assert(MO.isUndef() && "Expected undef machine operand");
297 :
298 1126 : unsigned OriginalReg = MO.getReg();
299 :
300 : // Update only undef operands that are mapped to one register.
301 3378 : if (AliasMap[OriginalReg].size() != 1)
302 : return false;
303 :
304 : // Get the undef operand's register class
305 : const TargetRegisterClass *OpRC =
306 1126 : TII->getRegClass(MI->getDesc(), OpIdx, TRI, *MF);
307 :
308 : // If the instruction has a true dependency, we can hide the false depdency
309 : // behind it.
310 4780 : for (MachineOperand &CurrMO : MI->operands()) {
311 14765 : if (!CurrMO.isReg() || CurrMO.isDef() || CurrMO.isUndef() ||
312 2610 : !OpRC->contains(CurrMO.getReg()))
313 : continue;
314 : // We found a true dependency - replace the undef register with the true
315 : // dependency.
316 124 : MO.setReg(CurrMO.getReg());
317 124 : return true;
318 : }
319 :
320 : // Go over all registers in the register class and find the register with
321 : // max clearance or clearance higher than Pref.
322 1002 : unsigned MaxClearance = 0;
323 1002 : unsigned MaxClearanceReg = OriginalReg;
324 1002 : ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(OpRC);
325 5043 : for (auto Reg : Order) {
326 : assert(AliasMap[Reg].size() == 1 &&
327 : "Reg is expected to be mapped to a single index");
328 4031 : int RCrx = *regIndices(Reg).begin();
329 4031 : unsigned Clearance = CurInstr - LiveRegs[RCrx].Def;
330 4031 : if (Clearance <= MaxClearance)
331 1911 : continue;
332 2120 : MaxClearance = Clearance;
333 2120 : MaxClearanceReg = Reg;
334 :
335 2120 : if (MaxClearance > Pref)
336 : break;
337 : }
338 :
339 : // Update the operand if we found a register with better clearance.
340 1002 : if (MaxClearanceReg != OriginalReg)
341 887 : MO.setReg(MaxClearanceReg);
342 :
343 : return false;
344 : }
345 :
346 : /// \brief Return true to if it makes sense to break dependence on a partial def
347 : /// or undef use.
348 1473 : bool ExecutionDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
349 : unsigned Pref) {
350 2946 : unsigned reg = MI->getOperand(OpIdx).getReg();
351 1694 : for (int rx : regIndices(reg)) {
352 1483 : unsigned Clearance = CurInstr - LiveRegs[rx].Def;
353 : DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
354 :
355 1483 : if (Pref > Clearance) {
356 : DEBUG(dbgs() << ": Break dependency.\n");
357 : continue;
358 : }
359 : DEBUG(dbgs() << ": OK .\n");
360 : return false;
361 : }
362 : return true;
363 : }
364 :
365 : // Update def-ages for registers defined by MI.
366 : // If Kill is set, also kill off DomainValues clobbered by the defs.
367 : //
368 : // Also break dependencies on partial defs and undef uses.
369 2557084 : void ExecutionDepsFix::processDefs(MachineInstr *MI, bool breakDependency,
370 : bool Kill) {
371 : assert(!MI->isDebugValue() && "Won't process debug values");
372 :
373 : // Break dependence on undef uses. Do this before updating LiveRegs below.
374 : unsigned OpNum;
375 2557084 : if (breakDependency) {
376 2153734 : unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI);
377 2153734 : if (Pref) {
378 1126 : bool HadTrueDependency = pickBestRegisterForUndef(MI, OpNum, Pref);
379 : // We don't need to bother trying to break a dependency if this
380 : // instruction has a true dependency on that register through another
381 : // operand - we'll have to wait for it to be available regardless.
382 1126 : if (!HadTrueDependency && shouldBreakDependence(MI, OpNum, Pref))
383 30 : UndefReads.push_back(std::make_pair(MI, OpNum));
384 : }
385 : }
386 2557084 : const MCInstrDesc &MCID = MI->getDesc();
387 3935226 : for (unsigned i = 0,
388 7671252 : e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
389 3935226 : i != e; ++i) {
390 2756284 : MachineOperand &MO = MI->getOperand(i);
391 1378142 : if (!MO.isReg())
392 53711 : continue;
393 1324431 : if (MO.isUse())
394 84535 : continue;
395 1670102 : for (int rx : regIndices(MO.getReg())) {
396 : // This instruction explicitly defines rx.
397 : DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr
398 : << '\t' << *MI);
399 :
400 430206 : if (breakDependency) {
401 : // Check clearance before partial register updates.
402 : // Call breakDependence before setting LiveRegs[rx].Def.
403 380221 : unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI);
404 380221 : if (Pref && shouldBreakDependence(MI, i, Pref))
405 201 : TII->breakPartialRegDependency(*MI, i, TRI);
406 : }
407 :
408 : // How many instructions since rx was last written?
409 430206 : LiveRegs[rx].Def = CurInstr;
410 :
411 : // Kill off domains redefined by generic instructions.
412 430206 : if (Kill)
413 42948 : kill(rx);
414 : }
415 : }
416 2557084 : ++CurInstr;
417 2557084 : }
418 :
419 : /// \break Break false dependencies on undefined register reads.
420 : ///
421 : /// Walk the block backward computing precise liveness. This is expensive, so we
422 : /// only do it on demand. Note that the occurrence of undefined register reads
423 : /// that should be broken is very rare, but when they occur we may have many in
424 : /// a single block.
425 183997 : void ExecutionDepsFix::processUndefReads(MachineBasicBlock *MBB) {
426 367994 : if (UndefReads.empty())
427 : return;
428 :
429 : // Collect this block's live out register units.
430 20 : LiveRegSet.init(*TRI);
431 : // We do not need to care about pristine registers as they are just preserved
432 : // but not actually used in the function.
433 10 : LiveRegSet.addLiveOutsNoPristines(*MBB);
434 :
435 20 : MachineInstr *UndefMI = UndefReads.back().first;
436 20 : unsigned OpIdx = UndefReads.back().second;
437 :
438 188 : for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) {
439 : // Update liveness, including the current instruction's defs.
440 79 : LiveRegSet.stepBackward(I);
441 :
442 79 : if (UndefMI == &I) {
443 30 : if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg()))
444 9 : TII->breakPartialRegDependency(*UndefMI, OpIdx, TRI);
445 :
446 10 : UndefReads.pop_back();
447 20 : if (UndefReads.empty())
448 10 : return;
449 :
450 0 : UndefMI = UndefReads.back().first;
451 0 : OpIdx = UndefReads.back().second;
452 : }
453 : }
454 : }
455 :
456 : // A hard instruction only works in one domain. All input registers will be
457 : // forced into that domain.
458 318752 : void ExecutionDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
459 : // Collapse all uses.
460 1678642 : for (unsigned i = mi->getDesc().getNumDefs(),
461 637504 : e = mi->getDesc().getNumOperands(); i != e; ++i) {
462 2082276 : MachineOperand &mo = mi->getOperand(i);
463 1041138 : if (!mo.isReg()) continue;
464 1270643 : for (int rx : regIndices(mo.getReg())) {
465 480219 : force(rx, domain);
466 : }
467 : }
468 :
469 : // Kill all defs and force them.
470 888240 : for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
471 501472 : MachineOperand &mo = mi->getOperand(i);
472 250736 : if (!mo.isReg()) continue;
473 490707 : for (int rx : regIndices(mo.getReg())) {
474 239971 : kill(rx);
475 239971 : force(rx, domain);
476 : }
477 : }
478 318752 : }
479 :
480 : // A soft instruction can be changed to work in other domains given by mask.
481 227984 : void ExecutionDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
482 : // Bitmask of available domains for this instruction after taking collapsed
483 : // operands into account.
484 227984 : unsigned available = mask;
485 :
486 : // Scan the explicit use operands for incoming domains.
487 367581 : SmallVector<int, 4> used;
488 227984 : if (LiveRegs)
489 1608192 : for (unsigned i = mi->getDesc().getNumDefs(),
490 455968 : e = mi->getDesc().getNumOperands(); i != e; ++i) {
491 2304448 : MachineOperand &mo = mi->getOperand(i);
492 1152224 : if (!mo.isReg()) continue;
493 919266 : for (int rx : regIndices(mo.getReg())) {
494 165017 : DomainValue *dv = LiveRegs[rx].Value;
495 165017 : if (dv == nullptr)
496 18339 : continue;
497 : // Bitmask of domains that dv and available have in common.
498 293356 : unsigned common = dv->getCommonDomains(available);
499 : // Is it possible to use this collapsed register for free?
500 146678 : if (dv->isCollapsed()) {
501 : // Restrict available domains to the ones in common with the operand.
502 : // If there are no common domains, we must pay the cross-domain
503 : // penalty for this operand.
504 98155 : if (common) available = common;
505 48523 : } else if (common)
506 : // Open DomainValue is compatible, save it for merging.
507 48523 : used.push_back(rx);
508 : else
509 : // Open DomainValue is not compatible with instruction. It is useless
510 : // now.
511 0 : kill(rx);
512 : }
513 : }
514 :
515 : // If the collapsed operands force a single domain, propagate the collapse.
516 227984 : if (isPowerOf2_32(available)) {
517 88387 : unsigned domain = countTrailingZeros(available);
518 88387 : TII->setExecutionDomain(*mi, domain);
519 88387 : visitHardInstr(mi, domain);
520 88387 : return;
521 : }
522 :
523 : // Kill off any remaining uses that don't match available, and build a list of
524 : // incoming DomainValues that we want to merge.
525 279194 : SmallVector<const LiveReg *, 4> Regs;
526 653048 : for (int rx : used) {
527 : assert(LiveRegs && "no space allocated for live registers");
528 47330 : const LiveReg &LR = LiveRegs[rx];
529 : // This useless DomainValue could have been missed above.
530 94660 : if (!LR.Value->getCommonDomains(available)) {
531 0 : kill(rx);
532 0 : continue;
533 : }
534 : // Sorted insertion.
535 141990 : auto I = std::upper_bound(Regs.begin(), Regs.end(), &LR,
536 : [](const LiveReg *LHS, const LiveReg *RHS) {
537 : return LHS->Def < RHS->Def;
538 94660 : });
539 47330 : Regs.insert(I, &LR);
540 : }
541 :
542 : // doms are now sorted in order of appearance. Try to merge them all, giving
543 : // priority to the latest ones.
544 : DomainValue *dv = nullptr;
545 186927 : while (!Regs.empty()) {
546 93203 : if (!dv) {
547 45873 : dv = Regs.pop_back_val()->Value;
548 : // Force the first dv to match the current instruction.
549 91746 : dv->AvailableDomains = dv->getCommonDomains(available);
550 : assert(dv->AvailableDomains && "Domain should have been filtered");
551 45873 : continue;
552 : }
553 :
554 1457 : DomainValue *Latest = Regs.pop_back_val()->Value;
555 : // Skip already merged values.
556 1457 : if (Latest == dv || Latest->Next)
557 954 : continue;
558 503 : if (merge(dv, Latest))
559 503 : continue;
560 :
561 : // If latest didn't merge, it is useless now. Kill all registers using it.
562 0 : for (int i : used) {
563 : assert(LiveRegs && "no space allocated for live registers");
564 0 : if (LiveRegs[i].Value == Latest)
565 0 : kill(i);
566 : }
567 : }
568 :
569 : // dv is the DomainValue we are going to use for this instruction.
570 139597 : if (!dv) {
571 93724 : dv = alloc();
572 93724 : dv->AvailableDomains = available;
573 : }
574 139597 : dv->Instrs.push_back(mi);
575 :
576 : // Finally set all defs and non-collapsed uses to dv. We must iterate through
577 : // all the operators, including imp-def ones.
578 938249 : for (MachineInstr::mop_iterator ii = mi->operands_begin(),
579 279194 : ee = mi->operands_end();
580 938249 : ii != ee; ++ii) {
581 798652 : MachineOperand &mo = *ii;
582 798652 : if (!mo.isReg()) continue;
583 704092 : for (int rx : regIndices(mo.getReg())) {
584 285331 : if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) {
585 101641 : kill(rx);
586 101641 : setLiveReg(rx, dv);
587 : }
588 : }
589 : }
590 : }
591 :
592 225958 : void ExecutionDepsFix::processBasicBlock(MachineBasicBlock *MBB,
593 : bool PrimaryPass) {
594 225958 : enterBasicBlock(MBB);
595 : // If this block is not done, it makes little sense to make any decisions
596 : // based on clearance information. We need to make a second pass anyway,
597 : // and by then we'll have better information, so we can avoid doing the work
598 : // to try and break dependencies now.
599 225958 : bool breakDependency = isBlockDone(MBB);
600 6049658 : for (MachineInstr &MI : *MBB) {
601 2572913 : if (!MI.isDebugValue()) {
602 2557084 : bool Kill = false;
603 2557084 : if (PrimaryPass)
604 2153733 : Kill = visitInstr(&MI);
605 2557084 : processDefs(&MI, breakDependency, Kill);
606 : }
607 : }
608 225958 : if (breakDependency)
609 183997 : processUndefReads(MBB);
610 225958 : leaveBasicBlock(MBB);
611 225958 : }
612 :
613 1101770 : bool ExecutionDepsFix::isBlockDone(MachineBasicBlock *MBB) {
614 2959862 : return MBBInfos[MBB].PrimaryCompleted &&
615 3218593 : MBBInfos[MBB].IncomingCompleted == MBBInfos[MBB].PrimaryIncoming &&
616 2914307 : MBBInfos[MBB].IncomingProcessed == MBB->pred_size();
617 : }
618 :
619 79719 : bool ExecutionDepsFix::runOnMachineFunction(MachineFunction &mf) {
620 79719 : if (skipFunction(*mf.getFunction()))
621 : return false;
622 79667 : MF = &mf;
623 79667 : TII = MF->getSubtarget().getInstrInfo();
624 79667 : TRI = MF->getSubtarget().getRegisterInfo();
625 79667 : RegClassInfo.runOnMachineFunction(mf);
626 79667 : LiveRegs = nullptr;
627 : assert(NumRegs == RC->getNumRegs() && "Bad regclass");
628 :
629 : DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: "
630 : << TRI->getRegClassName(RC) << " **********\n");
631 :
632 : // If no relevant registers are used in the function, we can skip it
633 : // completely.
634 79667 : bool anyregs = false;
635 79667 : const MachineRegisterInfo &MRI = mf.getRegInfo();
636 798494 : for (unsigned Reg : *RC) {
637 622681 : if (MRI.isPhysRegUsed(Reg)) {
638 : anyregs = true;
639 : break;
640 : }
641 : }
642 79667 : if (!anyregs) return false;
643 :
644 : // Initialize the AliasMap on the first use.
645 126376 : if (AliasMap.empty()) {
646 : // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and
647 : // therefore the LiveRegs array.
648 5067 : AliasMap.resize(TRI->getNumRegs());
649 172278 : for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i)
650 1881928 : for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true);
651 3277424 : AI.isValid(); ++AI)
652 3115280 : AliasMap[*AI].push_back(i);
653 : }
654 :
655 : // Initialize the MMBInfos
656 373561 : for (auto &MBB : mf) {
657 183997 : MBBInfo InitialInfo;
658 551991 : MBBInfos.insert(std::make_pair(&MBB, InitialInfo));
659 : }
660 :
661 : /*
662 : * We want to visit every instruction in every basic block in order to update
663 : * it's execution domain or break any false dependencies. However, for the
664 : * dependency breaking, we need to know clearances from all predecessors
665 : * (including any backedges). One way to do so would be to do two complete
666 : * passes over all basic blocks/instructions, the first for recording
667 : * clearances, the second to break the dependencies. However, for functions
668 : * without backedges, or functions with a lot of straight-line code, and
669 : * a small loop, that would be a lot of unnecessary work (since only the
670 : * BBs that are part of the loop require two passes). As an example,
671 : * consider the following loop.
672 : *
673 : *
674 : * PH -> A -> B (xmm<Undef> -> xmm<Def>) -> C -> D -> EXIT
675 : * ^ |
676 : * +----------------------------------+
677 : *
678 : * The iteration order is as follows:
679 : * Naive: PH A B C D A' B' C' D'
680 : * Optimized: PH A B C A' B' C' D
681 : *
682 : * Note that we avoid processing D twice, because we can entirely process
683 : * the predecessors before getting to D. We call a block that is ready
684 : * for its second round of processing `done` (isBlockDone). Once we finish
685 : * processing some block, we update the counters in MBBInfos and re-process
686 : * any successors that are now done.
687 : */
688 :
689 189564 : MachineBasicBlock *Entry = &*MF->begin();
690 63188 : ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry);
691 126376 : SmallVector<MachineBasicBlock *, 4> Workqueue;
692 : for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
693 310373 : MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
694 183997 : MachineBasicBlock *MBB = *MBBI;
695 : // N.B: IncomingProcessed and IncomingCompleted were already updated while
696 : // processing this block's predecessors.
697 367994 : MBBInfos[MBB].PrimaryCompleted = true;
698 551991 : MBBInfos[MBB].PrimaryIncoming = MBBInfos[MBB].IncomingProcessed;
699 183997 : bool Primary = true;
700 183997 : Workqueue.push_back(MBB);
701 635913 : while (!Workqueue.empty()) {
702 451916 : MachineBasicBlock *ActiveMBB = &*Workqueue.back();
703 225958 : Workqueue.pop_back();
704 225958 : processBasicBlock(ActiveMBB, Primary);
705 225958 : bool Done = isBlockDone(ActiveMBB);
706 686402 : for (auto *Succ : ActiveMBB->successors()) {
707 234486 : if (!isBlockDone(Succ)) {
708 231371 : if (Primary) {
709 344306 : MBBInfos[Succ].IncomingProcessed++;
710 : }
711 231371 : if (Done) {
712 338076 : MBBInfos[Succ].IncomingCompleted++;
713 : }
714 231371 : if (isBlockDone(Succ)) {
715 41961 : Workqueue.push_back(Succ);
716 : }
717 : }
718 : }
719 225958 : Primary = false;
720 : }
721 : }
722 :
723 : // We need to go through again and finalize any blocks that are not done yet.
724 : // This is possible if blocks have dead predecessors, so we didn't visit them
725 : // above.
726 : for (ReversePostOrderTraversal<MachineBasicBlock *>::rpo_iterator
727 63188 : MBBI = RPOT.begin(),
728 : MBBE = RPOT.end();
729 247185 : MBBI != MBBE; ++MBBI) {
730 183997 : MachineBasicBlock *MBB = *MBBI;
731 183997 : if (!isBlockDone(MBB)) {
732 0 : processBasicBlock(MBB, false);
733 : // Don't update successors here. We'll get to them anyway through this
734 : // loop.
735 : }
736 : }
737 :
738 : // Clear the LiveOuts vectors and collapse any remaining DomainValues.
739 : for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
740 310373 : MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
741 367994 : auto FI = MBBInfos.find(*MBBI);
742 551991 : if (FI == MBBInfos.end() || !FI->second.OutRegs)
743 0 : continue;
744 6071901 : for (unsigned i = 0, e = NumRegs; i != e; ++i)
745 5887904 : if (FI->second.OutRegs[i].Value)
746 468323 : release(FI->second.OutRegs[i].Value);
747 183997 : delete[] FI->second.OutRegs;
748 : }
749 63188 : MBBInfos.clear();
750 126376 : UndefReads.clear();
751 126376 : Avail.clear();
752 63188 : Allocator.DestroyAll();
753 :
754 63188 : return false;
755 : }
|