Line data Source code
1 : //===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // This implements the ScheduleDAGSDNodes class, which is a base class
11 : // used by the SelectionDAG scheduling implementation classes.
12 : //
13 : //===----------------------------------------------------------------------===//
14 :
15 : #include "ScheduleDAGSDNodes.h"
16 : #include "InstrEmitter.h"
17 : #include "SDNodeDbgValue.h"
18 : #include "llvm/ADT/DenseMap.h"
19 : #include "llvm/ADT/SmallPtrSet.h"
20 : #include "llvm/ADT/SmallSet.h"
21 : #include "llvm/ADT/SmallVector.h"
22 : #include "llvm/ADT/Statistic.h"
23 : #include "llvm/CodeGen/MachineInstrBuilder.h"
24 : #include "llvm/CodeGen/MachineRegisterInfo.h"
25 : #include "llvm/CodeGen/SelectionDAG.h"
26 : #include "llvm/CodeGen/TargetInstrInfo.h"
27 : #include "llvm/CodeGen/TargetLowering.h"
28 : #include "llvm/CodeGen/TargetRegisterInfo.h"
29 : #include "llvm/CodeGen/TargetSubtargetInfo.h"
30 : #include "llvm/Config/llvm-config.h"
31 : #include "llvm/MC/MCInstrItineraries.h"
32 : #include "llvm/Support/CommandLine.h"
33 : #include "llvm/Support/Debug.h"
34 : #include "llvm/Support/raw_ostream.h"
35 : using namespace llvm;
36 :
37 : #define DEBUG_TYPE "pre-RA-sched"
38 :
39 : STATISTIC(LoadsClustered, "Number of loads clustered together");
40 :
41 : // This allows the latency-based scheduler to notice high latency instructions
42 : // without a target itinerary. The choice of number here has more to do with
43 : // balancing scheduler heuristics than with the actual machine latency.
44 : static cl::opt<int> HighLatencyCycles(
45 : "sched-high-latency-cycles", cl::Hidden, cl::init(10),
46 : cl::desc("Roughly estimate the number of cycles that 'long latency'"
47 : "instructions take for targets with no itinerary"));
48 :
49 1269050 : ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
50 : : ScheduleDAG(mf), BB(nullptr), DAG(nullptr),
51 1269050 : InstrItins(mf.getSubtarget().getInstrItineraryData()) {}
52 :
53 : /// Run - perform scheduling.
54 : ///
55 1269050 : void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) {
56 1269050 : BB = bb;
57 1269050 : DAG = dag;
58 :
59 : // Clear the scheduler's SUnit DAG.
60 1269050 : ScheduleDAG::clearDAG();
61 : Sequence.clear();
62 :
63 : // Invoke the target's selection of scheduler.
64 1269049 : Schedule();
65 1269049 : }
66 :
67 : /// newSUnit - Creates a new SUnit and returns a pointer to it.
68 : ///
69 16310483 : SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) {
70 : #ifndef NDEBUG
71 : const SUnit *Addr = nullptr;
72 : if (!SUnits.empty())
73 : Addr = &SUnits[0];
74 : #endif
75 32620966 : SUnits.emplace_back(N, (unsigned)SUnits.size());
76 : assert((Addr == nullptr || Addr == &SUnits[0]) &&
77 : "SUnits std::vector reallocated on the fly!");
78 16310482 : SUnits.back().OrigNode = &SUnits.back();
79 : SUnit *SU = &SUnits.back();
80 16310482 : const TargetLowering &TLI = DAG->getTargetLoweringInfo();
81 16310482 : if (!N ||
82 16310352 : (N->isMachineOpcode() &&
83 : N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF))
84 27296 : SU->SchedulingPref = Sched::None;
85 : else
86 16283186 : SU->SchedulingPref = TLI.getSchedulingPreference(N);
87 16310483 : return SU;
88 : }
89 :
90 275 : SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
91 275 : SUnit *SU = newSUnit(Old->getNode());
92 275 : SU->OrigNode = Old->OrigNode;
93 275 : SU->Latency = Old->Latency;
94 275 : SU->isVRegCycle = Old->isVRegCycle;
95 275 : SU->isCall = Old->isCall;
96 275 : SU->isCallOp = Old->isCallOp;
97 275 : SU->isTwoAddress = Old->isTwoAddress;
98 275 : SU->isCommutable = Old->isCommutable;
99 275 : SU->hasPhysRegDefs = Old->hasPhysRegDefs;
100 275 : SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
101 275 : SU->isScheduleHigh = Old->isScheduleHigh;
102 275 : SU->isScheduleLow = Old->isScheduleLow;
103 275 : SU->SchedulingPref = Old->SchedulingPref;
104 275 : Old->isCloned = true;
105 275 : return SU;
106 : }
107 :
108 : /// CheckForPhysRegDependency - Check if the dependency between def and use of
109 : /// a specified operand is a physical register dependency. If so, returns the
110 : /// register and the cost of copying the register.
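 : ///
 : /// For example (illustrative), with a use operand list such as
 : ///   CopyToReg chain, Reg:$physreg, (machine node implicitly defining $physreg)
 : /// the dependency is physical: PhysReg is set to that register and Cost to
 : /// the copy cost of its minimal register class.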
111 20096981 : static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
112 : const TargetRegisterInfo *TRI,
113 : const TargetInstrInfo *TII,
114 : unsigned &PhysReg, int &Cost) {
115 20096981 : if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
116 : return;
117 :
118 7228106 : unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
119 3614053 : if (TargetRegisterInfo::isVirtualRegister(Reg))
120 : return;
121 :
122 2424896 : unsigned ResNo = User->getOperand(2).getResNo();
123 2424896 : if (Def->getOpcode() == ISD::CopyFromReg &&
124 553490 : cast<RegisterSDNode>(Def->getOperand(1))->getReg() == Reg) {
125 10522 : PhysReg = Reg;
126 2414374 : } else if (Def->isMachineOpcode()) {
127 2148151 : const MCInstrDesc &II = TII->get(Def->getMachineOpcode());
128 4296302 : if (ResNo >= II.getNumDefs() &&
129 221295 : II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg)
130 221240 : PhysReg = Reg;
131 : }
132 :
133 2424896 : if (PhysReg != 0) {
134 : const TargetRegisterClass *RC =
135 231762 : TRI->getMinimalPhysRegClass(Reg, Def->getSimpleValueType(ResNo));
136 463524 : Cost = RC->getCopyCost();
137 : }
138 : }
139 :
140 : // Helper for AddGlue to clone node operands.
141 176291 : static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, ArrayRef<EVT> VTs,
142 : SDValue ExtraOper = SDValue()) {
143 176291 : SmallVector<SDValue, 8> Ops(N->op_begin(), N->op_end());
144 176291 : if (ExtraOper.getNode())
145 78317 : Ops.push_back(ExtraOper);
146 :
147 176291 : SDVTList VTList = DAG->getVTList(VTs);
148 : MachineSDNode *MN = dyn_cast<MachineSDNode>(N);
149 :
150 : // Store memory references.
151 : SmallVector<MachineMemOperand *, 2> MMOs;
152 176291 : if (MN)
153 : MMOs.assign(MN->memoperands_begin(), MN->memoperands_end());
154 :
155 352582 : DAG->MorphNodeTo(N, N->getOpcode(), VTList, Ops);
156 :
157 : // Reset the memory references
158 176291 : if (MN)
159 176291 : DAG->setNodeMemRefs(MN, MMOs);
160 176291 : }
161 :
162 221999 : static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
163 221999 : SDNode *GlueDestNode = Glue.getNode();
164 :
165 : // Don't add glue from a node to itself.
166 221999 : if (GlueDestNode == N) return false;
167 :
168 : // Don't add a glue operand to something that already uses glue.
169 221992 : if (GlueDestNode &&
170 235038 : N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
171 6 : return false;
172 : }
173 : // Don't add glue to something that already has a glue value.
174 443972 : if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return false;
175 :
176 : SmallVector<EVT, 4> VTs(N->value_begin(), N->value_end());
177 176264 : if (AddGlue)
178 78344 : VTs.push_back(MVT::Glue);
179 :
180 176264 : CloneNodeWithValues(N, DAG, VTs, Glue);
181 :
182 : return true;
183 : }
184 :
185 : // Cleanup after unsuccessful AddGlue. Use the standard method of morphing the
186 : // node even though simply shrinking the value list is sufficient.
187 27 : static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) {
188 : assert((N->getValueType(N->getNumValues() - 1) == MVT::Glue &&
189 : !N->hasAnyUseOfValue(N->getNumValues() - 1)) &&
190 : "expected an unused glue value");
191 :
192 54 : CloneNodeWithValues(N, DAG,
193 54 : makeArrayRef(N->value_begin(), N->getNumValues() - 1));
194 27 : }
195 :
196 : /// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
197 : /// This function finds loads of the same base and different offsets. If the
198 : /// offsets are not far apart (target specific), it adds MVT::Glue inputs and
199 : /// outputs to ensure they are scheduled together and in order. This
200 : /// optimization may benefit some targets by improving cache locality.
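 : ///
 : /// For example (illustrative), two loads hanging off the same chain,
 : ///   (load ch, [base+0])  and  (load ch, [base+4]),
 : /// get MVT::Glue edges added so the scheduler keeps them adjacent and emits
 : /// them in order of increasing offset.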
201 2552990 : void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
202 : SDNode *Chain = nullptr;
203 2552990 : unsigned NumOps = Node->getNumOperands();
204 5105980 : if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
205 : Chain = Node->getOperand(NumOps-1).getNode();
206 2552990 : if (!Chain)
207 2455043 : return;
208 :
209 : // Look for other loads of the same chain. Find loads that are loading from
210 : // the same base pointer and different offsets.
211 : SmallPtrSet<SDNode*, 16> Visited;
212 : SmallVector<int64_t, 4> Offsets;
213 : DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode.
214 : bool Cluster = false;
215 : SDNode *Base = Node;
216 : // This algorithm requires a reasonably low use count before finding a match
217 : // to avoid uselessly blowing up compile time in large blocks.
218 : unsigned UseCount = 0;
219 9738473 : for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
220 9738473 : I != E && UseCount < 100; ++I, ++UseCount) {
221 : SDNode *User = *I;
222 7367209 : if (User == Node || !Visited.insert(User).second)
223 6958524 : continue;
224 : int64_t Offset1, Offset2;
225 4949799 : if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
226 408911 : Offset1 == Offset2)
227 : // FIXME: Should be ok if the addresses are identical. But earlier
228 : // optimizations really should have eliminated one of the loads.
229 : continue;
230 408685 : if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
231 115693 : Offsets.push_back(Offset1);
232 408685 : O2SMap.insert(std::make_pair(Offset2, User));
233 408685 : Offsets.push_back(Offset2);
234 408685 : if (Offset2 < Offset1)
235 : Base = User;
236 : Cluster = true;
237 : // Reset UseCount to allow more matches.
238 : UseCount = 0;
239 : }
240 :
241 2371264 : if (!Cluster)
242 : return;
243 :
244 : // Sort them in increasing order.
245 : llvm::sort(Offsets);
246 :
247 : // Check if the loads are close enough.
248 : SmallVector<SDNode*, 4> Loads;
249 : unsigned NumLoads = 0;
250 115693 : int64_t BaseOff = Offsets[0];
251 115693 : SDNode *BaseLoad = O2SMap[BaseOff];
252 115693 : Loads.push_back(BaseLoad);
253 239745 : for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
254 147807 : int64_t Offset = Offsets[i];
255 147807 : SDNode *Load = O2SMap[Offset];
256 147807 : if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,NumLoads))
257 : break; // Stop right here. Ignore loads that are further away.
258 124052 : Loads.push_back(Load);
259 124052 : ++NumLoads;
260 : }
261 :
262 115693 : if (NumLoads == 0)
263 : return;
264 :
265 : // Cluster loads by adding MVT::Glue outputs and inputs. This also
266 : // ensures they are scheduled in order of increasing addresses.
267 97947 : SDNode *Lead = Loads[0];
268 : SDValue InGlue = SDValue(nullptr, 0);
269 97947 : if (AddGlue(Lead, InGlue, true, DAG))
270 143232 : InGlue = SDValue(Lead, Lead->getNumValues() - 1);
271 221999 : for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
272 124052 : bool OutGlue = I < E - 1;
273 124052 : SDNode *Load = Loads[I];
274 :
275 : // If AddGlue fails, we could leave an unused glue value. This should not
276 : // cause any problems; any dangling glue is removed below via RemoveUnusedGlue.
277 124052 : if (AddGlue(Load, InGlue, OutGlue, DAG)) {
278 104648 : if (OutGlue)
279 13456 : InGlue = SDValue(Load, Load->getNumValues() - 1);
280 :
281 : ++LoadsClustered;
282 : }
283 19404 : else if (!OutGlue && InGlue.getNode())
284 27 : RemoveUnusedGlue(InGlue.getNode(), DAG);
285 : }
286 : }
287 :
288 : /// ClusterNodes - Cluster certain nodes which should be scheduled together.
289 : ///
290 1269036 : void ScheduleDAGSDNodes::ClusterNodes() {
291 39244619 : for (SDNode &NI : DAG->allnodes()) {
292 : SDNode *Node = &NI;
293 37975583 : if (!Node || !Node->isMachineOpcode())
294 : continue;
295 :
296 : unsigned Opc = Node->getMachineOpcode();
297 12152512 : const MCInstrDesc &MCID = TII->get(Opc);
298 24305024 : if (MCID.mayLoad())
299 : // Cluster loads from "near" addresses into combined SUnits.
300 2552990 : ClusterNeighboringLoads(Node);
301 : }
302 1269036 : }
303 :
304 1269036 : void ScheduleDAGSDNodes::BuildSchedUnits() {
305 : // During scheduling, the NodeId field of SDNode is used to map SDNodes
306 : // to their associated SUnits by holding SUnits table indices. A value
307 : // of -1 means the SDNode does not yet have an associated SUnit.
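 : // For example (illustrative), once a node N has been processed, its SUnit
 : // can be recovered with
 : //   SUnit *SU = &SUnits[N->getNodeId()];
 : // which is how AddSchedEdges and the call-operand pass below map SDNodes
 : // back to their SUnits.
 :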
308 : unsigned NumNodes = 0;
309 39244616 : for (SDNode &NI : DAG->allnodes()) {
310 : NI.setNodeId(-1);
311 37975580 : ++NumNodes;
312 : }
313 :
314 : // Reserve entries in the vector for each of the SUnits we are creating. This
315 : // ensures that reallocation of the vector won't happen, so SUnit*'s won't get
316 : // invalidated.
317 : // FIXME: Multiply by 2 because we may clone nodes during scheduling.
318 : // This is a temporary workaround.
319 1269036 : SUnits.reserve(NumNodes * 2);
320 :
321 : // Add all nodes in depth first order.
322 : SmallVector<SDNode*, 64> Worklist;
323 : SmallPtrSet<SDNode*, 32> Visited;
324 1269036 : Worklist.push_back(DAG->getRoot().getNode());
325 1269036 : Visited.insert(DAG->getRoot().getNode());
326 :
327 : SmallVector<SUnit*, 8> CallSUnits;
328 39193291 : while (!Worklist.empty()) {
329 : SDNode *NI = Worklist.pop_back_val();
330 :
331 : // Add all operands to the worklist unless they've already been added.
332 116971593 : for (const SDValue &Op : NI->op_values())
333 79047338 : if (Visited.insert(Op.getNode()).second)
334 36655217 : Worklist.push_back(Op.getNode());
335 :
336 37924255 : if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
337 21614447 : continue;
338 :
339 : // If this node has already been processed, stop now.
340 20309250 : if (NI->getNodeId() != -1) continue;
341 :
342 16309808 : SUnit *NodeSUnit = newSUnit(NI);
343 :
344 : // See if anything is glued to this node; if so, add it to the set of glued
345 : // nodes. Nodes can have at most one glue input and one glue output. Glue
346 : // is required to be the last operand and result of a node.
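 : // For example (illustrative), a call lowered as the glued sequence
 : //   CopyToReg (argument) -> CALL -> CopyFromReg (result)
 : // collapses into a single SUnit whose representative node is the
 : // bottom-most node of the glued chain.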
347 :
348 : // Scan up to find glued preds.
349 : SDNode *N = NI;
350 40558888 : while (N->getNumOperands() &&
351 40020930 : N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
352 : N = N->getOperand(N->getNumOperands()-1).getNode();
353 : assert(N->getNodeId() == -1 && "Node already inserted!");
354 3969636 : N->setNodeId(NodeSUnit->NodeNum);
355 3969636 : if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
356 998936 : NodeSUnit->isCall = true;
357 : }
358 :
359 : // Scan down to find any glued succs.
360 : N = NI;
361 32649422 : while (N->getValueType(N->getNumValues()-1) == MVT::Glue) {
362 : SDValue GlueVal(N, N->getNumValues()-1);
363 :
364 : // There are either zero or one users of the Glue result.
365 : bool HasGlueUse = false;
366 2057697 : for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
367 4676727 : UI != E; ++UI)
368 2648836 : if (GlueVal.isOperandOf(*UI)) {
369 : HasGlueUse = true;
370 : assert(N->getNodeId() == -1 && "Node already inserted!");
371 29806 : N->setNodeId(NodeSUnit->NodeNum);
372 : N = *UI;
373 29806 : if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
374 0 : NodeSUnit->isCall = true;
375 : break;
376 : }
377 2057697 : if (!HasGlueUse) break;
378 : }
379 :
380 16309808 : if (NodeSUnit->isCall)
381 998936 : CallSUnits.push_back(NodeSUnit);
382 :
383 : // Schedule zero-latency TokenFactor below any nodes that may increase the
384 : // schedule height. Otherwise, ancestors of the TokenFactor may appear to
385 : // have false stalls.
386 16309808 : if (NI->getOpcode() == ISD::TokenFactor)
387 1411320 : NodeSUnit->isScheduleLow = true;
388 :
389 : // If there are glue operands involved, N is now the bottom-most node
390 : // of the sequence of nodes that are glued together.
391 : // Update the SUnit.
392 16309808 : NodeSUnit->setNode(N);
393 : assert(N->getNodeId() == -1 && "Node already inserted!");
394 16309808 : N->setNodeId(NodeSUnit->NodeNum);
395 :
396 : // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
397 16309808 : InitNumRegDefsLeft(NodeSUnit);
398 :
399 : // Assign the Latency field of NodeSUnit using target-provided information.
400 16309808 : computeLatency(NodeSUnit);
401 : }
402 :
403 : // Find all call operands.
404 2267972 : while (!CallSUnits.empty()) {
405 : SUnit *SU = CallSUnits.pop_back_val();
406 4473494 : for (const SDNode *SUNode = SU->getNode(); SUNode;
407 : SUNode = SUNode->getGluedNode()) {
408 4473494 : if (SUNode->getOpcode() != ISD::CopyToReg)
409 : continue;
410 2064825 : SDNode *SrcN = SUNode->getOperand(2).getNode();
411 2064825 : if (isPassiveNode(SrcN)) continue; // Not scheduled.
412 2017735 : SUnit *SrcSU = &SUnits[SrcN->getNodeId()];
413 2017735 : SrcSU->isCallOp = true;
414 : }
415 : }
416 1269036 : }
417 :
418 1269036 : void ScheduleDAGSDNodes::AddSchedEdges() {
419 1269036 : const TargetSubtargetInfo &ST = MF.getSubtarget();
420 :
421 : // Check to see if the scheduler cares about latencies.
422 1269036 : bool UnitLatencies = forceUnitLatencies();
423 :
424 : // Pass 2: add the preds, succs, etc.
425 18847880 : for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
426 16309808 : SUnit *SU = &SUnits[su];
427 16309808 : SDNode *MainNode = SU->getNode();
428 :
429 16309808 : if (MainNode->isMachineOpcode()) {
430 : unsigned Opc = MainNode->getMachineOpcode();
431 10642256 : const MCInstrDesc &MCID = TII->get(Opc);
432 66027336 : for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
433 : if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
434 514774 : SU->isTwoAddress = true;
435 514774 : break;
436 : }
437 : }
438 21284512 : if (MCID.isCommutable())
439 207919 : SU->isCommutable = true;
440 : }
441 :
442 : // Find all predecessors and successors of the group.
443 20309249 : for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
444 20309249 : if (N->isMachineOpcode() &&
445 36443178 : TII->get(N->getMachineOpcode()).getImplicitDefs()) {
446 2910989 : SU->hasPhysRegClobbers = true;
447 2910989 : unsigned NumUsed = InstrEmitter::CountResults(N);
448 5545188 : while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
449 : --NumUsed; // Skip over unused values at the end.
450 11643952 : if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
451 219158 : SU->hasPhysRegDefs = true;
452 : }
453 :
454 99356584 : for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
455 79047335 : SDNode *OpN = N->getOperand(i).getNode();
456 86900156 : if (isPassiveNode(OpN)) continue; // Not scheduled.
457 27949802 : SUnit *OpSU = &SUnits[OpN->getNodeId()];
458 : assert(OpSU && "Node has no SUnit!");
459 27949802 : if (OpSU == SU) continue; // In the same group.
460 :
461 20096981 : EVT OpVT = N->getOperand(i).getValueType();
462 : assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
463 : bool isChain = OpVT == MVT::Other;
464 :
465 20096981 : unsigned PhysReg = 0;
466 20096981 : int Cost = 1;
467 : // Determine if this is a physical register dependency.
468 20096981 : CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
469 : assert((PhysReg == 0 || !isChain) &&
470 : "Chain dependence via physreg data?");
471 : // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
472 : // emits a copy from the physical register to a virtual register unless
473 : // it requires a cross class copy (cost < 0). That means we are only
474 : // treating "expensive to copy" register dependency as physical register
475 : // dependency. This may change in the future though.
476 20096981 : if (Cost >= 0 && !StressSched)
477 19870467 : PhysReg = 0;
478 :
479 : // If this is a ctrl dep, latency is 1.
480 20096981 : unsigned OpLatency = isChain ? 1 : OpSU->Latency;
481 : // Special-case TokenFactor chains as zero-latency.
482 : if (isChain && OpN->getOpcode() == ISD::TokenFactor)
483 : OpLatency = 0;
484 :
485 : SDep Dep = isChain ? SDep(OpSU, SDep::Barrier)
486 20096981 : : SDep(OpSU, SDep::Data, PhysReg);
487 : Dep.setLatency(OpLatency);
488 20096981 : if (!isChain && !UnitLatencies) {
489 172536 : computeOperandLatency(OpN, N, i, Dep);
490 172536 : ST.adjustSchedDependency(OpSU, SU, Dep);
491 : }
492 :
493 20096981 : if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
494 : // Multiple register uses are combined in the same SUnit. For example,
495 : // we could have a set of glued nodes with all their defs consumed by
496 : // another set of glued nodes. Register pressure tracking sees this as
497 : // a single use, so to keep pressure balanced we reduce the defs.
498 : //
499 : // We can't tell (without more book-keeping) if this results from
500 : // glued nodes or duplicate operands. As long as we don't reduce
501 : // NumRegDefsLeft to zero, we handle the common cases well.
502 42315 : --OpSU->NumRegDefsLeft;
503 : }
504 : }
505 : }
506 : }
507 1269036 : }
508 :
509 : /// BuildSchedGraph - Build the SUnit graph from the selection DAG we are
510 : /// given as input. This SUnit graph is similar to the SelectionDAG, but
511 : /// excludes nodes that aren't interesting to scheduling, and represents
512 : /// glued together nodes with a single SUnit.
513 1269036 : void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
514 : // Cluster certain nodes which should be scheduled together.
515 1269036 : ClusterNodes();
516 : // Populate the SUnits array.
517 1269036 : BuildSchedUnits();
518 : // Compute all the scheduling dependencies between nodes.
519 1269036 : AddSchedEdges();
520 1269036 : }
521 :
522 : // Initialize NumNodeDefs for the current Node's opcode.
523 21224061 : void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
524 : // Check for phys reg copy.
525 21224061 : if (!Node)
526 8512395 : return;
527 :
528 21224061 : if (!Node->isMachineOpcode()) {
529 8483937 : if (Node->getOpcode() == ISD::CopyFromReg)
530 2259109 : NodeNumDefs = 1;
531 : else
532 6224828 : NodeNumDefs = 0;
533 8483937 : return;
534 : }
535 : unsigned POpc = Node->getMachineOpcode();
536 12740124 : if (POpc == TargetOpcode::IMPLICIT_DEF) {
537 : // No register need be allocated for this.
538 28359 : NodeNumDefs = 0;
539 28359 : return;
540 : }
541 12711765 : if (POpc == TargetOpcode::PATCHPOINT &&
542 146 : Node->getValueType(0) == MVT::Other) {
543 : // PATCHPOINT is defined to have one result, but it might really have none
544 : // if we're not using CallingConv::AnyReg. Don't mistake the chain for a
545 : // real definition.
546 99 : NodeNumDefs = 0;
547 99 : return;
548 : }
549 25423332 : unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs();
550 : // Some instructions define regs that are not represented in the selection DAG
551 : // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
552 25423332 : NodeNumDefs = std::min(Node->getNumValues(), NRegDefs);
553 12711666 : DefIdx = 0;
554 : }
555 :
556 : // Construct a RegDefIter for this SUnit and find the first valid value.
557 17056983 : ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU,
558 17056983 : const ScheduleDAGSDNodes *SD)
559 17056983 : : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) {
560 17056983 : InitNodeNumDefs();
561 17056983 : Advance();
562 17056982 : }
563 :
564 : // Advance to the next valid value defined by the SUnit.
565 24933352 : void ScheduleDAGSDNodes::RegDefIter::Advance() {
566 29100430 : for (;Node;) { // Visit all glued nodes.
567 29192358 : for (;DefIdx < NodeNumDefs; ++DefIdx) {
568 8128331 : if (!Node->hasAnyUseOfValue(DefIdx))
569 : continue;
570 8036403 : ValueType = Node->getSimpleValueType(DefIdx);
571 8036403 : ++DefIdx;
572 8036403 : return; // Found a normal regdef.
573 : }
574 21064027 : Node = Node->getGluedNode();
575 21064027 : if (!Node) {
576 : return; // No values left to visit.
577 : }
578 4167078 : InitNodeNumDefs();
579 : }
580 : }
581 :
582 16310077 : void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
583 : assert(SU->NumRegDefsLeft == 0 && "expect a new node");
584 23640848 : for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {
585 : assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected");
586 7330770 : ++SU->NumRegDefsLeft;
587 : }
588 16310078 : }
589 :
590 16310078 : void ScheduleDAGSDNodes::computeLatency(SUnit *SU) {
591 16310078 : SDNode *N = SU->getNode();
592 :
593 : // TokenFactor operands are considered zero latency, and some schedulers
594 : // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
595 : // whenever node latency is nonzero.
596 16310078 : if (N && N->getOpcode() == ISD::TokenFactor) {
597 1411320 : SU->Latency = 0;
598 1411320 : return;
599 : }
600 :
601 : // Check to see if the scheduler cares about latencies.
602 14898758 : if (forceUnitLatencies()) {
603 14695744 : SU->Latency = 1;
604 14695744 : return;
605 : }
606 :
607 203014 : if (!InstrItins || InstrItins->isEmpty()) {
608 221467 : if (N && N->isMachineOpcode() &&
609 195536 : TII->isHighLatencyDef(N->getMachineOpcode()))
610 0 : SU->Latency = HighLatencyCycles;
611 : else
612 123699 : SU->Latency = 1;
613 123699 : return;
614 : }
615 :
616 : // Compute the latency for the node. We use the sum of the latencies for
617 : // all nodes glued together into this SUnit.
618 79315 : SU->Latency = 0;
619 104997 : for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
620 104997 : if (N->isMachineOpcode())
621 67150 : SU->Latency += TII->getInstrLatency(InstrItins, N);
622 : }
623 :
624 172536 : void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
625 : unsigned OpIdx, SDep& dep) const{
626 : // Check to see if the scheduler cares about latencies.
627 172536 : if (forceUnitLatencies())
628 : return;
629 :
630 172536 : if (dep.getKind() != SDep::Data)
631 : return;
632 :
633 172536 : unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
634 172536 : if (Use->isMachineOpcode())
635 : // Adjust the use operand index by num of defs.
636 531596 : OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
637 172536 : int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
638 172536 : if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg &&
639 6960 : !BB->succ_empty()) {
640 542 : unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
641 271 : if (TargetRegisterInfo::isVirtualRegister(Reg))
642 : // This copy is a liveout value. It is likely coalesced, so reduce the
643 : // latency so as not to penalize the def.
644 : // FIXME: need target specific adjustment here?
645 88 : Latency = (Latency > 1) ? Latency - 1 : 1;
646 : }
647 172536 : if (Latency >= 0)
648 64707 : dep.setLatency(Latency);
649 : }
650 :
651 0 : void ScheduleDAGSDNodes::dumpNode(const SUnit &SU) const {
652 : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
653 : dumpNodeName(SU);
654 : dbgs() << ": ";
655 :
656 : if (!SU.getNode()) {
657 : dbgs() << "PHYS REG COPY\n";
658 : return;
659 : }
660 :
661 : SU.getNode()->dump(DAG);
662 : dbgs() << "\n";
663 : SmallVector<SDNode *, 4> GluedNodes;
664 : for (SDNode *N = SU.getNode()->getGluedNode(); N; N = N->getGluedNode())
665 : GluedNodes.push_back(N);
666 : while (!GluedNodes.empty()) {
667 : dbgs() << " ";
668 : GluedNodes.back()->dump(DAG);
669 : dbgs() << "\n";
670 : GluedNodes.pop_back();
671 : }
672 : #endif
673 0 : }
674 :
675 0 : void ScheduleDAGSDNodes::dump() const {
676 : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
677 : if (EntrySU.getNode() != nullptr)
678 : dumpNodeAll(EntrySU);
679 : for (const SUnit &SU : SUnits)
680 : dumpNodeAll(SU);
681 : if (ExitSU.getNode() != nullptr)
682 : dumpNodeAll(ExitSU);
683 : #endif
684 0 : }
685 :
686 : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
687 : void ScheduleDAGSDNodes::dumpSchedule() const {
688 : for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
689 : if (SUnit *SU = Sequence[i])
690 : dumpNode(*SU);
691 : else
692 : dbgs() << "**** NOOP ****\n";
693 : }
694 : }
695 : #endif
696 :
697 : #ifndef NDEBUG
698 : /// VerifyScheduledSequence - Verify that all SUnits were scheduled and that
699 : /// their state is consistent with the nodes listed in Sequence.
700 : ///
701 : void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) {
702 : unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp);
703 : unsigned Noops = 0;
704 : for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
705 : if (!Sequence[i])
706 : ++Noops;
707 : assert(Sequence.size() - Noops == ScheduledNodes &&
708 : "The number of nodes scheduled doesn't match the expected number!");
709 : }
710 : #endif // NDEBUG
711 :
712 : /// ProcessSDDbgValues - Process SDDbgValues associated with this node.
713 : static void
714 214378 : ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
715 : SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders,
716 : DenseMap<SDValue, unsigned> &VRBaseMap, unsigned Order) {
717 214378 : if (!N->getHasDebugValue())
718 : return;
719 :
720 : // Opportunistically insert immediate dbg_value uses, i.e. those with the same
721 : // source order number as N.
722 16010 : MachineBasicBlock *BB = Emitter.getBlock();
723 : MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
724 65651 : for (auto DV : DAG->GetDbgValues(N)) {
725 33631 : if (DV->isInvalidated())
726 : continue;
727 33629 : unsigned DVOrder = DV->getOrder();
728 33629 : if (!Order || DVOrder == Order) {
729 32154 : MachineInstr *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap);
730 32154 : if (DbgMI) {
731 64308 : Orders.push_back({DVOrder, DbgMI});
732 : BB->insert(InsertPos, DbgMI);
733 : }
734 : DV->setIsInvalidated();
735 : }
736 : }
737 : }
738 :
739 : // ProcessSourceNode - Process nodes with source order numbers. These are added
740 : // to a vector which EmitSchedule uses to determine how to insert dbg_value
741 : // instructions in the right order.
742 : static void
743 230859 : ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
744 : DenseMap<SDValue, unsigned> &VRBaseMap,
745 : SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders,
746 : SmallSet<unsigned, 8> &Seen) {
747 230859 : unsigned Order = N->getIROrder();
748 230859 : if (!Order || !Seen.insert(Order).second) {
749 : // Process any valid SDDbgValues even if the node does not have an order
750 : // assigned.
751 134139 : ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0);
752 150620 : return;
753 : }
754 :
755 96720 : MachineBasicBlock *BB = Emitter.getBlock();
756 : auto IP = Emitter.getInsertPos();
757 96720 : if (IP == BB->begin() || BB->back().isPHI() ||
758 : // Fast-isel may have inserted some instructions, in which case the
759 : // BB->back().isPHI() test will not fire when we want it to.
760 80239 : std::prev(IP)->isPHI()) {
761 : // Did not insert any instruction.
762 32962 : Orders.push_back({Order, (MachineInstr *)nullptr});
763 16481 : return;
764 : }
765 :
766 80239 : Orders.push_back({Order, &*std::prev(IP)});
767 80239 : ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
768 : }
769 :
770 130 : void ScheduleDAGSDNodes::
771 : EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
772 : MachineBasicBlock::iterator InsertPos) {
773 0 : for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
774 130 : I != E; ++I) {
775 130 : if (I->isCtrl()) continue; // ignore chain preds
776 130 : if (I->getSUnit()->CopyDstRC) {
777 : // Copy to physical register.
778 65 : DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
779 : assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
780 : // Find the destination physical register.
781 : unsigned Reg = 0;
782 12 : for (SUnit::const_succ_iterator II = SU->Succs.begin(),
783 77 : EE = SU->Succs.end(); II != EE; ++II) {
784 77 : if (II->isCtrl()) continue; // ignore chain succs
785 76 : if (II->getReg()) {
786 : Reg = II->getReg();
787 : break;
788 : }
789 : }
790 195 : BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
791 65 : .addReg(VRI->second);
792 : } else {
793 : // Copy from physical register.
794 : assert(I->getReg() && "Unknown physical register!");
795 130 : unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
796 65 : bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
797 : (void)isNew; // Silence compiler warning.
798 : assert(isNew && "Node emitted out of order - early");
799 195 : BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
800 65 : .addReg(I->getReg());
801 : }
802 : break;
803 : }
804 130 : }
805 :
806 : /// EmitSchedule - Emit the machine code in scheduled order. Return the new
807 : /// InsertPos and MachineBasicBlock that contains this insertion
808 : /// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does
809 : /// not necessarily refer to the returned BB. The emitter may split blocks.
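 : /// A caller is therefore expected to continue emitting at the returned block
 : /// and updated InsertPos, e.g. (illustrative):
 : ///   MachineBasicBlock *NewBB = Scheduler->EmitSchedule(InsertPos);
 : /// rather than assuming BB is still the insertion block.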
810 1269036 : MachineBasicBlock *ScheduleDAGSDNodes::
811 : EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
812 1269036 : InstrEmitter Emitter(BB, InsertPos);
813 : DenseMap<SDValue, unsigned> VRBaseMap;
814 : DenseMap<SUnit*, unsigned> CopyVRBaseMap;
815 : SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
816 1269036 : SmallSet<unsigned, 8> Seen;
817 1269035 : bool HasDbg = DAG->hasDebugValues();
818 :
819 : // If this is the first BB, emit byval parameter dbg_value's.
820 1269035 : if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) {
821 : SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin();
822 : SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd();
823 1675 : for (; PDI != PDE; ++PDI) {
824 6 : MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap);
825 6 : if (DbgMI)
826 6 : BB->insert(InsertPos, DbgMI);
827 : }
828 : }
829 :
830 18848416 : for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
831 16310345 : SUnit *SU = Sequence[i];
832 16310345 : if (!SU) {
833 : // Null SUnit* is a noop.
834 0 : TII->insertNoop(*Emitter.getBlock(), InsertPos);
835 130 : continue;
836 : }
837 :
838 : // For pre-regalloc scheduling, create instructions corresponding to the
839 : // SDNode and any glued SDNodes and append them to the block.
840 16310345 : if (!SU->getNode()) {
841 : // Emit a copy.
842 130 : EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos);
843 130 : continue;
844 : }
845 :
846 : SmallVector<SDNode *, 4> GluedNodes;
847 40350339 : for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
848 3999442 : GluedNodes.push_back(N);
849 20309661 : while (!GluedNodes.empty()) {
850 3999444 : SDNode *N = GluedNodes.back();
851 3999444 : Emitter.EmitNode(N, SU->OrigNode != SU, SU->isCloned, VRBaseMap);
852 : // Remember the source order of the inserted instruction.
853 3999444 : if (HasDbg)
854 40525 : ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen);
855 : GluedNodes.pop_back();
856 : }
857 16310217 : Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
858 : VRBaseMap);
859 : // Remember the source order of the inserted instruction.
860 16310216 : if (HasDbg)
861 190334 : ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders,
862 : Seen);
863 : }
864 :
865 : // Insert all the dbg_values which have not already been inserted in source
866 : // order sequence.
867 1269036 : if (HasDbg) {
868 19224 : MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI();
869 :
870 : // Sort the source order instructions and use the order to insert debug
871 : // values. Use stable_sort so that DBG_VALUEs are inserted in the same order
872 : // regardless of the host's implementation of std::sort.
873 : std::stable_sort(Orders.begin(), Orders.end(), less_first());
874 19224 : std::stable_sort(DAG->DbgBegin(), DAG->DbgEnd(),
875 : [](const SDDbgValue *LHS, const SDDbgValue *RHS) {
876 0 : return LHS->getOrder() < RHS->getOrder();
877 : });
878 :
879 19224 : SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
880 : SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
881 : // Now emit the rest according to source order.
882 : unsigned LastOrder = 0;
883 122136 : for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) {
884 102912 : unsigned Order = Orders[i].first;
885 102912 : MachineInstr *MI = Orders[i].second;
886 : // Insert all SDDbgValue's whose order(s) are before "Order".
887 102912 : if (!MI)
888 : continue;
889 189525 : for (; DI != DE; ++DI) {
890 171209 : if ((*DI)->getOrder() < LastOrder || (*DI)->getOrder() >= Order)
891 : break;
892 103089 : if ((*DI)->isInvalidated())
893 : continue;
894 :
895 43279 : MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
896 43279 : if (DbgMI) {
897 43279 : if (!LastOrder)
898 : // Insert to start of the BB (after PHIs).
899 25452 : BB->insert(BBBegin, DbgMI);
900 : else {
901 : // Insert at the instruction, which may be in a different
902 : // block, if the block was split by a custom inserter.
903 : MachineBasicBlock::iterator Pos = MI;
904 17827 : MI->getParent()->insert(Pos, DbgMI);
905 : }
906 : }
907 : }
908 : LastOrder = Order;
909 : }
910 : // Add trailing DbgValue's before the terminator. FIXME: May want to add
911 : // some of them before one or more conditional branches?
912 : SmallVector<MachineInstr*, 8> DbgMIs;
913 21106 : for (; DI != DE; ++DI) {
914 1882 : if ((*DI)->isInvalidated())
915 : continue;
916 : assert((*DI)->getOrder() >= LastOrder &&
917 : "emitting DBG_VALUE out of order");
918 791 : if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap))
919 791 : DbgMIs.push_back(DbgMI);
920 : }
921 :
922 19224 : MachineBasicBlock *InsertBB = Emitter.getBlock();
923 19224 : MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator();
924 : InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end());
925 :
926 19224 : SDDbgInfo::DbgLabelIterator DLI = DAG->DbgLabelBegin();
927 : SDDbgInfo::DbgLabelIterator DLE = DAG->DbgLabelEnd();
928 : // Now emit the rest according to source order.
929 : LastOrder = 0;
930 35566 : for (const auto &InstrOrder : Orders) {
931 35323 : unsigned Order = InstrOrder.first;
932 35323 : MachineInstr *MI = InstrOrder.second;
933 35323 : if (!MI)
934 : continue;
935 :
936 : // Insert all SDDbgLabel's whose order(s) are before "Order".
937 1 : for (; DLI != DLE &&
938 18983 : (*DLI)->getOrder() >= LastOrder && (*DLI)->getOrder() < Order;
939 : ++DLI) {
940 1 : MachineInstr *DbgMI = Emitter.EmitDbgLabel(*DLI);
941 1 : if (DbgMI) {
942 1 : if (!LastOrder)
943 : // Insert to start of the BB (after PHIs).
944 0 : BB->insert(BBBegin, DbgMI);
945 : else {
946 : // Insert at the instruction, which may be in a different
947 : // block, if the block was split by a custom inserter.
948 : MachineBasicBlock::iterator Pos = MI;
949 1 : MI->getParent()->insert(Pos, DbgMI);
950 : }
951 : }
952 : }
953 18982 : if (DLI == DLE)
954 : break;
955 :
956 : LastOrder = Order;
957 : }
958 : }
959 :
960 1269036 : InsertPos = Emitter.getInsertPos();
961 1269036 : return Emitter.getBlock();
962 : }
963 :
964 : /// Return the basic block label.
965 0 : std::string ScheduleDAGSDNodes::getDAGName() const {
966 0 : return "sunit-dag." + BB->getFullName();
967 : }