/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp

Bug Summary

File:	llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
Warning:	line 790, column 9 Value stored to 'ProposedColor' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

Show analyzer invocation

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name SIMachineScheduler.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-12/lib/clang/12.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/build-llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/build-llvm/include -I /build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-12/lib/clang/12.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/build-llvm/lib/Target/AMDGPU -fdebug-prefix-map=/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2021-01-03-170124-30778-1 -x c++ /build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp

1	//===-- SIMachineScheduler.cpp - SI Scheduler Interface -------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	/// \file
10	/// SI Machine Scheduler interface
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "SIMachineScheduler.h"
15	#include "AMDGPU.h"
16	#include "SIInstrInfo.h"
17	#include "SIRegisterInfo.h"
18	#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19	#include "llvm/ADT/STLExtras.h"
20	#include "llvm/ADT/SmallVector.h"
21	#include "llvm/CodeGen/LiveInterval.h"
22	#include "llvm/CodeGen/LiveIntervals.h"
23	#include "llvm/CodeGen/MachineInstr.h"
24	#include "llvm/CodeGen/MachineRegisterInfo.h"
25	#include "llvm/CodeGen/MachineScheduler.h"
26	#include "llvm/CodeGen/RegisterPressure.h"
27	#include "llvm/CodeGen/SlotIndexes.h"
28	#include "llvm/CodeGen/TargetRegisterInfo.h"
29	#include "llvm/Support/Debug.h"
30	#include "llvm/Support/ErrorHandling.h"
31	#include "llvm/Support/raw_ostream.h"
32	#include <algorithm>
33	#include <cassert>
34	#include <map>
35	#include <set>
36	#include <utility>
37	#include <vector>
38
39	using namespace llvm;
40
41	#define DEBUG_TYPE"machine-scheduler" "machine-scheduler"
42
43	// This scheduler implements a different scheduling algorithm than
44	// GenericScheduler.
45	//
46	// There are several specific architecture behaviours that can't be modelled
47	// for GenericScheduler:
48	// . When accessing the result of an SGPR load instruction, you have to wait
49	// for all the SGPR load instructions before your current instruction to
50	// have finished.
51	// . When accessing the result of an VGPR load instruction, you have to wait
52	// for all the VGPR load instructions previous to the VGPR load instruction
53	// you are interested in to finish.
54	// . The less the register pressure, the best load latencies are hidden
55	//
56	// Moreover some specifities (like the fact a lot of instructions in the shader
57	// have few dependencies) makes the generic scheduler have some unpredictable
58	// behaviours. For example when register pressure becomes high, it can either
59	// manage to prevent register pressure from going too high, or it can
60	// increase register pressure even more than if it hadn't taken register
61	// pressure into account.
62	//
63	// Also some other bad behaviours are generated, like loading at the beginning
64	// of the shader a constant in VGPR you won't need until the end of the shader.
65	//
66	// The scheduling problem for SI can distinguish three main parts:
67	// . Hiding high latencies (texture sampling, etc)
68	// . Hiding low latencies (SGPR constant loading, etc)
69	// . Keeping register usage low for better latency hiding and general
70	// performance
71	//
72	// Some other things can also affect performance, but are hard to predict
73	// (cache usage, the fact the HW can issue several instructions from different
74	// wavefronts if different types, etc)
75	//
76	// This scheduler tries to solve the scheduling problem by dividing it into
77	// simpler sub-problems. It divides the instructions into blocks, schedules
78	// locally inside the blocks where it takes care of low latencies, and then
79	// chooses the order of the blocks by taking care of high latencies.
80	// Dividing the instructions into blocks helps control keeping register
81	// usage low.
82	//
83	// First the instructions are put into blocks.
84	// We want the blocks help control register usage and hide high latencies
85	// later. To help control register usage, we typically want all local
86	// computations, when for example you create a result that can be comsummed
87	// right away, to be contained in a block. Block inputs and outputs would
88	// typically be important results that are needed in several locations of
89	// the shader. Since we do want blocks to help hide high latencies, we want
90	// the instructions inside the block to have a minimal set of dependencies
91	// on high latencies. It will make it easy to pick blocks to hide specific
92	// high latencies.
93	// The block creation algorithm is divided into several steps, and several
94	// variants can be tried during the scheduling process.
95	//
96	// Second the order of the instructions inside the blocks is chosen.
97	// At that step we do take into account only register usage and hiding
98	// low latency instructions
99	//
100	// Third the block order is chosen, there we try to hide high latencies
101	// and keep register usage low.
102	//
103	// After the third step, a pass is done to improve the hiding of low
104	// latencies.
105	//
106	// Actually when talking about 'low latency' or 'high latency' it includes
107	// both the latency to get the cache (or global mem) data go to the register,
108	// and the bandwidth limitations.
109	// Increasing the number of active wavefronts helps hide the former, but it
110	// doesn't solve the latter, thus why even if wavefront count is high, we have
111	// to try have as many instructions hiding high latencies as possible.
112	// The OpenCL doc says for example latency of 400 cycles for a global mem access,
113	// which is hidden by 10 instructions if the wavefront count is 10.
114
115	// Some figures taken from AMD docs:
116	// Both texture and constant L1 caches are 4-way associative with 64 bytes
117	// lines.
118	// Constant cache is shared with 4 CUs.
119	// For texture sampling, the address generation unit receives 4 texture
120	// addresses per cycle, thus we could expect texture sampling latency to be
121	// equivalent to 4 instructions in the very best case (a VGPR is 64 work items,
122	// instructions in a wavefront group are executed every 4 cycles),
123	// or 16 instructions if the other wavefronts associated to the 3 other VALUs
124	// of the CU do texture sampling too. (Don't take these figures too seriously,
125	// as I'm not 100% sure of the computation)
126	// Data exports should get similar latency.
127	// For constant loading, the cache is shader with 4 CUs.
128	// The doc says "a throughput of 16B/cycle for each of the 4 Compute Unit"
129	// I guess if the other CU don't read the cache, it can go up to 64B/cycle.
130	// It means a simple s_buffer_load should take one instruction to hide, as
131	// well as a s_buffer_loadx2 and potentially a s_buffer_loadx8 if on the same
132	// cache line.
133	//
134	// As of today the driver doesn't preload the constants in cache, thus the
135	// first loads get extra latency. The doc says global memory access can be
136	// 300-600 cycles. We do not specially take that into account when scheduling
137	// As we expect the driver to be able to preload the constants soon.
138
139	// common code //
140
141	#ifndef NDEBUG
142
143	static const char *getReasonStr(SIScheduleCandReason Reason) {
144	switch (Reason) {
145	case NoCand: return "NOCAND";
146	case RegUsage: return "REGUSAGE";
147	case Latency: return "LATENCY";
148	case Successor: return "SUCCESSOR";
149	case Depth: return "DEPTH";
150	case NodeOrder: return "ORDER";
151	}
152	llvm_unreachable("Unknown reason!")::llvm::llvm_unreachable_internal("Unknown reason!", "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 152);
153	}
154
155	#endif
156
157	namespace llvm {
158	namespace SISched {
159	static bool tryLess(int TryVal, int CandVal,
160	SISchedulerCandidate &TryCand,
161	SISchedulerCandidate &Cand,
162	SIScheduleCandReason Reason) {
163	if (TryVal < CandVal) {
164	TryCand.Reason = Reason;
165	return true;
166	}
167	if (TryVal > CandVal) {
168	if (Cand.Reason > Reason)
169	Cand.Reason = Reason;
170	return true;
171	}
172	Cand.setRepeat(Reason);
173	return false;
174	}
175
176	static bool tryGreater(int TryVal, int CandVal,
177	SISchedulerCandidate &TryCand,
178	SISchedulerCandidate &Cand,
179	SIScheduleCandReason Reason) {
180	if (TryVal > CandVal) {
181	TryCand.Reason = Reason;
182	return true;
183	}
184	if (TryVal < CandVal) {
185	if (Cand.Reason > Reason)
186	Cand.Reason = Reason;
187	return true;
188	}
189	Cand.setRepeat(Reason);
190	return false;
191	}
192	} // end namespace SISched
193	} // end namespace llvm
194
195	// SIScheduleBlock //
196
197	void SIScheduleBlock::addUnit(SUnit *SU) {
198	NodeNum2Index[SU->NodeNum] = SUnits.size();
199	SUnits.push_back(SU);
200	}
201
202	#ifndef NDEBUG
203	void SIScheduleBlock::traceCandidate(const SISchedCandidate &Cand) {
204
205	dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
206	dbgs() << '\n';
207	}
208	#endif
209
210	void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,
211	SISchedCandidate &TryCand) {
212	// Initialize the candidate if needed.
213	if (!Cand.isValid()) {
214	TryCand.Reason = NodeOrder;
215	return;
216	}
217
218	if (Cand.SGPRUsage > 60 &&
219	SISched::tryLess(TryCand.SGPRUsage, Cand.SGPRUsage,
220	TryCand, Cand, RegUsage))
221	return;
222
223	// Schedule low latency instructions as top as possible.
224	// Order of priority is:
225	// . Low latency instructions which do not depend on other low latency
226	// instructions we haven't waited for
227	// . Other instructions which do not depend on low latency instructions
228	// we haven't waited for
229	// . Low latencies
230	// . All other instructions
231	// Goal is to get: low latency instructions - independent instructions
232	// - (eventually some more low latency instructions)
233	// - instructions that depend on the first low latency instructions.
234	// If in the block there is a lot of constant loads, the SGPR usage
235	// could go quite high, thus above the arbitrary limit of 60 will encourage
236	// use the already loaded constants (in order to release some SGPRs) before
237	// loading more.
238	if (SISched::tryLess(TryCand.HasLowLatencyNonWaitedParent,
239	Cand.HasLowLatencyNonWaitedParent,
240	TryCand, Cand, SIScheduleCandReason::Depth))
241	return;
242
243	if (SISched::tryGreater(TryCand.IsLowLatency, Cand.IsLowLatency,
244	TryCand, Cand, SIScheduleCandReason::Depth))
245	return;
246
247	if (TryCand.IsLowLatency &&
248	SISched::tryLess(TryCand.LowLatencyOffset, Cand.LowLatencyOffset,
249	TryCand, Cand, SIScheduleCandReason::Depth))
250	return;
251
252	if (SISched::tryLess(TryCand.VGPRUsage, Cand.VGPRUsage,
253	TryCand, Cand, RegUsage))
254	return;
255
256	// Fall through to original instruction order.
257	if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
258	TryCand.Reason = NodeOrder;
259	}
260	}
261
262	SUnit* SIScheduleBlock::pickNode() {
263	SISchedCandidate TopCand;
264
265	for (SUnit* SU : TopReadySUs) {
266	SISchedCandidate TryCand;
267	std::vector<unsigned> pressure;
268	std::vector<unsigned> MaxPressure;
269	// Predict register usage after this instruction.
270	TryCand.SU = SU;
271	TopRPTracker.getDownwardPressure(SU->getInstr(), pressure, MaxPressure);
272	TryCand.SGPRUsage = pressure[AMDGPU::RegisterPressureSets::SReg_32];
273	TryCand.VGPRUsage = pressure[AMDGPU::RegisterPressureSets::VGPR_32];
274	TryCand.IsLowLatency = DAG->IsLowLatencySU[SU->NodeNum];
275	TryCand.LowLatencyOffset = DAG->LowLatencyOffset[SU->NodeNum];
276	TryCand.HasLowLatencyNonWaitedParent =
277	HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]];
278	tryCandidateTopDown(TopCand, TryCand);
279	if (TryCand.Reason != NoCand)
280	TopCand.setBest(TryCand);
281	}
282
283	return TopCand.SU;
284	}
285
286
287	// Schedule something valid.
288	void SIScheduleBlock::fastSchedule() {
289	TopReadySUs.clear();
290	if (Scheduled)
291	undoSchedule();
292
293	for (SUnit* SU : SUnits) {
294	if (!SU->NumPredsLeft)
295	TopReadySUs.push_back(SU);
296	}
297
298	while (!TopReadySUs.empty()) {
299	SUnit *SU = TopReadySUs[0];
300	ScheduledSUnits.push_back(SU);
301	nodeScheduled(SU);
302	}
303
304	Scheduled = true;
305	}
306
307	// Returns if the register was set between first and last.
308	static bool isDefBetween(unsigned Reg,
309	SlotIndex First, SlotIndex Last,
310	const MachineRegisterInfo *MRI,
311	const LiveIntervals *LIS) {
312	for (MachineRegisterInfo::def_instr_iterator
313	UI = MRI->def_instr_begin(Reg),
314	UE = MRI->def_instr_end(); UI != UE; ++UI) {
315	const MachineInstr* MI = &*UI;
316	if (MI->isDebugValue())
317	continue;
318	SlotIndex InstSlot = LIS->getInstructionIndex(*MI).getRegSlot();
319	if (InstSlot >= First && InstSlot <= Last)
320	return true;
321	}
322	return false;
323	}
324
325	void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock,
326	MachineBasicBlock::iterator EndBlock) {
327	IntervalPressure Pressure, BotPressure;
328	RegPressureTracker RPTracker(Pressure), BotRPTracker(BotPressure);
329	LiveIntervals *LIS = DAG->getLIS();
330	MachineRegisterInfo *MRI = DAG->getMRI();
331	DAG->initRPTracker(TopRPTracker);
332	DAG->initRPTracker(BotRPTracker);
333	DAG->initRPTracker(RPTracker);
334
335	// Goes though all SU. RPTracker captures what had to be alive for the SUs
336	// to execute, and what is still alive at the end.
337	for (SUnit* SU : ScheduledSUnits) {
338	RPTracker.setPos(SU->getInstr());
339	RPTracker.advance();
340	}
341
342	// Close the RPTracker to finalize live ins/outs.
343	RPTracker.closeRegion();
344
345	// Initialize the live ins and live outs.
346	TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
347	BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
348
349	// Do not Track Physical Registers, because it messes up.
350	for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
351	if (Register::isVirtualRegister(RegMaskPair.RegUnit))
352	LiveInRegs.insert(RegMaskPair.RegUnit);
353	}
354	LiveOutRegs.clear();
355	// There is several possibilities to distinguish:
356	// 1) Reg is not input to any instruction in the block, but is output of one
357	// 2) 1) + read in the block and not needed after it
358	// 3) 1) + read in the block but needed in another block
359	// 4) Reg is input of an instruction but another block will read it too
360	// 5) Reg is input of an instruction and then rewritten in the block.
361	// result is not read in the block (implies used in another block)
362	// 6) Reg is input of an instruction and then rewritten in the block.
363	// result is read in the block and not needed in another block
364	// 7) Reg is input of an instruction and then rewritten in the block.
365	// result is read in the block but also needed in another block
366	// LiveInRegs will contains all the regs in situation 4, 5, 6, 7
367	// We want LiveOutRegs to contain only Regs whose content will be read after
368	// in another block, and whose content was written in the current block,
369	// that is we want it to get 1, 3, 5, 7
370	// Since we made the MIs of a block to be packed all together before
371	// scheduling, then the LiveIntervals were correct, and the RPTracker was
372	// able to correctly handle 5 vs 6, 2 vs 3.
373	// (Note: This is not sufficient for RPTracker to not do mistakes for case 4)
374	// The RPTracker's LiveOutRegs has 1, 3, (some correct or incorrect)4, 5, 7
375	// Comparing to LiveInRegs is not sufficient to differenciate 4 vs 5, 7
376	// The use of findDefBetween removes the case 4.
377	for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
378	Register Reg = RegMaskPair.RegUnit;
379	if (Reg.isVirtual() &&
380	isDefBetween(Reg, LIS->getInstructionIndex(*BeginBlock).getRegSlot(),
381	LIS->getInstructionIndex(*EndBlock).getRegSlot(), MRI,
382	LIS)) {
383	LiveOutRegs.insert(Reg);
384	}
385	}
386
387	// Pressure = sum_alive_registers register size
388	// Internally llvm will represent some registers as big 128 bits registers
389	// for example, but they actually correspond to 4 actual 32 bits registers.
390	// Thus Pressure is not equal to num_alive_registers * constant.
391	LiveInPressure = TopPressure.MaxSetPressure;
392	LiveOutPressure = BotPressure.MaxSetPressure;
393
394	// Prepares TopRPTracker for top down scheduling.
395	TopRPTracker.closeTop();
396	}
397
398	void SIScheduleBlock::schedule(MachineBasicBlock::iterator BeginBlock,
399	MachineBasicBlock::iterator EndBlock) {
400	if (!Scheduled)
401	fastSchedule();
402
403	// PreScheduling phase to set LiveIn and LiveOut.
404	initRegPressure(BeginBlock, EndBlock);
405	undoSchedule();
406
407	// Schedule for real now.
408
409	TopReadySUs.clear();
410
411	for (SUnit* SU : SUnits) {
412	if (!SU->NumPredsLeft)
413	TopReadySUs.push_back(SU);
414	}
415
416	while (!TopReadySUs.empty()) {
417	SUnit *SU = pickNode();
418	ScheduledSUnits.push_back(SU);
419	TopRPTracker.setPos(SU->getInstr());
420	TopRPTracker.advance();
421	nodeScheduled(SU);
422	}
423
424	// TODO: compute InternalAdditionnalPressure.
425	InternalAdditionnalPressure.resize(TopPressure.MaxSetPressure.size());
426
427	// Check everything is right.
428	#ifndef NDEBUG
429	assert(SUnits.size() == ScheduledSUnits.size() &&((SUnits.size() == ScheduledSUnits.size() && TopReadySUs .empty()) ? static_cast<void> (0) : __assert_fail ("SUnits.size() == ScheduledSUnits.size() && TopReadySUs.empty()" , "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 430, __PRETTY_FUNCTION__))
430	TopReadySUs.empty())((SUnits.size() == ScheduledSUnits.size() && TopReadySUs .empty()) ? static_cast<void> (0) : __assert_fail ("SUnits.size() == ScheduledSUnits.size() && TopReadySUs.empty()" , "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 430, __PRETTY_FUNCTION__));
431	for (SUnit* SU : SUnits) {
432	assert(SU->isScheduled &&((SU->isScheduled && SU->NumPredsLeft == 0) ? static_cast <void> (0) : __assert_fail ("SU->isScheduled && SU->NumPredsLeft == 0" , "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 433, __PRETTY_FUNCTION__))
433	SU->NumPredsLeft == 0)((SU->isScheduled && SU->NumPredsLeft == 0) ? static_cast <void> (0) : __assert_fail ("SU->isScheduled && SU->NumPredsLeft == 0" , "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 433, __PRETTY_FUNCTION__));
434	}
435	#endif
436
437	Scheduled = true;
438	}
439
440	void SIScheduleBlock::undoSchedule() {
441	for (SUnit* SU : SUnits) {
442	SU->isScheduled = false;
443	for (SDep& Succ : SU->Succs) {
444	if (BC->isSUInBlock(Succ.getSUnit(), ID))
445	undoReleaseSucc(SU, &Succ);
446	}
447	}
448	HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
449	ScheduledSUnits.clear();
450	Scheduled = false;
451	}
452
453	void SIScheduleBlock::undoReleaseSucc(SUnit SU, SDep SuccEdge) {
454	SUnit *SuccSU = SuccEdge->getSUnit();
455
456	if (SuccEdge->isWeak()) {
457	++SuccSU->WeakPredsLeft;
458	return;
459	}
460	++SuccSU->NumPredsLeft;
461	}
462
463	void SIScheduleBlock::releaseSucc(SUnit SU, SDep SuccEdge) {
464	SUnit *SuccSU = SuccEdge->getSUnit();
465
466	if (SuccEdge->isWeak()) {
467	--SuccSU->WeakPredsLeft;
468	return;
469	}
470	#ifndef NDEBUG
471	if (SuccSU->NumPredsLeft == 0) {
472	dbgs() << "* Scheduling failed! *\n";
473	DAG->dumpNode(*SuccSU);
474	dbgs() << " has been released too many times!\n";
475	llvm_unreachable(nullptr)::llvm::llvm_unreachable_internal(nullptr, "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 475);
476	}
477	#endif
478
479	--SuccSU->NumPredsLeft;
480	}
481
482	/// Release Successors of the SU that are in the block or not.
483	void SIScheduleBlock::releaseSuccessors(SUnit *SU, bool InOrOutBlock) {
484	for (SDep& Succ : SU->Succs) {
485	SUnit *SuccSU = Succ.getSUnit();
486
487	if (SuccSU->NodeNum >= DAG->SUnits.size())
488	continue;
489
490	if (BC->isSUInBlock(SuccSU, ID) != InOrOutBlock)
491	continue;
492
493	releaseSucc(SU, &Succ);
494	if (SuccSU->NumPredsLeft == 0 && InOrOutBlock)
495	TopReadySUs.push_back(SuccSU);
496	}
497	}
498
499	void SIScheduleBlock::nodeScheduled(SUnit *SU) {
500	// Is in TopReadySUs
501	assert (!SU->NumPredsLeft)((!SU->NumPredsLeft) ? static_cast<void> (0) : __assert_fail ("!SU->NumPredsLeft", "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 501, __PRETTY_FUNCTION__));
502	std::vector<SUnit *>::iterator I = llvm::find(TopReadySUs, SU);
503	if (I == TopReadySUs.end()) {
504	dbgs() << "Data Structure Bug in SI Scheduler\n";
505	llvm_unreachable(nullptr)::llvm::llvm_unreachable_internal(nullptr, "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 505);
506	}
507	TopReadySUs.erase(I);
508
509	releaseSuccessors(SU, true);
510	// Scheduling this node will trigger a wait,
511	// thus propagate to other instructions that they do not need to wait either.
512	if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]])
513	HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
514
515	if (DAG->IsLowLatencySU[SU->NodeNum]) {
516	for (SDep& Succ : SU->Succs) {
517	std::map<unsigned, unsigned>::iterator I =
518	NodeNum2Index.find(Succ.getSUnit()->NodeNum);
519	if (I != NodeNum2Index.end())
520	HasLowLatencyNonWaitedParent[I->second] = 1;
521	}
522	}
523	SU->isScheduled = true;
524	}
525
526	void SIScheduleBlock::finalizeUnits() {
527	// We remove links from outside blocks to enable scheduling inside the block.
528	for (SUnit* SU : SUnits) {
529	releaseSuccessors(SU, false);
530	if (DAG->IsHighLatencySU[SU->NodeNum])
531	HighLatencyBlock = true;
532	}
533	HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0);
534	}
535
536	// we maintain ascending order of IDs
537	void SIScheduleBlock::addPred(SIScheduleBlock *Pred) {
538	unsigned PredID = Pred->getID();
539
540	// Check if not already predecessor.
541	for (SIScheduleBlock* P : Preds) {
542	if (PredID == P->getID())
543	return;
544	}
545	Preds.push_back(Pred);
546
547	assert(none_of(Succs,((none_of(Succs, [=](std::pair<SIScheduleBlock, SIScheduleBlockLinkKind > S) { return PredID == S.first->getID(); }) && "Loop in the Block Graph!") ? static_cast<void> (0) : __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\"" , "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 552, __PRETTY_FUNCTION__))
548	[=](std::pair<SIScheduleBlock,((none_of(Succs, [=](std::pair<SIScheduleBlock, SIScheduleBlockLinkKind > S) { return PredID == S.first->getID(); }) && "Loop in the Block Graph!") ? static_cast<void> (0) : __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\"" , "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 552, __PRETTY_FUNCTION__))
549	SIScheduleBlockLinkKind> S) {((none_of(Succs, [=](std::pair<SIScheduleBlock, SIScheduleBlockLinkKind > S) { return PredID == S.first->getID(); }) && "Loop in the Block Graph!") ? static_cast<void> (0) : __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\"" , "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 552, __PRETTY_FUNCTION__))
550	return PredID == S.first->getID();((none_of(Succs, [=](std::pair<SIScheduleBlock, SIScheduleBlockLinkKind > S) { return PredID == S.first->getID(); }) && "Loop in the Block Graph!") ? static_cast<void> (0) : __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\"" , "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 552, __PRETTY_FUNCTION__))
551	}) &&((none_of(Succs, [=](std::pair<SIScheduleBlock, SIScheduleBlockLinkKind > S) { return PredID == S.first->getID(); }) && "Loop in the Block Graph!") ? static_cast<void> (0) : __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\"" , "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 552, __PRETTY_FUNCTION__))
552	"Loop in the Block Graph!")((none_of(Succs, [=](std::pair<SIScheduleBlock, SIScheduleBlockLinkKind > S) { return PredID == S.first->getID(); }) && "Loop in the Block Graph!") ? static_cast<void> (0) : __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\"" , "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 552, __PRETTY_FUNCTION__));
553	}
554
555	void SIScheduleBlock::addSucc(SIScheduleBlock *Succ,
556	SIScheduleBlockLinkKind Kind) {
557	unsigned SuccID = Succ->getID();
558
559	// Check if not already predecessor.
560	for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> &S : Succs) {
561	if (SuccID == S.first->getID()) {
562	if (S.second == SIScheduleBlockLinkKind::NoData &&
563	Kind == SIScheduleBlockLinkKind::Data)
564	S.second = Kind;
565	return;
566	}
567	}
568	if (Succ->isHighLatencyBlock())
569	++NumHighLatencySuccessors;
570	Succs.push_back(std::make_pair(Succ, Kind));
571
572	assert(none_of(Preds,((none_of(Preds, [=](SIScheduleBlock P) { return SuccID == P ->getID(); }) && "Loop in the Block Graph!") ? static_cast <void> (0) : __assert_fail ("none_of(Preds, [=](SIScheduleBlock P) { return SuccID == P->getID(); }) && \"Loop in the Block Graph!\"" , "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 574, __PRETTY_FUNCTION__))
573	[=](SIScheduleBlock P) { return SuccID == P->getID(); }) &&((none_of(Preds, [=](SIScheduleBlock P) { return SuccID == P ->getID(); }) && "Loop in the Block Graph!") ? static_cast <void> (0) : __assert_fail ("none_of(Preds, [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) && \"Loop in the Block Graph!\"" , "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 574, __PRETTY_FUNCTION__))
574	"Loop in the Block Graph!")((none_of(Preds, [=](SIScheduleBlock P) { return SuccID == P ->getID(); }) && "Loop in the Block Graph!") ? static_cast <void> (0) : __assert_fail ("none_of(Preds, [=](SIScheduleBlock P) { return SuccID == P->getID(); }) && \"Loop in the Block Graph!\"" , "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 574, __PRETTY_FUNCTION__));
575	}
576
577	#ifndef NDEBUG
578	void SIScheduleBlock::printDebug(bool full) {
579	dbgs() << "Block (" << ID << ")\n";
580	if (!full)
581	return;
582
583	dbgs() << "\nContains High Latency Instruction: "
584	<< HighLatencyBlock << '\n';
585	dbgs() << "\nDepends On:\n";
586	for (SIScheduleBlock* P : Preds) {
587	P->printDebug(false);
588	}
589
590	dbgs() << "\nSuccessors:\n";
591	for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S : Succs) {
592	if (S.second == SIScheduleBlockLinkKind::Data)
593	dbgs() << "(Data Dep) ";
594	S.first->printDebug(false);
595	}
596
597	if (Scheduled) {
598	dbgs() << "LiveInPressure "
599	<< LiveInPressure[AMDGPU::RegisterPressureSets::SReg_32] << ' '
600	<< LiveInPressure[AMDGPU::RegisterPressureSets::VGPR_32] << '\n';
601	dbgs() << "LiveOutPressure "
602	<< LiveOutPressure[AMDGPU::RegisterPressureSets::SReg_32] << ' '
603	<< LiveOutPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n\n";
604	dbgs() << "LiveIns:\n";
605	for (unsigned Reg : LiveInRegs)
606	dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
607
608	dbgs() << "\nLiveOuts:\n";
609	for (unsigned Reg : LiveOutRegs)
610	dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
611	}
612
613	dbgs() << "\nInstructions:\n";
614	for (const SUnit* SU : SUnits)
615	DAG->dumpNode(*SU);
616
617	dbgs() << "///////////////////////\n";
618	}
619	#endif
620
621	// SIScheduleBlockCreator //
622
623	SIScheduleBlockCreator::SIScheduleBlockCreator(SIScheduleDAGMI *DAG)
624	: DAG(DAG) {}
625
626	SIScheduleBlocks
627	SIScheduleBlockCreator::getBlocks(SISchedulerBlockCreatorVariant BlockVariant) {
628	std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator B =
629	Blocks.find(BlockVariant);
630	if (B == Blocks.end()) {
631	SIScheduleBlocks Res;
632	createBlocksForVariant(BlockVariant);
633	topologicalSort();
634	scheduleInsideBlocks();
635	fillStats();
636	Res.Blocks = CurrentBlocks;
637	Res.TopDownIndex2Block = TopDownIndex2Block;
638	Res.TopDownBlock2Index = TopDownBlock2Index;
639	Blocks[BlockVariant] = Res;
640	return Res;
641	} else {
642	return B->second;
643	}
644	}
645
646	bool SIScheduleBlockCreator::isSUInBlock(SUnit *SU, unsigned ID) {
647	if (SU->NodeNum >= DAG->SUnits.size())
648	return false;
649	return CurrentBlocks[Node2CurrentBlock[SU->NodeNum]]->getID() == ID;
650	}
651
652	void SIScheduleBlockCreator::colorHighLatenciesAlone() {
653	unsigned DAGSize = DAG->SUnits.size();
654
655	for (unsigned i = 0, e = DAGSize; i != e; ++i) {
656	SUnit *SU = &DAG->SUnits[i];
657	if (DAG->IsHighLatencySU[SU->NodeNum]) {
658	CurrentColoring[SU->NodeNum] = NextReservedID++;
659	}
660	}
661	}
662
663	static bool
664	hasDataDependencyPred(const SUnit &SU, const SUnit &FromSU) {
665	for (const auto &PredDep : SU.Preds) {
666	if (PredDep.getSUnit() == &FromSU &&
667	PredDep.getKind() == llvm::SDep::Data)
668	return true;
669	}
670	return false;
671	}
672
673	void SIScheduleBlockCreator::colorHighLatenciesGroups() {
674	unsigned DAGSize = DAG->SUnits.size();
675	unsigned NumHighLatencies = 0;
676	unsigned GroupSize;
677	int Color = NextReservedID;
678	unsigned Count = 0;
679	std::set<unsigned> FormingGroup;
680
681	for (unsigned i = 0, e = DAGSize; i != e; ++i) {
682	SUnit *SU = &DAG->SUnits[i];
683	if (DAG->IsHighLatencySU[SU->NodeNum])
684	++NumHighLatencies;
685	}
686
687	if (NumHighLatencies == 0)
688	return;
689
690	if (NumHighLatencies <= 6)
691	GroupSize = 2;
692	else if (NumHighLatencies <= 12)
693	GroupSize = 3;
694	else
695	GroupSize = 4;
696
697	for (unsigned SUNum : DAG->TopDownIndex2SU) {
698	const SUnit &SU = DAG->SUnits[SUNum];
699	if (DAG->IsHighLatencySU[SU.NodeNum]) {
700	unsigned CompatibleGroup = true;
701	int ProposedColor = Color;
702	std::vector<int> AdditionalElements;
703
704	// We don't want to put in the same block
705	// two high latency instructions that depend
706	// on each other.
707	// One way would be to check canAddEdge
708	// in both directions, but that currently is not
709	// enough because there the high latency order is
710	// enforced (via links).
711	// Instead, look at the dependencies between the
712	// high latency instructions and deduce if it is
713	// a data dependency or not.
714	for (unsigned j : FormingGroup) {
715	bool HasSubGraph;
716	std::vector<int> SubGraph;
717	// By construction (topological order), if SU and
718	// DAG->SUnits[j] are linked, DAG->SUnits[j] is neccessary
719	// in the parent graph of SU.
720	#ifndef NDEBUG
721	SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j],
722	HasSubGraph);
723	assert(!HasSubGraph)((!HasSubGraph) ? static_cast<void> (0) : __assert_fail ("!HasSubGraph", "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 723, __PRETTY_FUNCTION__));
724	#endif
725	SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU,
726	HasSubGraph);
727	if (!HasSubGraph)
728	continue; // No dependencies between each other
729	else if (SubGraph.size() > 5) {
730	// Too many elements would be required to be added to the block.
731	CompatibleGroup = false;
732	break;
733	}
734	else {
735	// Check the type of dependency
736	for (unsigned k : SubGraph) {
737	// If in the path to join the two instructions,
738	// there is another high latency instruction,
739	// or instructions colored for another block
740	// abort the merge.
741	if (DAG->IsHighLatencySU[k] \|\|
742	(CurrentColoring[k] != ProposedColor &&
743	CurrentColoring[k] != 0)) {
744	CompatibleGroup = false;
745	break;
746	}
747	// If one of the SU in the subgraph depends on the result of SU j,
748	// there'll be a data dependency.
749	if (hasDataDependencyPred(DAG->SUnits[k], DAG->SUnits[j])) {
750	CompatibleGroup = false;
751	break;
752	}
753	}
754	if (!CompatibleGroup)
755	break;
756	// Same check for the SU
757	if (hasDataDependencyPred(SU, DAG->SUnits[j])) {
758	CompatibleGroup = false;
759	break;
760	}
761	// Add all the required instructions to the block
762	// These cannot live in another block (because they
763	// depend (order dependency) on one of the
764	// instruction in the block, and are required for the
765	// high latency instruction we add.
766	AdditionalElements.insert(AdditionalElements.end(),
767	SubGraph.begin(), SubGraph.end());
768	}
769	}
770	if (CompatibleGroup) {
771	FormingGroup.insert(SU.NodeNum);
772	for (unsigned j : AdditionalElements)
773	CurrentColoring[j] = ProposedColor;
774	CurrentColoring[SU.NodeNum] = ProposedColor;
775	++Count;
776	}
777	// Found one incompatible instruction,
778	// or has filled a big enough group.
779	// -> start a new one.
780	if (!CompatibleGroup) {
781	FormingGroup.clear();
782	Color = ++NextReservedID;
783	ProposedColor = Color;
784	FormingGroup.insert(SU.NodeNum);
785	CurrentColoring[SU.NodeNum] = ProposedColor;
786	Count = 0;
787	} else if (Count == GroupSize) {
788	FormingGroup.clear();
789	Color = ++NextReservedID;
790	ProposedColor = Color;
	Value stored to 'ProposedColor' is never read
791	Count = 0;
792	}
793	}
794	}
795	}
796
797	void SIScheduleBlockCreator::colorComputeReservedDependencies() {
798	unsigned DAGSize = DAG->SUnits.size();
799	std::map<std::set<unsigned>, unsigned> ColorCombinations;
800
801	CurrentTopDownReservedDependencyColoring.clear();
802	CurrentBottomUpReservedDependencyColoring.clear();
803
804	CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0);
805	CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0);
806
807	// Traverse TopDown, and give different colors to SUs depending
808	// on which combination of High Latencies they depend on.
809
810	for (unsigned SUNum : DAG->TopDownIndex2SU) {
811	SUnit *SU = &DAG->SUnits[SUNum];
812	std::set<unsigned> SUColors;
813
814	// Already given.
815	if (CurrentColoring[SU->NodeNum]) {
816	CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
817	CurrentColoring[SU->NodeNum];
818	continue;
819	}
820
821	for (SDep& PredDep : SU->Preds) {
822	SUnit *Pred = PredDep.getSUnit();
823	if (PredDep.isWeak() \|\| Pred->NodeNum >= DAGSize)
824	continue;
825	if (CurrentTopDownReservedDependencyColoring[Pred->NodeNum] > 0)
826	SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->NodeNum]);
827	}
828	// Color 0 by default.
829	if (SUColors.empty())
830	continue;
831	// Same color than parents.
832	if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
833	CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
834	*SUColors.begin();
835	else {
836	std::map<std::set<unsigned>, unsigned>::iterator Pos =
837	ColorCombinations.find(SUColors);
838	if (Pos != ColorCombinations.end()) {
839	CurrentTopDownReservedDependencyColoring[SU->NodeNum] = Pos->second;
840	} else {
841	CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
842	NextNonReservedID;
843	ColorCombinations[SUColors] = NextNonReservedID++;
844	}
845	}
846	}
847
848	ColorCombinations.clear();
849
850	// Same as before, but BottomUp.
851
852	for (unsigned SUNum : DAG->BottomUpIndex2SU) {
853	SUnit *SU = &DAG->SUnits[SUNum];
854	std::set<unsigned> SUColors;
855
856	// Already given.
857	if (CurrentColoring[SU->NodeNum]) {
858	CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
859	CurrentColoring[SU->NodeNum];
860	continue;
861	}
862
863	for (SDep& SuccDep : SU->Succs) {
864	SUnit *Succ = SuccDep.getSUnit();
865	if (SuccDep.isWeak() \|\| Succ->NodeNum >= DAGSize)
866	continue;
867	if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0)
868	SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->NodeNum]);
869	}
870	// Keep color 0.
871	if (SUColors.empty())
872	continue;
873	// Same color than parents.
874	if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
875	CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
876	*SUColors.begin();
877	else {
878	std::map<std::set<unsigned>, unsigned>::iterator Pos =
879	ColorCombinations.find(SUColors);
880	if (Pos != ColorCombinations.end()) {
881	CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = Pos->second;
882	} else {
883	CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
884	NextNonReservedID;
885	ColorCombinations[SUColors] = NextNonReservedID++;
886	}
887	}
888	}
889	}
890
891	void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
892	unsigned DAGSize = DAG->SUnits.size();
893	std::map<std::pair<unsigned, unsigned>, unsigned> ColorCombinations;
894
895	// Every combination of colors given by the top down
896	// and bottom up Reserved node dependency
897
898	for (unsigned i = 0, e = DAGSize; i != e; ++i) {
899	SUnit *SU = &DAG->SUnits[i];
900	std::pair<unsigned, unsigned> SUColors;
901
902	// High latency instructions: already given.
903	if (CurrentColoring[SU->NodeNum])
904	continue;
905
906	SUColors.first = CurrentTopDownReservedDependencyColoring[SU->NodeNum];
907	SUColors.second = CurrentBottomUpReservedDependencyColoring[SU->NodeNum];
908
909	std::map<std::pair<unsigned, unsigned>, unsigned>::iterator Pos =
910	ColorCombinations.find(SUColors);
911	if (Pos != ColorCombinations.end()) {
912	CurrentColoring[SU->NodeNum] = Pos->second;
913	} else {
914	CurrentColoring[SU->NodeNum] = NextNonReservedID;
915	ColorCombinations[SUColors] = NextNonReservedID++;
916	}
917	}
918	}
919
920	void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
921	unsigned DAGSize = DAG->SUnits.size();
922	std::vector<int> PendingColoring = CurrentColoring;
923
924	assert(DAGSize >= 1 &&((DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring .size() == DAGSize && CurrentTopDownReservedDependencyColoring .size() == DAGSize) ? static_cast<void> (0) : __assert_fail ("DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring.size() == DAGSize && CurrentTopDownReservedDependencyColoring.size() == DAGSize" , "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 926, __PRETTY_FUNCTION__))
925	CurrentBottomUpReservedDependencyColoring.size() == DAGSize &&((DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring .size() == DAGSize && CurrentTopDownReservedDependencyColoring .size() == DAGSize) ? static_cast<void> (0) : __assert_fail ("DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring.size() == DAGSize && CurrentTopDownReservedDependencyColoring.size() == DAGSize" , "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 926, __PRETTY_FUNCTION__))
926	CurrentTopDownReservedDependencyColoring.size() == DAGSize)((DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring .size() == DAGSize && CurrentTopDownReservedDependencyColoring .size() == DAGSize) ? static_cast<void> (0) : __assert_fail ("DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring.size() == DAGSize && CurrentTopDownReservedDependencyColoring.size() == DAGSize" , "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 926, __PRETTY_FUNCTION__));
927	// If there is no reserved block at all, do nothing. We don't want
928	// everything in one block.
929	if (*std::max_element(CurrentBottomUpReservedDependencyColoring.begin(),
930	CurrentBottomUpReservedDependencyColoring.end()) == 0 &&
931	*std::max_element(CurrentTopDownReservedDependencyColoring.begin(),
932	CurrentTopDownReservedDependencyColoring.end()) == 0)
933	return;
934
935	for (unsigned SUNum : DAG->BottomUpIndex2SU) {
936	SUnit *SU = &DAG->SUnits[SUNum];
937	std::set<unsigned> SUColors;
938	std::set<unsigned> SUColorsPending;
939
940	if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
941	continue;
942
943	if (CurrentBottomUpReservedDependencyColoring[SU->NodeNum] > 0 \|\|
944	CurrentTopDownReservedDependencyColoring[SU->NodeNum] > 0)
945	continue;
946
947	for (SDep& SuccDep : SU->Succs) {
948	SUnit *Succ = SuccDep.getSUnit();
949	if (SuccDep.isWeak() \|\| Succ->NodeNum >= DAGSize)
950	continue;
951	if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0 \|\|
952	CurrentTopDownReservedDependencyColoring[Succ->NodeNum] > 0)
953	SUColors.insert(CurrentColoring[Succ->NodeNum]);
954	SUColorsPending.insert(PendingColoring[Succ->NodeNum]);
955	}
956	// If there is only one child/parent block, and that block
957	// is not among the ones we are removing in this path, then
958	// merge the instruction to that block
959	if (SUColors.size() == 1 && SUColorsPending.size() == 1)
960	PendingColoring[SU->NodeNum] = *SUColors.begin();
961	else // TODO: Attribute new colors depending on color
962	// combination of children.
963	PendingColoring[SU->NodeNum] = NextNonReservedID++;
964	}
965	CurrentColoring = PendingColoring;
966	}
967
968
969	void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() {
970	unsigned DAGSize = DAG->SUnits.size();
971	unsigned PreviousColor;
972	std::set<unsigned> SeenColors;
973
974	if (DAGSize <= 1)
975	return;
976
977	PreviousColor = CurrentColoring[0];
978
979	for (unsigned i = 1, e = DAGSize; i != e; ++i) {
980	SUnit *SU = &DAG->SUnits[i];
981	unsigned CurrentColor = CurrentColoring[i];
982	unsigned PreviousColorSave = PreviousColor;
983	assert(i == SU->NodeNum)((i == SU->NodeNum) ? static_cast<void> (0) : __assert_fail ("i == SU->NodeNum", "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 983, __PRETTY_FUNCTION__));
984
985	if (CurrentColor != PreviousColor)
986	SeenColors.insert(PreviousColor);
987	PreviousColor = CurrentColor;
988
989	if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
990	continue;
991
992	if (SeenColors.find(CurrentColor) == SeenColors.end())
993	continue;
994
995	if (PreviousColorSave != CurrentColor)
996	CurrentColoring[i] = NextNonReservedID++;
997	else
998	CurrentColoring[i] = CurrentColoring[i-1];
999	}
1000	}
1001
1002	void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
1003	unsigned DAGSize = DAG->SUnits.size();
1004
1005	for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1006	SUnit *SU = &DAG->SUnits[SUNum];
1007	std::set<unsigned> SUColors;
1008
1009	if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1010	continue;
1011
1012	// No predecessor: Vgpr constant loading.
1013	// Low latency instructions usually have a predecessor (the address)
1014	if (SU->Preds.size() > 0 && !DAG->IsLowLatencySU[SU->NodeNum])
1015	continue;
1016
1017	for (SDep& SuccDep : SU->Succs) {
1018	SUnit *Succ = SuccDep.getSUnit();
1019	if (SuccDep.isWeak() \|\| Succ->NodeNum >= DAGSize)
1020	continue;
1021	SUColors.insert(CurrentColoring[Succ->NodeNum]);
1022	}
1023	if (SUColors.size() == 1)
1024	CurrentColoring[SU->NodeNum] = *SUColors.begin();
1025	}
1026	}
1027
1028	void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
1029	unsigned DAGSize = DAG->SUnits.size();
1030
1031	for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1032	SUnit *SU = &DAG->SUnits[SUNum];
1033	std::set<unsigned> SUColors;
1034
1035	if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1036	continue;
1037
1038	for (SDep& SuccDep : SU->Succs) {
1039	SUnit *Succ = SuccDep.getSUnit();
1040	if (SuccDep.isWeak() \|\| Succ->NodeNum >= DAGSize)
1041	continue;
1042	SUColors.insert(CurrentColoring[Succ->NodeNum]);
1043	}
1044	if (SUColors.size() == 1)
1045	CurrentColoring[SU->NodeNum] = *SUColors.begin();
1046	}
1047	}
1048
1049	void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() {
1050	unsigned DAGSize = DAG->SUnits.size();
1051
1052	for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1053	SUnit *SU = &DAG->SUnits[SUNum];
1054	std::set<unsigned> SUColors;
1055
1056	if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1057	continue;
1058
1059	for (SDep& SuccDep : SU->Succs) {
1060	SUnit *Succ = SuccDep.getSUnit();
1061	if (SuccDep.isWeak() \|\| Succ->NodeNum >= DAGSize)
1062	continue;
1063	SUColors.insert(CurrentColoring[Succ->NodeNum]);
1064	}
1065	if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize)
1066	CurrentColoring[SU->NodeNum] = *SUColors.begin();
1067	}
1068	}
1069
1070	void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
1071	unsigned DAGSize = DAG->SUnits.size();
1072	std::map<unsigned, unsigned> ColorCount;
1073
1074	for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1075	SUnit *SU = &DAG->SUnits[SUNum];
1076	unsigned color = CurrentColoring[SU->NodeNum];
1077	++ColorCount[color];
1078	}
1079
1080	for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1081	SUnit *SU = &DAG->SUnits[SUNum];
1082	unsigned color = CurrentColoring[SU->NodeNum];
1083	std::set<unsigned> SUColors;
1084
1085	if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1086	continue;
1087
1088	if (ColorCount[color] > 1)
1089	continue;
1090
1091	for (SDep& SuccDep : SU->Succs) {
1092	SUnit *Succ = SuccDep.getSUnit();
1093	if (SuccDep.isWeak() \|\| Succ->NodeNum >= DAGSize)
1094	continue;
1095	SUColors.insert(CurrentColoring[Succ->NodeNum]);
1096	}
1097	if (SUColors.size() == 1 && *SUColors.begin() != color) {
1098	--ColorCount[color];
1099	CurrentColoring[SU->NodeNum] = *SUColors.begin();
1100	++ColorCount[*SUColors.begin()];
1101	}
1102	}
1103	}
1104
1105	void SIScheduleBlockCreator::cutHugeBlocks() {
1106	// TODO
1107	}
1108
1109	void SIScheduleBlockCreator::regroupNoUserInstructions() {
1110	unsigned DAGSize = DAG->SUnits.size();
1111	int GroupID = NextNonReservedID++;
1112
1113	for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1114	SUnit *SU = &DAG->SUnits[SUNum];
1115	bool hasSuccessor = false;
1116
1117	if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1118	continue;
1119
1120	for (SDep& SuccDep : SU->Succs) {
1121	SUnit *Succ = SuccDep.getSUnit();
1122	if (SuccDep.isWeak() \|\| Succ->NodeNum >= DAGSize)
1123	continue;
1124	hasSuccessor = true;
1125	}
1126	if (!hasSuccessor)
1127	CurrentColoring[SU->NodeNum] = GroupID;
1128	}
1129	}
1130
1131	void SIScheduleBlockCreator::colorExports() {
1132	unsigned ExportColor = NextNonReservedID++;
1133	SmallVector<unsigned, 8> ExpGroup;
1134
1135	// Put all exports together in a block.
1136	// The block will naturally end up being scheduled last,
1137	// thus putting exports at the end of the schedule, which
1138	// is better for performance.
1139	// However we must ensure, for safety, the exports can be put
1140	// together in the same block without any other instruction.
1141	// This could happen, for example, when scheduling after regalloc
1142	// if reloading a spilled register from memory using the same
1143	// register than used in a previous export.
1144	// If that happens, do not regroup the exports.
1145	for (unsigned SUNum : DAG->TopDownIndex2SU) {
1146	const SUnit &SU = DAG->SUnits[SUNum];
1147	if (SIInstrInfo::isEXP(*SU.getInstr())) {
1148	// Check the EXP can be added to the group safely,
1149	// ie without needing any other instruction.
1150	// The EXP is allowed to depend on other EXP
1151	// (they will be in the same group).
1152	for (unsigned j : ExpGroup) {
1153	bool HasSubGraph;
1154	std::vector<int> SubGraph;
1155	// By construction (topological order), if SU and
1156	// DAG->SUnits[j] are linked, DAG->SUnits[j] is neccessary
1157	// in the parent graph of SU.
1158	#ifndef NDEBUG
1159	SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j],
1160	HasSubGraph);
1161	assert(!HasSubGraph)((!HasSubGraph) ? static_cast<void> (0) : __assert_fail ("!HasSubGraph", "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 1161, __PRETTY_FUNCTION__));
1162	#endif
1163	SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU,
1164	HasSubGraph);
1165	if (!HasSubGraph)
1166	continue; // No dependencies between each other
1167
1168	// SubGraph contains all the instructions required
1169	// between EXP SUnits[j] and EXP SU.
1170	for (unsigned k : SubGraph) {
1171	if (!SIInstrInfo::isEXP(*DAG->SUnits[k].getInstr()))
1172	// Other instructions than EXP would be required in the group.
1173	// Abort the groupping.
1174	return;
1175	}
1176	}
1177
1178	ExpGroup.push_back(SUNum);
1179	}
1180	}
1181
1182	// The group can be formed. Give the color.
1183	for (unsigned j : ExpGroup)
1184	CurrentColoring[j] = ExportColor;
1185	}
1186
1187	void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant) {
1188	unsigned DAGSize = DAG->SUnits.size();
1189	std::map<unsigned,unsigned> RealID;
1190
1191	CurrentBlocks.clear();
1192	CurrentColoring.clear();
1193	CurrentColoring.resize(DAGSize, 0);
1194	Node2CurrentBlock.clear();
1195
1196	// Restore links previous scheduling variant has overridden.
1197	DAG->restoreSULinksLeft();
1198
1199	NextReservedID = 1;
1200	NextNonReservedID = DAGSize + 1;
1201
1202	LLVM_DEBUG(dbgs() << "Coloring the graph\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Coloring the graph\n" ; } } while (false);
1203
1204	if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesGrouped)
1205	colorHighLatenciesGroups();
1206	else
1207	colorHighLatenciesAlone();
1208	colorComputeReservedDependencies();
1209	colorAccordingToReservedDependencies();
1210	colorEndsAccordingToDependencies();
1211	if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesAlonePlusConsecutive)
1212	colorForceConsecutiveOrderInGroup();
1213	regroupNoUserInstructions();
1214	colorMergeConstantLoadsNextGroup();
1215	colorMergeIfPossibleNextGroupOnlyForReserved();
1216	colorExports();
1217
1218	// Put SUs of same color into same block
1219	Node2CurrentBlock.resize(DAGSize, -1);
1220	for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1221	SUnit *SU = &DAG->SUnits[i];
1222	unsigned Color = CurrentColoring[SU->NodeNum];
1223	if (RealID.find(Color) == RealID.end()) {
1224	int ID = CurrentBlocks.size();
1225	BlockPtrs.push_back(std::make_unique<SIScheduleBlock>(DAG, this, ID));
1226	CurrentBlocks.push_back(BlockPtrs.rbegin()->get());
1227	RealID[Color] = ID;
1228	}
1229	CurrentBlocks[RealID[Color]]->addUnit(SU);
1230	Node2CurrentBlock[SU->NodeNum] = RealID[Color];
1231	}
1232
1233	// Build dependencies between blocks.
1234	for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1235	SUnit *SU = &DAG->SUnits[i];
1236	int SUID = Node2CurrentBlock[i];
1237	for (SDep& SuccDep : SU->Succs) {
1238	SUnit *Succ = SuccDep.getSUnit();
1239	if (SuccDep.isWeak() \|\| Succ->NodeNum >= DAGSize)
1240	continue;
1241	if (Node2CurrentBlock[Succ->NodeNum] != SUID)
1242	CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]],
1243	SuccDep.isCtrl() ? NoData : Data);
1244	}
1245	for (SDep& PredDep : SU->Preds) {
1246	SUnit *Pred = PredDep.getSUnit();
1247	if (PredDep.isWeak() \|\| Pred->NodeNum >= DAGSize)
1248	continue;
1249	if (Node2CurrentBlock[Pred->NodeNum] != SUID)
1250	CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->NodeNum]]);
1251	}
1252	}
1253
1254	// Free root and leafs of all blocks to enable scheduling inside them.
1255	for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
1256	SIScheduleBlock *Block = CurrentBlocks[i];
1257	Block->finalizeUnits();
1258	}
1259	LLVM_DEBUG(dbgs() << "Blocks created:\n\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Blocks created:\n\n" ; for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) { SIScheduleBlock *Block = CurrentBlocks[i]; Block->printDebug (true); }; } } while (false)
1260	for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Blocks created:\n\n" ; for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) { SIScheduleBlock *Block = CurrentBlocks[i]; Block->printDebug (true); }; } } while (false)
1261	SIScheduleBlock Block = CurrentBlocks[i];do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Blocks created:\n\n" ; for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) { SIScheduleBlock Block = CurrentBlocks[i]; Block->printDebug (true); }; } } while (false)
1262	Block->printDebug(true);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Blocks created:\n\n" ; for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) { SIScheduleBlock *Block = CurrentBlocks[i]; Block->printDebug (true); }; } } while (false)
1263	})do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Blocks created:\n\n" ; for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) { SIScheduleBlock *Block = CurrentBlocks[i]; Block->printDebug (true); }; } } while (false);
1264	}
1265
1266	// Two functions taken from Codegen/MachineScheduler.cpp
1267
1268	/// Non-const version.
1269	static MachineBasicBlock::iterator
1270	nextIfDebug(MachineBasicBlock::iterator I,
1271	MachineBasicBlock::const_iterator End) {
1272	for (; I != End; ++I) {
1273	if (!I->isDebugInstr())
1274	break;
1275	}
1276	return I;
1277	}
1278
1279	void SIScheduleBlockCreator::topologicalSort() {
1280	unsigned DAGSize = CurrentBlocks.size();
1281	std::vector<int> WorkList;
1282
1283	LLVM_DEBUG(dbgs() << "Topological Sort\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Topological Sort\n" ; } } while (false);
1284
1285	WorkList.reserve(DAGSize);
1286	TopDownIndex2Block.resize(DAGSize);
1287	TopDownBlock2Index.resize(DAGSize);
1288	BottomUpIndex2Block.resize(DAGSize);
1289
1290	for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1291	SIScheduleBlock *Block = CurrentBlocks[i];
1292	unsigned Degree = Block->getSuccs().size();
1293	TopDownBlock2Index[i] = Degree;
1294	if (Degree == 0) {
1295	WorkList.push_back(i);
1296	}
1297	}
1298
1299	int Id = DAGSize;
1300	while (!WorkList.empty()) {
1301	int i = WorkList.back();
1302	SIScheduleBlock *Block = CurrentBlocks[i];
1303	WorkList.pop_back();
1304	TopDownBlock2Index[i] = --Id;
1305	TopDownIndex2Block[Id] = i;
1306	for (SIScheduleBlock* Pred : Block->getPreds()) {
1307	if (!--TopDownBlock2Index[Pred->getID()])
1308	WorkList.push_back(Pred->getID());
1309	}
1310	}
1311
1312	#ifndef NDEBUG
1313	// Check correctness of the ordering.
1314	for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1315	SIScheduleBlock *Block = CurrentBlocks[i];
1316	for (SIScheduleBlock* Pred : Block->getPreds()) {
1317	assert(TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] &&((TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID ()] && "Wrong Top Down topological sorting") ? static_cast <void> (0) : __assert_fail ("TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] && \"Wrong Top Down topological sorting\"" , "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 1318, __PRETTY_FUNCTION__))
1318	"Wrong Top Down topological sorting")((TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID ()] && "Wrong Top Down topological sorting") ? static_cast <void> (0) : __assert_fail ("TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] && \"Wrong Top Down topological sorting\"" , "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 1318, __PRETTY_FUNCTION__));
1319	}
1320	}
1321	#endif
1322
1323	BottomUpIndex2Block = std::vector<int>(TopDownIndex2Block.rbegin(),
1324	TopDownIndex2Block.rend());
1325	}
1326
1327	void SIScheduleBlockCreator::scheduleInsideBlocks() {
1328	unsigned DAGSize = CurrentBlocks.size();
1329
1330	LLVM_DEBUG(dbgs() << "\nScheduling Blocks\n\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "\nScheduling Blocks\n\n" ; } } while (false);
1331
1332	// We do schedule a valid scheduling such that a Block corresponds
1333	// to a range of instructions.
1334	LLVM_DEBUG(dbgs() << "First phase: Fast scheduling for Reg Liveness\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "First phase: Fast scheduling for Reg Liveness\n" ; } } while (false);
1335	for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1336	SIScheduleBlock *Block = CurrentBlocks[i];
1337	Block->fastSchedule();
1338	}
1339
1340	// Note: the following code, and the part restoring previous position
1341	// is by far the most expensive operation of the Scheduler.
1342
1343	// Do not update CurrentTop.
1344	MachineBasicBlock::iterator CurrentTopFastSched = DAG->getCurrentTop();
1345	std::vector<MachineBasicBlock::iterator> PosOld;
1346	std::vector<MachineBasicBlock::iterator> PosNew;
1347	PosOld.reserve(DAG->SUnits.size());
1348	PosNew.reserve(DAG->SUnits.size());
1349
1350	for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1351	int BlockIndice = TopDownIndex2Block[i];
1352	SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
1353	std::vector<SUnit*> SUs = Block->getScheduledUnits();
1354
1355	for (SUnit* SU : SUs) {
1356	MachineInstr *MI = SU->getInstr();
1357	MachineBasicBlock::iterator Pos = MI;
1358	PosOld.push_back(Pos);
1359	if (&*CurrentTopFastSched == MI) {
1360	PosNew.push_back(Pos);
1361	CurrentTopFastSched = nextIfDebug(++CurrentTopFastSched,
1362	DAG->getCurrentBottom());
1363	} else {
1364	// Update the instruction stream.
1365	DAG->getBB()->splice(CurrentTopFastSched, DAG->getBB(), MI);
1366
1367	// Update LiveIntervals.
1368	// Note: Moving all instructions and calling handleMove every time
1369	// is the most cpu intensive operation of the scheduler.
1370	// It would gain a lot if there was a way to recompute the
1371	// LiveIntervals for the entire scheduling region.
1372	DAG->getLIS()->handleMove(MI, /UpdateFlags=*/true);
1373	PosNew.push_back(CurrentTopFastSched);
1374	}
1375	}
1376	}
1377
1378	// Now we have Block of SUs == Block of MI.
1379	// We do the final schedule for the instructions inside the block.
1380	// The property that all the SUs of the Block are grouped together as MI
1381	// is used for correct reg usage tracking.
1382	for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1383	SIScheduleBlock *Block = CurrentBlocks[i];
1384	std::vector<SUnit*> SUs = Block->getScheduledUnits();
1385	Block->schedule((SUs.begin())->getInstr(), (SUs.rbegin())->getInstr());
1386	}
1387
1388	LLVM_DEBUG(dbgs() << "Restoring MI Pos\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Restoring MI Pos\n" ; } } while (false);
1389	// Restore old ordering (which prevents a LIS->handleMove bug).
1390	for (unsigned i = PosOld.size(), e = 0; i != e; --i) {
1391	MachineBasicBlock::iterator POld = PosOld[i-1];
1392	MachineBasicBlock::iterator PNew = PosNew[i-1];
1393	if (PNew != POld) {
1394	// Update the instruction stream.
1395	DAG->getBB()->splice(POld, DAG->getBB(), PNew);
1396
1397	// Update LiveIntervals.
1398	DAG->getLIS()->handleMove(POld, /UpdateFlags=*/true);
1399	}
1400	}
1401
1402	LLVM_DEBUG(for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { for (unsigned i = 0, e = CurrentBlocks .size(); i != e; ++i) { SIScheduleBlock *Block = CurrentBlocks [i]; Block->printDebug(true); }; } } while (false)
1403	SIScheduleBlock Block = CurrentBlocks[i];do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { for (unsigned i = 0, e = CurrentBlocks .size(); i != e; ++i) { SIScheduleBlock Block = CurrentBlocks [i]; Block->printDebug(true); }; } } while (false)
1404	Block->printDebug(true);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { for (unsigned i = 0, e = CurrentBlocks .size(); i != e; ++i) { SIScheduleBlock *Block = CurrentBlocks [i]; Block->printDebug(true); }; } } while (false)
1405	})do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { for (unsigned i = 0, e = CurrentBlocks .size(); i != e; ++i) { SIScheduleBlock *Block = CurrentBlocks [i]; Block->printDebug(true); }; } } while (false);
1406	}
1407
1408	void SIScheduleBlockCreator::fillStats() {
1409	unsigned DAGSize = CurrentBlocks.size();
1410
1411	for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1412	int BlockIndice = TopDownIndex2Block[i];
1413	SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
1414	if (Block->getPreds().empty())
1415	Block->Depth = 0;
1416	else {
1417	unsigned Depth = 0;
1418	for (SIScheduleBlock *Pred : Block->getPreds()) {
1419	if (Depth < Pred->Depth + Pred->getCost())
1420	Depth = Pred->Depth + Pred->getCost();
1421	}
1422	Block->Depth = Depth;
1423	}
1424	}
1425
1426	for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1427	int BlockIndice = BottomUpIndex2Block[i];
1428	SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
1429	if (Block->getSuccs().empty())
1430	Block->Height = 0;
1431	else {
1432	unsigned Height = 0;
1433	for (const auto &Succ : Block->getSuccs())
1434	Height = std::max(Height, Succ.first->Height + Succ.first->getCost());
1435	Block->Height = Height;
1436	}
1437	}
1438	}
1439
1440	// SIScheduleBlockScheduler //
1441
1442	SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
1443	SISchedulerBlockSchedulerVariant Variant,
1444	SIScheduleBlocks BlocksStruct) :
1445	DAG(DAG), Variant(Variant), Blocks(BlocksStruct.Blocks),
1446	LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0),
1447	SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) {
1448
1449	// Fill the usage of every output
1450	// Warning: while by construction we always have a link between two blocks
1451	// when one needs a result from the other, the number of users of an output
1452	// is not the sum of child blocks having as input the same virtual register.
1453	// Here is an example. A produces x and y. B eats x and produces x'.
1454	// C eats x' and y. The register coalescer may have attributed the same
1455	// virtual register to x and x'.
1456	// To count accurately, we do a topological sort. In case the register is
1457	// found for several parents, we increment the usage of the one with the
1458	// highest topological index.
1459	LiveOutRegsNumUsages.resize(Blocks.size());
1460	for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1461	SIScheduleBlock *Block = Blocks[i];
1462	for (unsigned Reg : Block->getInRegs()) {
1463	bool Found = false;
1464	int topoInd = -1;
1465	for (SIScheduleBlock* Pred: Block->getPreds()) {
1466	std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1467	std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
1468
1469	if (RegPos != PredOutRegs.end()) {
1470	Found = true;
1471	if (topoInd < BlocksStruct.TopDownBlock2Index[Pred->getID()]) {
1472	topoInd = BlocksStruct.TopDownBlock2Index[Pred->getID()];
1473	}
1474	}
1475	}
1476
1477	if (!Found)
1478	continue;
1479
1480	int PredID = BlocksStruct.TopDownIndex2Block[topoInd];
1481	++LiveOutRegsNumUsages[PredID][Reg];
1482	}
1483	}
1484
1485	LastPosHighLatencyParentScheduled.resize(Blocks.size(), 0);
1486	BlockNumPredsLeft.resize(Blocks.size());
1487	BlockNumSuccsLeft.resize(Blocks.size());
1488
1489	for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1490	SIScheduleBlock *Block = Blocks[i];
1491	BlockNumPredsLeft[i] = Block->getPreds().size();
1492	BlockNumSuccsLeft[i] = Block->getSuccs().size();
1493	}
1494
1495	#ifndef NDEBUG
1496	for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1497	SIScheduleBlock *Block = Blocks[i];
1498	assert(Block->getID() == i)((Block->getID() == i) ? static_cast<void> (0) : __assert_fail ("Block->getID() == i", "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 1498, __PRETTY_FUNCTION__));
1499	}
1500	#endif
1501
1502	std::set<unsigned> InRegs = DAG->getInRegs();
1503	addLiveRegs(InRegs);
1504
1505	// Increase LiveOutRegsNumUsages for blocks
1506	// producing registers consumed in another
1507	// scheduling region.
1508	for (unsigned Reg : DAG->getOutRegs()) {
1509	for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1510	// Do reverse traversal
1511	int ID = BlocksStruct.TopDownIndex2Block[Blocks.size()-1-i];
1512	SIScheduleBlock *Block = Blocks[ID];
1513	const std::set<unsigned> &OutRegs = Block->getOutRegs();
1514
1515	if (OutRegs.find(Reg) == OutRegs.end())
1516	continue;
1517
1518	++LiveOutRegsNumUsages[ID][Reg];
1519	break;
1520	}
1521	}
1522
1523	// Fill LiveRegsConsumers for regs that were already
1524	// defined before scheduling.
1525	for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1526	SIScheduleBlock *Block = Blocks[i];
1527	for (unsigned Reg : Block->getInRegs()) {
1528	bool Found = false;
1529	for (SIScheduleBlock* Pred: Block->getPreds()) {
1530	std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1531	std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
1532
1533	if (RegPos != PredOutRegs.end()) {
1534	Found = true;
1535	break;
1536	}
1537	}
1538
1539	if (!Found)
1540	++LiveRegsConsumers[Reg];
1541	}
1542	}
1543
1544	for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1545	SIScheduleBlock *Block = Blocks[i];
1546	if (BlockNumPredsLeft[i] == 0) {
1547	ReadyBlocks.push_back(Block);
1548	}
1549	}
1550
1551	while (SIScheduleBlock *Block = pickBlock()) {
1552	BlocksScheduled.push_back(Block);
1553	blockScheduled(Block);
1554	}
1555
1556	LLVM_DEBUG(dbgs() << "Block Order:"; for (SIScheduleBlock Blockdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Block Order:"; for ( SIScheduleBlock Block : BlocksScheduled) { dbgs() << ' ' << Block->getID(); } dbgs() << '\n';; } } while (false)
1557	: BlocksScheduled) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Block Order:"; for ( SIScheduleBlock *Block : BlocksScheduled) { dbgs() << ' ' << Block->getID(); } dbgs() << '\n';; } } while (false)
1558	dbgs() << ' ' << Block->getID();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Block Order:"; for ( SIScheduleBlock *Block : BlocksScheduled) { dbgs() << ' ' << Block->getID(); } dbgs() << '\n';; } } while (false)
1559	} dbgs() << '\n';)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Block Order:"; for ( SIScheduleBlock *Block : BlocksScheduled) { dbgs() << ' ' << Block->getID(); } dbgs() << '\n';; } } while (false);
1560	}
1561
1562	bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand,
1563	SIBlockSchedCandidate &TryCand) {
1564	if (!Cand.isValid()) {
1565	TryCand.Reason = NodeOrder;
1566	return true;
1567	}
1568
1569	// Try to hide high latencies.
1570	if (SISched::tryLess(TryCand.LastPosHighLatParentScheduled,
1571	Cand.LastPosHighLatParentScheduled, TryCand, Cand, Latency))
1572	return true;
1573	// Schedule high latencies early so you can hide them better.
1574	if (SISched::tryGreater(TryCand.IsHighLatency, Cand.IsHighLatency,
1575	TryCand, Cand, Latency))
1576	return true;
1577	if (TryCand.IsHighLatency && SISched::tryGreater(TryCand.Height, Cand.Height,
1578	TryCand, Cand, Depth))
1579	return true;
1580	if (SISched::tryGreater(TryCand.NumHighLatencySuccessors,
1581	Cand.NumHighLatencySuccessors,
1582	TryCand, Cand, Successor))
1583	return true;
1584	return false;
1585	}
1586
1587	bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
1588	SIBlockSchedCandidate &TryCand) {
1589	if (!Cand.isValid()) {
1590	TryCand.Reason = NodeOrder;
1591	return true;
1592	}
1593
1594	if (SISched::tryLess(TryCand.VGPRUsageDiff > 0, Cand.VGPRUsageDiff > 0,
1595	TryCand, Cand, RegUsage))
1596	return true;
1597	if (SISched::tryGreater(TryCand.NumSuccessors > 0,
1598	Cand.NumSuccessors > 0,
1599	TryCand, Cand, Successor))
1600	return true;
1601	if (SISched::tryGreater(TryCand.Height, Cand.Height, TryCand, Cand, Depth))
1602	return true;
1603	if (SISched::tryLess(TryCand.VGPRUsageDiff, Cand.VGPRUsageDiff,
1604	TryCand, Cand, RegUsage))
1605	return true;
1606	return false;
1607	}
1608
1609	SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() {
1610	SIBlockSchedCandidate Cand;
1611	std::vector<SIScheduleBlock*>::iterator Best;
1612	SIScheduleBlock *Block;
1613	if (ReadyBlocks.empty())
1614	return nullptr;
1615
1616	DAG->fillVgprSgprCost(LiveRegs.begin(), LiveRegs.end(),
1617	VregCurrentUsage, SregCurrentUsage);
1618	if (VregCurrentUsage > maxVregUsage)
1619	maxVregUsage = VregCurrentUsage;
1620	if (SregCurrentUsage > maxSregUsage)
1621	maxSregUsage = SregCurrentUsage;
1622	LLVM_DEBUG(dbgs() << "Picking New Blocks\n"; dbgs() << "Available: ";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking New Blocks\n" ; dbgs() << "Available: "; for (SIScheduleBlock *Block : ReadyBlocks) dbgs() << Block->getID() << ' '; dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs ) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';; } } while (false)
1623	for (SIScheduleBlock Blockdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking New Blocks\n" ; dbgs() << "Available: "; for (SIScheduleBlock Block : ReadyBlocks) dbgs() << Block->getID() << ' '; dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs ) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';; } } while (false)
1624	: ReadyBlocks) dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking New Blocks\n" ; dbgs() << "Available: "; for (SIScheduleBlock *Block : ReadyBlocks) dbgs() << Block->getID() << ' '; dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs ) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';; } } while (false)
1625	<< Block->getID() << ' ';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking New Blocks\n" ; dbgs() << "Available: "; for (SIScheduleBlock *Block : ReadyBlocks) dbgs() << Block->getID() << ' '; dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs ) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';; } } while (false)
1626	dbgs() << "\nCurrent Live:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking New Blocks\n" ; dbgs() << "Available: "; for (SIScheduleBlock *Block : ReadyBlocks) dbgs() << Block->getID() << ' '; dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs ) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';; } } while (false)
1627	for (unsigned Regdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking New Blocks\n" ; dbgs() << "Available: "; for (SIScheduleBlock *Block : ReadyBlocks) dbgs() << Block->getID() << ' '; dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs ) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';; } } while (false)
1628	: LiveRegs) dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking New Blocks\n" ; dbgs() << "Available: "; for (SIScheduleBlock *Block : ReadyBlocks) dbgs() << Block->getID() << ' '; dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs ) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';; } } while (false)
1629	<< printVRegOrUnit(Reg, DAG->getTRI()) << ' ';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking New Blocks\n" ; dbgs() << "Available: "; for (SIScheduleBlock *Block : ReadyBlocks) dbgs() << Block->getID() << ' '; dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs ) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';; } } while (false)
1630	dbgs() << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking New Blocks\n" ; dbgs() << "Available: "; for (SIScheduleBlock *Block : ReadyBlocks) dbgs() << Block->getID() << ' '; dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs ) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';; } } while (false)
1631	dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking New Blocks\n" ; dbgs() << "Available: "; for (SIScheduleBlock *Block : ReadyBlocks) dbgs() << Block->getID() << ' '; dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs ) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';; } } while (false)
1632	dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking New Blocks\n" ; dbgs() << "Available: "; for (SIScheduleBlock *Block : ReadyBlocks) dbgs() << Block->getID() << ' '; dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs ) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';; } } while (false);
1633
1634	Cand.Block = nullptr;
1635	for (std::vector<SIScheduleBlock*>::iterator I = ReadyBlocks.begin(),
1636	E = ReadyBlocks.end(); I != E; ++I) {
1637	SIBlockSchedCandidate TryCand;
1638	TryCand.Block = *I;
1639	TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
1640	TryCand.VGPRUsageDiff =
1641	checkRegUsageImpact(TryCand.Block->getInRegs(),
1642	TryCand.Block->getOutRegs())[AMDGPU::RegisterPressureSets::VGPR_32];
1643	TryCand.NumSuccessors = TryCand.Block->getSuccs().size();
1644	TryCand.NumHighLatencySuccessors =
1645	TryCand.Block->getNumHighLatencySuccessors();
1646	TryCand.LastPosHighLatParentScheduled =
1647	(unsigned int) std::max<int> (0,
1648	LastPosHighLatencyParentScheduled[TryCand.Block->getID()] -
1649	LastPosWaitedHighLatency);
1650	TryCand.Height = TryCand.Block->Height;
1651	// Try not to increase VGPR usage too much, else we may spill.
1652	if (VregCurrentUsage > 120 \|\|
1653	Variant != SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage) {
1654	if (!tryCandidateRegUsage(Cand, TryCand) &&
1655	Variant != SISchedulerBlockSchedulerVariant::BlockRegUsage)
1656	tryCandidateLatency(Cand, TryCand);
1657	} else {
1658	if (!tryCandidateLatency(Cand, TryCand))
1659	tryCandidateRegUsage(Cand, TryCand);
1660	}
1661	if (TryCand.Reason != NoCand) {
1662	Cand.setBest(TryCand);
1663	Best = I;
1664	LLVM_DEBUG(dbgs() << "Best Current Choice: " << Cand.Block->getID() << ' 'do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Best Current Choice: " << Cand.Block->getID() << ' ' << getReasonStr (Cand.Reason) << '\n'; } } while (false)
1665	<< getReasonStr(Cand.Reason) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Best Current Choice: " << Cand.Block->getID() << ' ' << getReasonStr (Cand.Reason) << '\n'; } } while (false);
1666	}
1667	}
1668
1669	LLVM_DEBUG(dbgs() << "Picking: " << Cand.Block->getID() << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking: " << Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: " << (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() << "Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled << '\n'; dbgs() << "VGPRUsageDiff: " << Cand .VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while (false)
1670	dbgs() << "Is a block with high latency instruction: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking: " << Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: " << (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() << "Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled << '\n'; dbgs() << "VGPRUsageDiff: " << Cand .VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while (false)
1671	<< (Cand.IsHighLatency ? "yes\n" : "no\n");do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking: " << Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: " << (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() << "Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled << '\n'; dbgs() << "VGPRUsageDiff: " << Cand .VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while (false)
1672	dbgs() << "Position of last high latency dependency: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking: " << Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: " << (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() << "Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled << '\n'; dbgs() << "VGPRUsageDiff: " << Cand .VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while (false)
1673	<< Cand.LastPosHighLatParentScheduled << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking: " << Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: " << (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() << "Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled << '\n'; dbgs() << "VGPRUsageDiff: " << Cand .VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while (false)
1674	dbgs() << "VGPRUsageDiff: " << Cand.VGPRUsageDiff << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking: " << Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: " << (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() << "Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled << '\n'; dbgs() << "VGPRUsageDiff: " << Cand .VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while (false)
1675	dbgs() << '\n';)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking: " << Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: " << (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() << "Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled << '\n'; dbgs() << "VGPRUsageDiff: " << Cand .VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while (false);
1676
1677	Block = Cand.Block;
1678	ReadyBlocks.erase(Best);
1679	return Block;
1680	}
1681
1682	// Tracking of currently alive registers to determine VGPR Usage.
1683
1684	void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &Regs) {
1685	for (Register Reg : Regs) {
1686	// For now only track virtual registers.
1687	if (!Reg.isVirtual())
1688	continue;
1689	// If not already in the live set, then add it.
1690	(void) LiveRegs.insert(Reg);
1691	}
1692	}
1693
1694	void SIScheduleBlockScheduler::decreaseLiveRegs(SIScheduleBlock *Block,
1695	std::set<unsigned> &Regs) {
1696	for (unsigned Reg : Regs) {
1697	// For now only track virtual registers.
1698	std::set<unsigned>::iterator Pos = LiveRegs.find(Reg);
1699	assert (Pos != LiveRegs.end() && // Reg must be live.((Pos != LiveRegs.end() && LiveRegsConsumers.find(Reg ) != LiveRegsConsumers.end() && LiveRegsConsumers[Reg ] >= 1) ? static_cast<void> (0) : __assert_fail ("Pos != LiveRegs.end() && LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers[Reg] >= 1" , "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 1701, __PRETTY_FUNCTION__))
1700	LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() &&((Pos != LiveRegs.end() && LiveRegsConsumers.find(Reg ) != LiveRegsConsumers.end() && LiveRegsConsumers[Reg ] >= 1) ? static_cast<void> (0) : __assert_fail ("Pos != LiveRegs.end() && LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers[Reg] >= 1" , "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 1701, __PRETTY_FUNCTION__))
1701	LiveRegsConsumers[Reg] >= 1)((Pos != LiveRegs.end() && LiveRegsConsumers.find(Reg ) != LiveRegsConsumers.end() && LiveRegsConsumers[Reg ] >= 1) ? static_cast<void> (0) : __assert_fail ("Pos != LiveRegs.end() && LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers[Reg] >= 1" , "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 1701, __PRETTY_FUNCTION__));
1702	--LiveRegsConsumers[Reg];
1703	if (LiveRegsConsumers[Reg] == 0)
1704	LiveRegs.erase(Pos);
1705	}
1706	}
1707
1708	void SIScheduleBlockScheduler::releaseBlockSuccs(SIScheduleBlock *Parent) {
1709	for (const auto &Block : Parent->getSuccs()) {
1710	if (--BlockNumPredsLeft[Block.first->getID()] == 0)
1711	ReadyBlocks.push_back(Block.first);
1712
1713	if (Parent->isHighLatencyBlock() &&
1714	Block.second == SIScheduleBlockLinkKind::Data)
1715	LastPosHighLatencyParentScheduled[Block.first->getID()] = NumBlockScheduled;
1716	}
1717	}
1718
1719	void SIScheduleBlockScheduler::blockScheduled(SIScheduleBlock *Block) {
1720	decreaseLiveRegs(Block, Block->getInRegs());
1721	addLiveRegs(Block->getOutRegs());
1722	releaseBlockSuccs(Block);
1723	for (std::map<unsigned, unsigned>::iterator RegI =
1724	LiveOutRegsNumUsages[Block->getID()].begin(),
1725	E = LiveOutRegsNumUsages[Block->getID()].end(); RegI != E; ++RegI) {
1726	std::pair<unsigned, unsigned> RegP = *RegI;
1727	// We produce this register, thus it must not be previously alive.
1728	assert(LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() \|\|((LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end () \|\| LiveRegsConsumers[RegP.first] == 0) ? static_cast<void > (0) : __assert_fail ("LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() \|\| LiveRegsConsumers[RegP.first] == 0" , "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 1729, __PRETTY_FUNCTION__))
1729	LiveRegsConsumers[RegP.first] == 0)((LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end () \|\| LiveRegsConsumers[RegP.first] == 0) ? static_cast<void > (0) : __assert_fail ("LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() \|\| LiveRegsConsumers[RegP.first] == 0" , "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 1729, __PRETTY_FUNCTION__));
1730	LiveRegsConsumers[RegP.first] += RegP.second;
1731	}
1732	if (LastPosHighLatencyParentScheduled[Block->getID()] >
1733	(unsigned)LastPosWaitedHighLatency)
1734	LastPosWaitedHighLatency =
1735	LastPosHighLatencyParentScheduled[Block->getID()];
1736	++NumBlockScheduled;
1737	}
1738
1739	std::vector<int>
1740	SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs,
1741	std::set<unsigned> &OutRegs) {
1742	std::vector<int> DiffSetPressure;
1743	DiffSetPressure.assign(DAG->getTRI()->getNumRegPressureSets(), 0);
1744
1745	for (Register Reg : InRegs) {
1746	// For now only track virtual registers.
1747	if (!Reg.isVirtual())
1748	continue;
1749	if (LiveRegsConsumers[Reg] > 1)
1750	continue;
1751	PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
1752	for (; PSetI.isValid(); ++PSetI) {
1753	DiffSetPressure[*PSetI] -= PSetI.getWeight();
1754	}
1755	}
1756
1757	for (Register Reg : OutRegs) {
1758	// For now only track virtual registers.
1759	if (!Reg.isVirtual())
1760	continue;
1761	PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
1762	for (; PSetI.isValid(); ++PSetI) {
1763	DiffSetPressure[*PSetI] += PSetI.getWeight();
1764	}
1765	}
1766
1767	return DiffSetPressure;
1768	}
1769
1770	// SIScheduler //
1771
1772	struct SIScheduleBlockResult
1773	SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
1774	SISchedulerBlockSchedulerVariant ScheduleVariant) {
1775	SIScheduleBlocks Blocks = BlockCreator.getBlocks(BlockVariant);
1776	SIScheduleBlockScheduler Scheduler(DAG, ScheduleVariant, Blocks);
1777	std::vector<SIScheduleBlock*> ScheduledBlocks;
1778	struct SIScheduleBlockResult Res;
1779
1780	ScheduledBlocks = Scheduler.getBlocks();
1781
1782	for (unsigned b = 0; b < ScheduledBlocks.size(); ++b) {
1783	SIScheduleBlock *Block = ScheduledBlocks[b];
1784	std::vector<SUnit*> SUs = Block->getScheduledUnits();
1785
1786	for (SUnit* SU : SUs)
1787	Res.SUs.push_back(SU->NodeNum);
1788	}
1789
1790	Res.MaxSGPRUsage = Scheduler.getSGPRUsage();
1791	Res.MaxVGPRUsage = Scheduler.getVGPRUsage();
1792	return Res;
1793	}
1794
1795	// SIScheduleDAGMI //
1796
1797	SIScheduleDAGMI::SIScheduleDAGMI(MachineSchedContext *C) :
1798	ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C)) {
1799	SITII = static_cast<const SIInstrInfo*>(TII);
1800	SITRI = static_cast<const SIRegisterInfo*>(TRI);
1801	}
1802
1803	SIScheduleDAGMI::~SIScheduleDAGMI() = default;
1804
1805	// Code adapted from scheduleDAG.cpp
1806	// Does a topological sort over the SUs.
1807	// Both TopDown and BottomUp
1808	void SIScheduleDAGMI::topologicalSort() {
1809	Topo.InitDAGTopologicalSorting();
1810
1811	TopDownIndex2SU = std::vector<int>(Topo.begin(), Topo.end());
1812	BottomUpIndex2SU = std::vector<int>(Topo.rbegin(), Topo.rend());
1813	}
1814
1815	// Move low latencies further from their user without
1816	// increasing SGPR usage (in general)
1817	// This is to be replaced by a better pass that would
1818	// take into account SGPR usage (based on VGPR Usage
1819	// and the corresponding wavefront count), that would
1820	// try to merge groups of loads if it make sense, etc
1821	void SIScheduleDAGMI::moveLowLatencies() {
1822	unsigned DAGSize = SUnits.size();
1823	int LastLowLatencyUser = -1;
1824	int LastLowLatencyPos = -1;
1825
1826	for (unsigned i = 0, e = ScheduledSUnits.size(); i != e; ++i) {
1827	SUnit *SU = &SUnits[ScheduledSUnits[i]];
1828	bool IsLowLatencyUser = false;
1829	unsigned MinPos = 0;
1830
1831	for (SDep& PredDep : SU->Preds) {
1832	SUnit *Pred = PredDep.getSUnit();
1833	if (SITII->isLowLatencyInstruction(*Pred->getInstr())) {
1834	IsLowLatencyUser = true;
1835	}
1836	if (Pred->NodeNum >= DAGSize)
1837	continue;
1838	unsigned PredPos = ScheduledSUnitsInv[Pred->NodeNum];
1839	if (PredPos >= MinPos)
1840	MinPos = PredPos + 1;
1841	}
1842
1843	if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
1844	unsigned BestPos = LastLowLatencyUser + 1;
1845	if ((int)BestPos <= LastLowLatencyPos)
1846	BestPos = LastLowLatencyPos + 1;
1847	if (BestPos < MinPos)
1848	BestPos = MinPos;
1849	if (BestPos < i) {
1850	for (unsigned u = i; u > BestPos; --u) {
1851	++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
1852	ScheduledSUnits[u] = ScheduledSUnits[u-1];
1853	}
1854	ScheduledSUnits[BestPos] = SU->NodeNum;
1855	ScheduledSUnitsInv[SU->NodeNum] = BestPos;
1856	}
1857	LastLowLatencyPos = BestPos;
1858	if (IsLowLatencyUser)
1859	LastLowLatencyUser = BestPos;
1860	} else if (IsLowLatencyUser) {
1861	LastLowLatencyUser = i;
1862	// Moves COPY instructions on which depends
1863	// the low latency instructions too.
1864	} else if (SU->getInstr()->getOpcode() == AMDGPU::COPY) {
1865	bool CopyForLowLat = false;
1866	for (SDep& SuccDep : SU->Succs) {
1867	SUnit *Succ = SuccDep.getSUnit();
1868	if (SuccDep.isWeak() \|\| Succ->NodeNum >= DAGSize)
1869	continue;
1870	if (SITII->isLowLatencyInstruction(*Succ->getInstr())) {
1871	CopyForLowLat = true;
1872	}
1873	}
1874	if (!CopyForLowLat)
1875	continue;
1876	if (MinPos < i) {
1877	for (unsigned u = i; u > MinPos; --u) {
1878	++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
1879	ScheduledSUnits[u] = ScheduledSUnits[u-1];
1880	}
1881	ScheduledSUnits[MinPos] = SU->NodeNum;
1882	ScheduledSUnitsInv[SU->NodeNum] = MinPos;
1883	}
1884	}
1885	}
1886	}
1887
1888	void SIScheduleDAGMI::restoreSULinksLeft() {
1889	for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1890	SUnits[i].isScheduled = false;
1891	SUnits[i].WeakPredsLeft = SUnitsLinksBackup[i].WeakPredsLeft;
1892	SUnits[i].NumPredsLeft = SUnitsLinksBackup[i].NumPredsLeft;
1893	SUnits[i].WeakSuccsLeft = SUnitsLinksBackup[i].WeakSuccsLeft;
1894	SUnits[i].NumSuccsLeft = SUnitsLinksBackup[i].NumSuccsLeft;
1895	}
1896	}
1897
1898	// Return the Vgpr and Sgpr usage corresponding to some virtual registers.
1899	template<typename _Iterator> void
1900	SIScheduleDAGMI::fillVgprSgprCost(_Iterator First, _Iterator End,
1901	unsigned &VgprUsage, unsigned &SgprUsage) {
1902	VgprUsage = 0;
1903	SgprUsage = 0;
1904	for (_Iterator RegI = First; RegI != End; ++RegI) {
1905	Register Reg = *RegI;
1906	// For now only track virtual registers
1907	if (!Reg.isVirtual())
1908	continue;
1909	PSetIterator PSetI = MRI.getPressureSets(Reg);
1910	for (; PSetI.isValid(); ++PSetI) {
1911	if (*PSetI == AMDGPU::RegisterPressureSets::VGPR_32)
1912	VgprUsage += PSetI.getWeight();
1913	else if (*PSetI == AMDGPU::RegisterPressureSets::SReg_32)
1914	SgprUsage += PSetI.getWeight();
1915	}
1916	}
1917	}
1918
1919	void SIScheduleDAGMI::schedule()
1920	{
1921	SmallVector<SUnit*, 8> TopRoots, BotRoots;
1922	SIScheduleBlockResult Best, Temp;
1923	LLVM_DEBUG(dbgs() << "Preparing Scheduling\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Preparing Scheduling\n" ; } } while (false);
1924
1925	buildDAGWithRegPressure();
1926	LLVM_DEBUG(dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dump(); } } while (false);
1927
1928	topologicalSort();
1929	findRootsAndBiasEdges(TopRoots, BotRoots);
1930	// We reuse several ScheduleDAGMI and ScheduleDAGMILive
1931	// functions, but to make them happy we must initialize
1932	// the default Scheduler implementation (even if we do not
1933	// run it)
1934	SchedImpl->initialize(this);
1935	initQueues(TopRoots, BotRoots);
1936
1937	// Fill some stats to help scheduling.
1938
1939	SUnitsLinksBackup = SUnits;
1940	IsLowLatencySU.clear();
1941	LowLatencyOffset.clear();
1942	IsHighLatencySU.clear();
1943
1944	IsLowLatencySU.resize(SUnits.size(), 0);
1945	LowLatencyOffset.resize(SUnits.size(), 0);
1946	IsHighLatencySU.resize(SUnits.size(), 0);
1947
1948	for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
1949	SUnit *SU = &SUnits[i];
1950	const MachineOperand *BaseLatOp;
1951	int64_t OffLatReg;
1952	if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
1953	IsLowLatencySU[i] = 1;
1954	bool OffsetIsScalable;
1955	if (SITII->getMemOperandWithOffset(*SU->getInstr(), BaseLatOp, OffLatReg,
1956	OffsetIsScalable, TRI))
1957	LowLatencyOffset[i] = OffLatReg;
1958	} else if (SITII->isHighLatencyDef(SU->getInstr()->getOpcode()))
1959	IsHighLatencySU[i] = 1;
1960	}
1961
1962	SIScheduler Scheduler(this);
1963	Best = Scheduler.scheduleVariant(SISchedulerBlockCreatorVariant::LatenciesAlone,
1964	SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage);
1965
1966	// if VGPR usage is extremely high, try other good performing variants
1967	// which could lead to lower VGPR usage
1968	if (Best.MaxVGPRUsage > 180) {
1969	static const std::pair<SISchedulerBlockCreatorVariant,
1970	SISchedulerBlockSchedulerVariant>
1971	Variants[] = {
1972	{ LatenciesAlone, BlockRegUsageLatency },
1973	// { LatenciesAlone, BlockRegUsage },
1974	{ LatenciesGrouped, BlockLatencyRegUsage },
1975	// { LatenciesGrouped, BlockRegUsageLatency },
1976	// { LatenciesGrouped, BlockRegUsage },
1977	{ LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
1978	// { LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
1979	// { LatenciesAlonePlusConsecutive, BlockRegUsage }
1980	};
1981	for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1982	Temp = Scheduler.scheduleVariant(v.first, v.second);
1983	if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
1984	Best = Temp;
1985	}
1986	}
1987	// if VGPR usage is still extremely high, we may spill. Try other variants
1988	// which are less performing, but that could lead to lower VGPR usage.
1989	if (Best.MaxVGPRUsage > 200) {
1990	static const std::pair<SISchedulerBlockCreatorVariant,
1991	SISchedulerBlockSchedulerVariant>
1992	Variants[] = {
1993	// { LatenciesAlone, BlockRegUsageLatency },
1994	{ LatenciesAlone, BlockRegUsage },
1995	// { LatenciesGrouped, BlockLatencyRegUsage },
1996	{ LatenciesGrouped, BlockRegUsageLatency },
1997	{ LatenciesGrouped, BlockRegUsage },
1998	// { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
1999	{ LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
2000	{ LatenciesAlonePlusConsecutive, BlockRegUsage }
2001	};
2002	for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
2003	Temp = Scheduler.scheduleVariant(v.first, v.second);
2004	if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
2005	Best = Temp;
2006	}
2007	}
2008
2009	ScheduledSUnits = Best.SUs;
2010	ScheduledSUnitsInv.resize(SUnits.size());
2011
2012	for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
2013	ScheduledSUnitsInv[ScheduledSUnits[i]] = i;
2014	}
2015
2016	moveLowLatencies();
2017
2018	// Tell the outside world about the result of the scheduling.
2019
2020	assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker")((TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker" ) ? static_cast<void> (0) : __assert_fail ("TopRPTracker.getPos() == RegionBegin && \"bad initial Top tracker\"" , "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 2020, __PRETTY_FUNCTION__));
2021	TopRPTracker.setPos(CurrentTop);
2022
2023	for (std::vector<unsigned>::iterator I = ScheduledSUnits.begin(),
2024	E = ScheduledSUnits.end(); I != E; ++I) {
2025	SUnit SU = &SUnits[I];
2026
2027	scheduleMI(SU, true);
2028
2029	LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr(); } } while (false)
2030	<< SU->getInstr())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << SU->getInstr(); } } while (false);
2031	}
2032
2033	assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.")((CurrentTop == CurrentBottom && "Nonempty unscheduled zone." ) ? static_cast<void> (0) : __assert_fail ("CurrentTop == CurrentBottom && \"Nonempty unscheduled zone.\"" , "/build/llvm-toolchain-snapshot-12~++20210103111111+fe9976c02c09/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 2033, __PRETTY_FUNCTION__));
2034
2035	placeDebugValues();
2036
2037	LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { { dbgs() << "*** Final schedule for " << printMBBReference(begin()->getParent()) << " **\n"; dumpSchedule(); dbgs() << '\n'; }; } } while (false)
2038	dbgs() << "* Final schedule for "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { { dbgs() << "* Final schedule for " << printMBBReference(begin()->getParent()) << " **\n"; dumpSchedule(); dbgs() << '\n'; }; } } while (false)
2039	<< printMBBReference(begin()->getParent()) << " \n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { { dbgs() << "* Final schedule for " << printMBBReference(begin()->getParent()) << " **\n"; dumpSchedule(); dbgs() << '\n'; }; } } while (false)
2040	dumpSchedule();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { { dbgs() << "*** Final schedule for " << printMBBReference(begin()->getParent()) << " **\n"; dumpSchedule(); dbgs() << '\n'; }; } } while (false)
2041	dbgs() << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { { dbgs() << "*** Final schedule for " << printMBBReference(begin()->getParent()) << " **\n"; dumpSchedule(); dbgs() << '\n'; }; } } while (false)
2042	})do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { { dbgs() << "*** Final schedule for " << printMBBReference(begin()->getParent()) << " **\n"; dumpSchedule(); dbgs() << '\n'; }; } } while (false);
2043	}