/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp

Bug Summary

File:	llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
Warning:	line 770, column 9 Value stored to 'ProposedColor' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

Show analyzer invocation

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name SIMachineScheduler.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/lib/Target/AMDGPU -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/lib/Target/AMDGPU -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-08-28-193554-24367-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp

1	//===-- SIMachineScheduler.cpp - SI Scheduler Interface -------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	/// \file
10	/// SI Machine Scheduler interface
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "SIMachineScheduler.h"
15	#include "SIInstrInfo.h"
16	#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
17	#include "llvm/CodeGen/LiveIntervals.h"
18	#include "llvm/CodeGen/MachineRegisterInfo.h"
19
20	using namespace llvm;
21
22	#define DEBUG_TYPE"machine-scheduler" "machine-scheduler"
23
24	// This scheduler implements a different scheduling algorithm than
25	// GenericScheduler.
26	//
27	// There are several specific architecture behaviours that can't be modelled
28	// for GenericScheduler:
29	// . When accessing the result of an SGPR load instruction, you have to wait
30	// for all the SGPR load instructions before your current instruction to
31	// have finished.
32	// . When accessing the result of an VGPR load instruction, you have to wait
33	// for all the VGPR load instructions previous to the VGPR load instruction
34	// you are interested in to finish.
35	// . The less the register pressure, the best load latencies are hidden
36	//
37	// Moreover some specifities (like the fact a lot of instructions in the shader
38	// have few dependencies) makes the generic scheduler have some unpredictable
39	// behaviours. For example when register pressure becomes high, it can either
40	// manage to prevent register pressure from going too high, or it can
41	// increase register pressure even more than if it hadn't taken register
42	// pressure into account.
43	//
44	// Also some other bad behaviours are generated, like loading at the beginning
45	// of the shader a constant in VGPR you won't need until the end of the shader.
46	//
47	// The scheduling problem for SI can distinguish three main parts:
48	// . Hiding high latencies (texture sampling, etc)
49	// . Hiding low latencies (SGPR constant loading, etc)
50	// . Keeping register usage low for better latency hiding and general
51	// performance
52	//
53	// Some other things can also affect performance, but are hard to predict
54	// (cache usage, the fact the HW can issue several instructions from different
55	// wavefronts if different types, etc)
56	//
57	// This scheduler tries to solve the scheduling problem by dividing it into
58	// simpler sub-problems. It divides the instructions into blocks, schedules
59	// locally inside the blocks where it takes care of low latencies, and then
60	// chooses the order of the blocks by taking care of high latencies.
61	// Dividing the instructions into blocks helps control keeping register
62	// usage low.
63	//
64	// First the instructions are put into blocks.
65	// We want the blocks help control register usage and hide high latencies
66	// later. To help control register usage, we typically want all local
67	// computations, when for example you create a result that can be comsummed
68	// right away, to be contained in a block. Block inputs and outputs would
69	// typically be important results that are needed in several locations of
70	// the shader. Since we do want blocks to help hide high latencies, we want
71	// the instructions inside the block to have a minimal set of dependencies
72	// on high latencies. It will make it easy to pick blocks to hide specific
73	// high latencies.
74	// The block creation algorithm is divided into several steps, and several
75	// variants can be tried during the scheduling process.
76	//
77	// Second the order of the instructions inside the blocks is chosen.
78	// At that step we do take into account only register usage and hiding
79	// low latency instructions
80	//
81	// Third the block order is chosen, there we try to hide high latencies
82	// and keep register usage low.
83	//
84	// After the third step, a pass is done to improve the hiding of low
85	// latencies.
86	//
87	// Actually when talking about 'low latency' or 'high latency' it includes
88	// both the latency to get the cache (or global mem) data go to the register,
89	// and the bandwidth limitations.
90	// Increasing the number of active wavefronts helps hide the former, but it
91	// doesn't solve the latter, thus why even if wavefront count is high, we have
92	// to try have as many instructions hiding high latencies as possible.
93	// The OpenCL doc says for example latency of 400 cycles for a global mem access,
94	// which is hidden by 10 instructions if the wavefront count is 10.
95
96	// Some figures taken from AMD docs:
97	// Both texture and constant L1 caches are 4-way associative with 64 bytes
98	// lines.
99	// Constant cache is shared with 4 CUs.
100	// For texture sampling, the address generation unit receives 4 texture
101	// addresses per cycle, thus we could expect texture sampling latency to be
102	// equivalent to 4 instructions in the very best case (a VGPR is 64 work items,
103	// instructions in a wavefront group are executed every 4 cycles),
104	// or 16 instructions if the other wavefronts associated to the 3 other VALUs
105	// of the CU do texture sampling too. (Don't take these figures too seriously,
106	// as I'm not 100% sure of the computation)
107	// Data exports should get similar latency.
108	// For constant loading, the cache is shader with 4 CUs.
109	// The doc says "a throughput of 16B/cycle for each of the 4 Compute Unit"
110	// I guess if the other CU don't read the cache, it can go up to 64B/cycle.
111	// It means a simple s_buffer_load should take one instruction to hide, as
112	// well as a s_buffer_loadx2 and potentially a s_buffer_loadx8 if on the same
113	// cache line.
114	//
115	// As of today the driver doesn't preload the constants in cache, thus the
116	// first loads get extra latency. The doc says global memory access can be
117	// 300-600 cycles. We do not specially take that into account when scheduling
118	// As we expect the driver to be able to preload the constants soon.
119
120	// common code //
121
122	#ifndef NDEBUG
123
124	static const char *getReasonStr(SIScheduleCandReason Reason) {
125	switch (Reason) {
126	case NoCand: return "NOCAND";
127	case RegUsage: return "REGUSAGE";
128	case Latency: return "LATENCY";
129	case Successor: return "SUCCESSOR";
130	case Depth: return "DEPTH";
131	case NodeOrder: return "ORDER";
132	}
133	llvm_unreachable("Unknown reason!")::llvm::llvm_unreachable_internal("Unknown reason!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 133);
134	}
135
136	#endif
137
138	namespace llvm {
139	namespace SISched {
140	static bool tryLess(int TryVal, int CandVal,
141	SISchedulerCandidate &TryCand,
142	SISchedulerCandidate &Cand,
143	SIScheduleCandReason Reason) {
144	if (TryVal < CandVal) {
145	TryCand.Reason = Reason;
146	return true;
147	}
148	if (TryVal > CandVal) {
149	if (Cand.Reason > Reason)
150	Cand.Reason = Reason;
151	return true;
152	}
153	Cand.setRepeat(Reason);
154	return false;
155	}
156
157	static bool tryGreater(int TryVal, int CandVal,
158	SISchedulerCandidate &TryCand,
159	SISchedulerCandidate &Cand,
160	SIScheduleCandReason Reason) {
161	if (TryVal > CandVal) {
162	TryCand.Reason = Reason;
163	return true;
164	}
165	if (TryVal < CandVal) {
166	if (Cand.Reason > Reason)
167	Cand.Reason = Reason;
168	return true;
169	}
170	Cand.setRepeat(Reason);
171	return false;
172	}
173	} // end namespace SISched
174	} // end namespace llvm
175
176	// SIScheduleBlock //
177
178	void SIScheduleBlock::addUnit(SUnit *SU) {
179	NodeNum2Index[SU->NodeNum] = SUnits.size();
180	SUnits.push_back(SU);
181	}
182
183	#ifndef NDEBUG
184	void SIScheduleBlock::traceCandidate(const SISchedCandidate &Cand) {
185
186	dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
187	dbgs() << '\n';
188	}
189	#endif
190
191	void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,
192	SISchedCandidate &TryCand) {
193	// Initialize the candidate if needed.
194	if (!Cand.isValid()) {
195	TryCand.Reason = NodeOrder;
196	return;
197	}
198
199	if (Cand.SGPRUsage > 60 &&
200	SISched::tryLess(TryCand.SGPRUsage, Cand.SGPRUsage,
201	TryCand, Cand, RegUsage))
202	return;
203
204	// Schedule low latency instructions as top as possible.
205	// Order of priority is:
206	// . Low latency instructions which do not depend on other low latency
207	// instructions we haven't waited for
208	// . Other instructions which do not depend on low latency instructions
209	// we haven't waited for
210	// . Low latencies
211	// . All other instructions
212	// Goal is to get: low latency instructions - independent instructions
213	// - (eventually some more low latency instructions)
214	// - instructions that depend on the first low latency instructions.
215	// If in the block there is a lot of constant loads, the SGPR usage
216	// could go quite high, thus above the arbitrary limit of 60 will encourage
217	// use the already loaded constants (in order to release some SGPRs) before
218	// loading more.
219	if (SISched::tryLess(TryCand.HasLowLatencyNonWaitedParent,
220	Cand.HasLowLatencyNonWaitedParent,
221	TryCand, Cand, SIScheduleCandReason::Depth))
222	return;
223
224	if (SISched::tryGreater(TryCand.IsLowLatency, Cand.IsLowLatency,
225	TryCand, Cand, SIScheduleCandReason::Depth))
226	return;
227
228	if (TryCand.IsLowLatency &&
229	SISched::tryLess(TryCand.LowLatencyOffset, Cand.LowLatencyOffset,
230	TryCand, Cand, SIScheduleCandReason::Depth))
231	return;
232
233	if (SISched::tryLess(TryCand.VGPRUsage, Cand.VGPRUsage,
234	TryCand, Cand, RegUsage))
235	return;
236
237	// Fall through to original instruction order.
238	if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
239	TryCand.Reason = NodeOrder;
240	}
241	}
242
243	SUnit* SIScheduleBlock::pickNode() {
244	SISchedCandidate TopCand;
245
246	for (SUnit* SU : TopReadySUs) {
247	SISchedCandidate TryCand;
248	std::vector<unsigned> pressure;
249	std::vector<unsigned> MaxPressure;
250	// Predict register usage after this instruction.
251	TryCand.SU = SU;
252	TopRPTracker.getDownwardPressure(SU->getInstr(), pressure, MaxPressure);
253	TryCand.SGPRUsage = pressure[AMDGPU::RegisterPressureSets::SReg_32];
254	TryCand.VGPRUsage = pressure[AMDGPU::RegisterPressureSets::VGPR_32];
255	TryCand.IsLowLatency = DAG->IsLowLatencySU[SU->NodeNum];
256	TryCand.LowLatencyOffset = DAG->LowLatencyOffset[SU->NodeNum];
257	TryCand.HasLowLatencyNonWaitedParent =
258	HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]];
259	tryCandidateTopDown(TopCand, TryCand);
260	if (TryCand.Reason != NoCand)
261	TopCand.setBest(TryCand);
262	}
263
264	return TopCand.SU;
265	}
266
267
268	// Schedule something valid.
269	void SIScheduleBlock::fastSchedule() {
270	TopReadySUs.clear();
271	if (Scheduled)
272	undoSchedule();
273
274	for (SUnit* SU : SUnits) {
275	if (!SU->NumPredsLeft)
276	TopReadySUs.push_back(SU);
277	}
278
279	while (!TopReadySUs.empty()) {
280	SUnit *SU = TopReadySUs[0];
281	ScheduledSUnits.push_back(SU);
282	nodeScheduled(SU);
283	}
284
285	Scheduled = true;
286	}
287
288	// Returns if the register was set between first and last.
289	static bool isDefBetween(unsigned Reg,
290	SlotIndex First, SlotIndex Last,
291	const MachineRegisterInfo *MRI,
292	const LiveIntervals *LIS) {
293	for (MachineRegisterInfo::def_instr_iterator
294	UI = MRI->def_instr_begin(Reg),
295	UE = MRI->def_instr_end(); UI != UE; ++UI) {
296	const MachineInstr* MI = &*UI;
297	if (MI->isDebugValue())
298	continue;
299	SlotIndex InstSlot = LIS->getInstructionIndex(*MI).getRegSlot();
300	if (InstSlot >= First && InstSlot <= Last)
301	return true;
302	}
303	return false;
304	}
305
306	void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock,
307	MachineBasicBlock::iterator EndBlock) {
308	IntervalPressure Pressure, BotPressure;
309	RegPressureTracker RPTracker(Pressure), BotRPTracker(BotPressure);
310	LiveIntervals *LIS = DAG->getLIS();
311	MachineRegisterInfo *MRI = DAG->getMRI();
312	DAG->initRPTracker(TopRPTracker);
313	DAG->initRPTracker(BotRPTracker);
314	DAG->initRPTracker(RPTracker);
315
316	// Goes though all SU. RPTracker captures what had to be alive for the SUs
317	// to execute, and what is still alive at the end.
318	for (SUnit* SU : ScheduledSUnits) {
319	RPTracker.setPos(SU->getInstr());
320	RPTracker.advance();
321	}
322
323	// Close the RPTracker to finalize live ins/outs.
324	RPTracker.closeRegion();
325
326	// Initialize the live ins and live outs.
327	TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
328	BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
329
330	// Do not Track Physical Registers, because it messes up.
331	for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
332	if (Register::isVirtualRegister(RegMaskPair.RegUnit))
333	LiveInRegs.insert(RegMaskPair.RegUnit);
334	}
335	LiveOutRegs.clear();
336	// There is several possibilities to distinguish:
337	// 1) Reg is not input to any instruction in the block, but is output of one
338	// 2) 1) + read in the block and not needed after it
339	// 3) 1) + read in the block but needed in another block
340	// 4) Reg is input of an instruction but another block will read it too
341	// 5) Reg is input of an instruction and then rewritten in the block.
342	// result is not read in the block (implies used in another block)
343	// 6) Reg is input of an instruction and then rewritten in the block.
344	// result is read in the block and not needed in another block
345	// 7) Reg is input of an instruction and then rewritten in the block.
346	// result is read in the block but also needed in another block
347	// LiveInRegs will contains all the regs in situation 4, 5, 6, 7
348	// We want LiveOutRegs to contain only Regs whose content will be read after
349	// in another block, and whose content was written in the current block,
350	// that is we want it to get 1, 3, 5, 7
351	// Since we made the MIs of a block to be packed all together before
352	// scheduling, then the LiveIntervals were correct, and the RPTracker was
353	// able to correctly handle 5 vs 6, 2 vs 3.
354	// (Note: This is not sufficient for RPTracker to not do mistakes for case 4)
355	// The RPTracker's LiveOutRegs has 1, 3, (some correct or incorrect)4, 5, 7
356	// Comparing to LiveInRegs is not sufficient to differenciate 4 vs 5, 7
357	// The use of findDefBetween removes the case 4.
358	for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
359	Register Reg = RegMaskPair.RegUnit;
360	if (Reg.isVirtual() &&
361	isDefBetween(Reg, LIS->getInstructionIndex(*BeginBlock).getRegSlot(),
362	LIS->getInstructionIndex(*EndBlock).getRegSlot(), MRI,
363	LIS)) {
364	LiveOutRegs.insert(Reg);
365	}
366	}
367
368	// Pressure = sum_alive_registers register size
369	// Internally llvm will represent some registers as big 128 bits registers
370	// for example, but they actually correspond to 4 actual 32 bits registers.
371	// Thus Pressure is not equal to num_alive_registers * constant.
372	LiveInPressure = TopPressure.MaxSetPressure;
373	LiveOutPressure = BotPressure.MaxSetPressure;
374
375	// Prepares TopRPTracker for top down scheduling.
376	TopRPTracker.closeTop();
377	}
378
379	void SIScheduleBlock::schedule(MachineBasicBlock::iterator BeginBlock,
380	MachineBasicBlock::iterator EndBlock) {
381	if (!Scheduled)
382	fastSchedule();
383
384	// PreScheduling phase to set LiveIn and LiveOut.
385	initRegPressure(BeginBlock, EndBlock);
386	undoSchedule();
387
388	// Schedule for real now.
389
390	TopReadySUs.clear();
391
392	for (SUnit* SU : SUnits) {
393	if (!SU->NumPredsLeft)
394	TopReadySUs.push_back(SU);
395	}
396
397	while (!TopReadySUs.empty()) {
398	SUnit *SU = pickNode();
399	ScheduledSUnits.push_back(SU);
400	TopRPTracker.setPos(SU->getInstr());
401	TopRPTracker.advance();
402	nodeScheduled(SU);
403	}
404
405	// TODO: compute InternalAdditionnalPressure.
406	InternalAdditionnalPressure.resize(TopPressure.MaxSetPressure.size());
407
408	// Check everything is right.
409	#ifndef NDEBUG
410	assert(SUnits.size() == ScheduledSUnits.size() &&(static_cast <bool> (SUnits.size() == ScheduledSUnits.size () && TopReadySUs.empty()) ? void (0) : __assert_fail ("SUnits.size() == ScheduledSUnits.size() && TopReadySUs.empty()" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 411, __extension__ __PRETTY_FUNCTION__))
411	TopReadySUs.empty())(static_cast <bool> (SUnits.size() == ScheduledSUnits.size () && TopReadySUs.empty()) ? void (0) : __assert_fail ("SUnits.size() == ScheduledSUnits.size() && TopReadySUs.empty()" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 411, __extension__ __PRETTY_FUNCTION__));
412	for (SUnit* SU : SUnits) {
413	assert(SU->isScheduled &&(static_cast <bool> (SU->isScheduled && SU-> NumPredsLeft == 0) ? void (0) : __assert_fail ("SU->isScheduled && SU->NumPredsLeft == 0" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 414, __extension__ __PRETTY_FUNCTION__))
414	SU->NumPredsLeft == 0)(static_cast <bool> (SU->isScheduled && SU-> NumPredsLeft == 0) ? void (0) : __assert_fail ("SU->isScheduled && SU->NumPredsLeft == 0" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 414, __extension__ __PRETTY_FUNCTION__));
415	}
416	#endif
417
418	Scheduled = true;
419	}
420
421	void SIScheduleBlock::undoSchedule() {
422	for (SUnit* SU : SUnits) {
423	SU->isScheduled = false;
424	for (SDep& Succ : SU->Succs) {
425	if (BC->isSUInBlock(Succ.getSUnit(), ID))
426	undoReleaseSucc(SU, &Succ);
427	}
428	}
429	HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
430	ScheduledSUnits.clear();
431	Scheduled = false;
432	}
433
434	void SIScheduleBlock::undoReleaseSucc(SUnit SU, SDep SuccEdge) {
435	SUnit *SuccSU = SuccEdge->getSUnit();
436
437	if (SuccEdge->isWeak()) {
438	++SuccSU->WeakPredsLeft;
439	return;
440	}
441	++SuccSU->NumPredsLeft;
442	}
443
444	void SIScheduleBlock::releaseSucc(SUnit SU, SDep SuccEdge) {
445	SUnit *SuccSU = SuccEdge->getSUnit();
446
447	if (SuccEdge->isWeak()) {
448	--SuccSU->WeakPredsLeft;
449	return;
450	}
451	#ifndef NDEBUG
452	if (SuccSU->NumPredsLeft == 0) {
453	dbgs() << "* Scheduling failed! *\n";
454	DAG->dumpNode(*SuccSU);
455	dbgs() << " has been released too many times!\n";
456	llvm_unreachable(nullptr)::llvm::llvm_unreachable_internal(nullptr, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 456);
457	}
458	#endif
459
460	--SuccSU->NumPredsLeft;
461	}
462
463	/// Release Successors of the SU that are in the block or not.
464	void SIScheduleBlock::releaseSuccessors(SUnit *SU, bool InOrOutBlock) {
465	for (SDep& Succ : SU->Succs) {
466	SUnit *SuccSU = Succ.getSUnit();
467
468	if (SuccSU->NodeNum >= DAG->SUnits.size())
469	continue;
470
471	if (BC->isSUInBlock(SuccSU, ID) != InOrOutBlock)
472	continue;
473
474	releaseSucc(SU, &Succ);
475	if (SuccSU->NumPredsLeft == 0 && InOrOutBlock)
476	TopReadySUs.push_back(SuccSU);
477	}
478	}
479
480	void SIScheduleBlock::nodeScheduled(SUnit *SU) {
481	// Is in TopReadySUs
482	assert (!SU->NumPredsLeft)(static_cast <bool> (!SU->NumPredsLeft) ? void (0) : __assert_fail ("!SU->NumPredsLeft", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 482, __extension__ __PRETTY_FUNCTION__));
483	std::vector<SUnit *>::iterator I = llvm::find(TopReadySUs, SU);
484	if (I == TopReadySUs.end()) {
485	dbgs() << "Data Structure Bug in SI Scheduler\n";
486	llvm_unreachable(nullptr)::llvm::llvm_unreachable_internal(nullptr, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 486);
487	}
488	TopReadySUs.erase(I);
489
490	releaseSuccessors(SU, true);
491	// Scheduling this node will trigger a wait,
492	// thus propagate to other instructions that they do not need to wait either.
493	if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]])
494	HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
495
496	if (DAG->IsLowLatencySU[SU->NodeNum]) {
497	for (SDep& Succ : SU->Succs) {
498	std::map<unsigned, unsigned>::iterator I =
499	NodeNum2Index.find(Succ.getSUnit()->NodeNum);
500	if (I != NodeNum2Index.end())
501	HasLowLatencyNonWaitedParent[I->second] = 1;
502	}
503	}
504	SU->isScheduled = true;
505	}
506
507	void SIScheduleBlock::finalizeUnits() {
508	// We remove links from outside blocks to enable scheduling inside the block.
509	for (SUnit* SU : SUnits) {
510	releaseSuccessors(SU, false);
511	if (DAG->IsHighLatencySU[SU->NodeNum])
512	HighLatencyBlock = true;
513	}
514	HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0);
515	}
516
517	// we maintain ascending order of IDs
518	void SIScheduleBlock::addPred(SIScheduleBlock *Pred) {
519	unsigned PredID = Pred->getID();
520
521	// Check if not already predecessor.
522	for (SIScheduleBlock* P : Preds) {
523	if (PredID == P->getID())
524	return;
525	}
526	Preds.push_back(Pred);
527
528	assert(none_of(Succs,(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock , SIScheduleBlockLinkKind> S) { return PredID == S.first-> getID(); }) && "Loop in the Block Graph!") ? void (0) : __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 533, __extension__ __PRETTY_FUNCTION__))
529	[=](std::pair<SIScheduleBlock,(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock , SIScheduleBlockLinkKind> S) { return PredID == S.first-> getID(); }) && "Loop in the Block Graph!") ? void (0) : __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 533, __extension__ __PRETTY_FUNCTION__))
530	SIScheduleBlockLinkKind> S) {(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock , SIScheduleBlockLinkKind> S) { return PredID == S.first-> getID(); }) && "Loop in the Block Graph!") ? void (0) : __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 533, __extension__ __PRETTY_FUNCTION__))
531	return PredID == S.first->getID();(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock , SIScheduleBlockLinkKind> S) { return PredID == S.first-> getID(); }) && "Loop in the Block Graph!") ? void (0) : __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 533, __extension__ __PRETTY_FUNCTION__))
532	}) &&(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock , SIScheduleBlockLinkKind> S) { return PredID == S.first-> getID(); }) && "Loop in the Block Graph!") ? void (0) : __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 533, __extension__ __PRETTY_FUNCTION__))
533	"Loop in the Block Graph!")(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock , SIScheduleBlockLinkKind> S) { return PredID == S.first-> getID(); }) && "Loop in the Block Graph!") ? void (0) : __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 533, __extension__ __PRETTY_FUNCTION__));
534	}
535
536	void SIScheduleBlock::addSucc(SIScheduleBlock *Succ,
537	SIScheduleBlockLinkKind Kind) {
538	unsigned SuccID = Succ->getID();
539
540	// Check if not already predecessor.
541	for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> &S : Succs) {
542	if (SuccID == S.first->getID()) {
543	if (S.second == SIScheduleBlockLinkKind::NoData &&
544	Kind == SIScheduleBlockLinkKind::Data)
545	S.second = Kind;
546	return;
547	}
548	}
549	if (Succ->isHighLatencyBlock())
550	++NumHighLatencySuccessors;
551	Succs.push_back(std::make_pair(Succ, Kind));
552
553	assert(none_of(Preds,(static_cast <bool> (none_of(Preds, [=](SIScheduleBlock P) { return SuccID == P->getID(); }) && "Loop in the Block Graph!" ) ? void (0) : __assert_fail ("none_of(Preds, [=](SIScheduleBlock P) { return SuccID == P->getID(); }) && \"Loop in the Block Graph!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 555, __extension__ __PRETTY_FUNCTION__))
554	[=](SIScheduleBlock P) { return SuccID == P->getID(); }) &&(static_cast <bool> (none_of(Preds, [=](SIScheduleBlock P) { return SuccID == P->getID(); }) && "Loop in the Block Graph!" ) ? void (0) : __assert_fail ("none_of(Preds, [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) && \"Loop in the Block Graph!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 555, __extension__ __PRETTY_FUNCTION__))
555	"Loop in the Block Graph!")(static_cast <bool> (none_of(Preds, [=](SIScheduleBlock P) { return SuccID == P->getID(); }) && "Loop in the Block Graph!" ) ? void (0) : __assert_fail ("none_of(Preds, [=](SIScheduleBlock P) { return SuccID == P->getID(); }) && \"Loop in the Block Graph!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 555, __extension__ __PRETTY_FUNCTION__));
556	}
557
558	#ifndef NDEBUG
559	void SIScheduleBlock::printDebug(bool full) {
560	dbgs() << "Block (" << ID << ")\n";
561	if (!full)
562	return;
563
564	dbgs() << "\nContains High Latency Instruction: "
565	<< HighLatencyBlock << '\n';
566	dbgs() << "\nDepends On:\n";
567	for (SIScheduleBlock* P : Preds) {
568	P->printDebug(false);
569	}
570
571	dbgs() << "\nSuccessors:\n";
572	for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S : Succs) {
573	if (S.second == SIScheduleBlockLinkKind::Data)
574	dbgs() << "(Data Dep) ";
575	S.first->printDebug(false);
576	}
577
578	if (Scheduled) {
579	dbgs() << "LiveInPressure "
580	<< LiveInPressure[AMDGPU::RegisterPressureSets::SReg_32] << ' '
581	<< LiveInPressure[AMDGPU::RegisterPressureSets::VGPR_32] << '\n';
582	dbgs() << "LiveOutPressure "
583	<< LiveOutPressure[AMDGPU::RegisterPressureSets::SReg_32] << ' '
584	<< LiveOutPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n\n";
585	dbgs() << "LiveIns:\n";
586	for (unsigned Reg : LiveInRegs)
587	dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
588
589	dbgs() << "\nLiveOuts:\n";
590	for (unsigned Reg : LiveOutRegs)
591	dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
592	}
593
594	dbgs() << "\nInstructions:\n";
595	for (const SUnit* SU : SUnits)
596	DAG->dumpNode(*SU);
597
598	dbgs() << "///////////////////////\n";
599	}
600	#endif
601
602	// SIScheduleBlockCreator //
603
604	SIScheduleBlockCreator::SIScheduleBlockCreator(SIScheduleDAGMI *DAG)
605	: DAG(DAG) {}
606
607	SIScheduleBlocks
608	SIScheduleBlockCreator::getBlocks(SISchedulerBlockCreatorVariant BlockVariant) {
609	std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator B =
610	Blocks.find(BlockVariant);
611	if (B == Blocks.end()) {
612	SIScheduleBlocks Res;
613	createBlocksForVariant(BlockVariant);
614	topologicalSort();
615	scheduleInsideBlocks();
616	fillStats();
617	Res.Blocks = CurrentBlocks;
618	Res.TopDownIndex2Block = TopDownIndex2Block;
619	Res.TopDownBlock2Index = TopDownBlock2Index;
620	Blocks[BlockVariant] = Res;
621	return Res;
622	} else {
623	return B->second;
624	}
625	}
626
627	bool SIScheduleBlockCreator::isSUInBlock(SUnit *SU, unsigned ID) {
628	if (SU->NodeNum >= DAG->SUnits.size())
629	return false;
630	return CurrentBlocks[Node2CurrentBlock[SU->NodeNum]]->getID() == ID;
631	}
632
633	void SIScheduleBlockCreator::colorHighLatenciesAlone() {
634	unsigned DAGSize = DAG->SUnits.size();
635
636	for (unsigned i = 0, e = DAGSize; i != e; ++i) {
637	SUnit *SU = &DAG->SUnits[i];
638	if (DAG->IsHighLatencySU[SU->NodeNum]) {
639	CurrentColoring[SU->NodeNum] = NextReservedID++;
640	}
641	}
642	}
643
644	static bool
645	hasDataDependencyPred(const SUnit &SU, const SUnit &FromSU) {
646	for (const auto &PredDep : SU.Preds) {
647	if (PredDep.getSUnit() == &FromSU &&
648	PredDep.getKind() == llvm::SDep::Data)
649	return true;
650	}
651	return false;
652	}
653
654	void SIScheduleBlockCreator::colorHighLatenciesGroups() {
655	unsigned DAGSize = DAG->SUnits.size();
656	unsigned NumHighLatencies = 0;
657	unsigned GroupSize;
658	int Color = NextReservedID;
659	unsigned Count = 0;
660	std::set<unsigned> FormingGroup;
661
662	for (unsigned i = 0, e = DAGSize; i != e; ++i) {
663	SUnit *SU = &DAG->SUnits[i];
664	if (DAG->IsHighLatencySU[SU->NodeNum])
665	++NumHighLatencies;
666	}
667
668	if (NumHighLatencies == 0)
669	return;
670
671	if (NumHighLatencies <= 6)
672	GroupSize = 2;
673	else if (NumHighLatencies <= 12)
674	GroupSize = 3;
675	else
676	GroupSize = 4;
677
678	for (unsigned SUNum : DAG->TopDownIndex2SU) {
679	const SUnit &SU = DAG->SUnits[SUNum];
680	if (DAG->IsHighLatencySU[SU.NodeNum]) {
681	unsigned CompatibleGroup = true;
682	int ProposedColor = Color;
683	std::vector<int> AdditionalElements;
684
685	// We don't want to put in the same block
686	// two high latency instructions that depend
687	// on each other.
688	// One way would be to check canAddEdge
689	// in both directions, but that currently is not
690	// enough because there the high latency order is
691	// enforced (via links).
692	// Instead, look at the dependencies between the
693	// high latency instructions and deduce if it is
694	// a data dependency or not.
695	for (unsigned j : FormingGroup) {
696	bool HasSubGraph;
697	std::vector<int> SubGraph;
698	// By construction (topological order), if SU and
699	// DAG->SUnits[j] are linked, DAG->SUnits[j] is neccessary
700	// in the parent graph of SU.
701	#ifndef NDEBUG
702	SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j],
703	HasSubGraph);
704	assert(!HasSubGraph)(static_cast <bool> (!HasSubGraph) ? void (0) : __assert_fail ("!HasSubGraph", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 704, __extension__ __PRETTY_FUNCTION__));
705	#endif
706	SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU,
707	HasSubGraph);
708	if (!HasSubGraph)
709	continue; // No dependencies between each other
710	else if (SubGraph.size() > 5) {
711	// Too many elements would be required to be added to the block.
712	CompatibleGroup = false;
713	break;
714	}
715	else {
716	// Check the type of dependency
717	for (unsigned k : SubGraph) {
718	// If in the path to join the two instructions,
719	// there is another high latency instruction,
720	// or instructions colored for another block
721	// abort the merge.
722	if (DAG->IsHighLatencySU[k] \|\|
723	(CurrentColoring[k] != ProposedColor &&
724	CurrentColoring[k] != 0)) {
725	CompatibleGroup = false;
726	break;
727	}
728	// If one of the SU in the subgraph depends on the result of SU j,
729	// there'll be a data dependency.
730	if (hasDataDependencyPred(DAG->SUnits[k], DAG->SUnits[j])) {
731	CompatibleGroup = false;
732	break;
733	}
734	}
735	if (!CompatibleGroup)
736	break;
737	// Same check for the SU
738	if (hasDataDependencyPred(SU, DAG->SUnits[j])) {
739	CompatibleGroup = false;
740	break;
741	}
742	// Add all the required instructions to the block
743	// These cannot live in another block (because they
744	// depend (order dependency) on one of the
745	// instruction in the block, and are required for the
746	// high latency instruction we add.
747	llvm::append_range(AdditionalElements, SubGraph);
748	}
749	}
750	if (CompatibleGroup) {
751	FormingGroup.insert(SU.NodeNum);
752	for (unsigned j : AdditionalElements)
753	CurrentColoring[j] = ProposedColor;
754	CurrentColoring[SU.NodeNum] = ProposedColor;
755	++Count;
756	}
757	// Found one incompatible instruction,
758	// or has filled a big enough group.
759	// -> start a new one.
760	if (!CompatibleGroup) {
761	FormingGroup.clear();
762	Color = ++NextReservedID;
763	ProposedColor = Color;
764	FormingGroup.insert(SU.NodeNum);
765	CurrentColoring[SU.NodeNum] = ProposedColor;
766	Count = 0;
767	} else if (Count == GroupSize) {
768	FormingGroup.clear();
769	Color = ++NextReservedID;
770	ProposedColor = Color;
	Value stored to 'ProposedColor' is never read
771	Count = 0;
772	}
773	}
774	}
775	}
776
777	void SIScheduleBlockCreator::colorComputeReservedDependencies() {
778	unsigned DAGSize = DAG->SUnits.size();
779	std::map<std::set<unsigned>, unsigned> ColorCombinations;
780
781	CurrentTopDownReservedDependencyColoring.clear();
782	CurrentBottomUpReservedDependencyColoring.clear();
783
784	CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0);
785	CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0);
786
787	// Traverse TopDown, and give different colors to SUs depending
788	// on which combination of High Latencies they depend on.
789
790	for (unsigned SUNum : DAG->TopDownIndex2SU) {
791	SUnit *SU = &DAG->SUnits[SUNum];
792	std::set<unsigned> SUColors;
793
794	// Already given.
795	if (CurrentColoring[SU->NodeNum]) {
796	CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
797	CurrentColoring[SU->NodeNum];
798	continue;
799	}
800
801	for (SDep& PredDep : SU->Preds) {
802	SUnit *Pred = PredDep.getSUnit();
803	if (PredDep.isWeak() \|\| Pred->NodeNum >= DAGSize)
804	continue;
805	if (CurrentTopDownReservedDependencyColoring[Pred->NodeNum] > 0)
806	SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->NodeNum]);
807	}
808	// Color 0 by default.
809	if (SUColors.empty())
810	continue;
811	// Same color than parents.
812	if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
813	CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
814	*SUColors.begin();
815	else {
816	std::map<std::set<unsigned>, unsigned>::iterator Pos =
817	ColorCombinations.find(SUColors);
818	if (Pos != ColorCombinations.end()) {
819	CurrentTopDownReservedDependencyColoring[SU->NodeNum] = Pos->second;
820	} else {
821	CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
822	NextNonReservedID;
823	ColorCombinations[SUColors] = NextNonReservedID++;
824	}
825	}
826	}
827
828	ColorCombinations.clear();
829
830	// Same as before, but BottomUp.
831
832	for (unsigned SUNum : DAG->BottomUpIndex2SU) {
833	SUnit *SU = &DAG->SUnits[SUNum];
834	std::set<unsigned> SUColors;
835
836	// Already given.
837	if (CurrentColoring[SU->NodeNum]) {
838	CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
839	CurrentColoring[SU->NodeNum];
840	continue;
841	}
842
843	for (SDep& SuccDep : SU->Succs) {
844	SUnit *Succ = SuccDep.getSUnit();
845	if (SuccDep.isWeak() \|\| Succ->NodeNum >= DAGSize)
846	continue;
847	if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0)
848	SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->NodeNum]);
849	}
850	// Keep color 0.
851	if (SUColors.empty())
852	continue;
853	// Same color than parents.
854	if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
855	CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
856	*SUColors.begin();
857	else {
858	std::map<std::set<unsigned>, unsigned>::iterator Pos =
859	ColorCombinations.find(SUColors);
860	if (Pos != ColorCombinations.end()) {
861	CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = Pos->second;
862	} else {
863	CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
864	NextNonReservedID;
865	ColorCombinations[SUColors] = NextNonReservedID++;
866	}
867	}
868	}
869	}
870
871	void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
872	unsigned DAGSize = DAG->SUnits.size();
873	std::map<std::pair<unsigned, unsigned>, unsigned> ColorCombinations;
874
875	// Every combination of colors given by the top down
876	// and bottom up Reserved node dependency
877
878	for (unsigned i = 0, e = DAGSize; i != e; ++i) {
879	SUnit *SU = &DAG->SUnits[i];
880	std::pair<unsigned, unsigned> SUColors;
881
882	// High latency instructions: already given.
883	if (CurrentColoring[SU->NodeNum])
884	continue;
885
886	SUColors.first = CurrentTopDownReservedDependencyColoring[SU->NodeNum];
887	SUColors.second = CurrentBottomUpReservedDependencyColoring[SU->NodeNum];
888
889	std::map<std::pair<unsigned, unsigned>, unsigned>::iterator Pos =
890	ColorCombinations.find(SUColors);
891	if (Pos != ColorCombinations.end()) {
892	CurrentColoring[SU->NodeNum] = Pos->second;
893	} else {
894	CurrentColoring[SU->NodeNum] = NextNonReservedID;
895	ColorCombinations[SUColors] = NextNonReservedID++;
896	}
897	}
898	}
899
900	void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
901	unsigned DAGSize = DAG->SUnits.size();
902	std::vector<int> PendingColoring = CurrentColoring;
903
904	assert(DAGSize >= 1 &&(static_cast <bool> (DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring .size() == DAGSize && CurrentTopDownReservedDependencyColoring .size() == DAGSize) ? void (0) : __assert_fail ("DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring.size() == DAGSize && CurrentTopDownReservedDependencyColoring.size() == DAGSize" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 906, __extension__ __PRETTY_FUNCTION__))
905	CurrentBottomUpReservedDependencyColoring.size() == DAGSize &&(static_cast <bool> (DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring .size() == DAGSize && CurrentTopDownReservedDependencyColoring .size() == DAGSize) ? void (0) : __assert_fail ("DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring.size() == DAGSize && CurrentTopDownReservedDependencyColoring.size() == DAGSize" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 906, __extension__ __PRETTY_FUNCTION__))
906	CurrentTopDownReservedDependencyColoring.size() == DAGSize)(static_cast <bool> (DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring .size() == DAGSize && CurrentTopDownReservedDependencyColoring .size() == DAGSize) ? void (0) : __assert_fail ("DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring.size() == DAGSize && CurrentTopDownReservedDependencyColoring.size() == DAGSize" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 906, __extension__ __PRETTY_FUNCTION__));
907	// If there is no reserved block at all, do nothing. We don't want
908	// everything in one block.
909	if (*std::max_element(CurrentBottomUpReservedDependencyColoring.begin(),
910	CurrentBottomUpReservedDependencyColoring.end()) == 0 &&
911	*std::max_element(CurrentTopDownReservedDependencyColoring.begin(),
912	CurrentTopDownReservedDependencyColoring.end()) == 0)
913	return;
914
915	for (unsigned SUNum : DAG->BottomUpIndex2SU) {
916	SUnit *SU = &DAG->SUnits[SUNum];
917	std::set<unsigned> SUColors;
918	std::set<unsigned> SUColorsPending;
919
920	if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
921	continue;
922
923	if (CurrentBottomUpReservedDependencyColoring[SU->NodeNum] > 0 \|\|
924	CurrentTopDownReservedDependencyColoring[SU->NodeNum] > 0)
925	continue;
926
927	for (SDep& SuccDep : SU->Succs) {
928	SUnit *Succ = SuccDep.getSUnit();
929	if (SuccDep.isWeak() \|\| Succ->NodeNum >= DAGSize)
930	continue;
931	if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0 \|\|
932	CurrentTopDownReservedDependencyColoring[Succ->NodeNum] > 0)
933	SUColors.insert(CurrentColoring[Succ->NodeNum]);
934	SUColorsPending.insert(PendingColoring[Succ->NodeNum]);
935	}
936	// If there is only one child/parent block, and that block
937	// is not among the ones we are removing in this path, then
938	// merge the instruction to that block
939	if (SUColors.size() == 1 && SUColorsPending.size() == 1)
940	PendingColoring[SU->NodeNum] = *SUColors.begin();
941	else // TODO: Attribute new colors depending on color
942	// combination of children.
943	PendingColoring[SU->NodeNum] = NextNonReservedID++;
944	}
945	CurrentColoring = PendingColoring;
946	}
947
948
949	void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() {
950	unsigned DAGSize = DAG->SUnits.size();
951	unsigned PreviousColor;
952	std::set<unsigned> SeenColors;
953
954	if (DAGSize <= 1)
955	return;
956
957	PreviousColor = CurrentColoring[0];
958
959	for (unsigned i = 1, e = DAGSize; i != e; ++i) {
960	SUnit *SU = &DAG->SUnits[i];
961	unsigned CurrentColor = CurrentColoring[i];
962	unsigned PreviousColorSave = PreviousColor;
963	assert(i == SU->NodeNum)(static_cast <bool> (i == SU->NodeNum) ? void (0) : __assert_fail ("i == SU->NodeNum", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 963, __extension__ __PRETTY_FUNCTION__));
964
965	if (CurrentColor != PreviousColor)
966	SeenColors.insert(PreviousColor);
967	PreviousColor = CurrentColor;
968
969	if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
970	continue;
971
972	if (SeenColors.find(CurrentColor) == SeenColors.end())
973	continue;
974
975	if (PreviousColorSave != CurrentColor)
976	CurrentColoring[i] = NextNonReservedID++;
977	else
978	CurrentColoring[i] = CurrentColoring[i-1];
979	}
980	}
981
982	void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
983	unsigned DAGSize = DAG->SUnits.size();
984
985	for (unsigned SUNum : DAG->BottomUpIndex2SU) {
986	SUnit *SU = &DAG->SUnits[SUNum];
987	std::set<unsigned> SUColors;
988
989	if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
990	continue;
991
992	// No predecessor: Vgpr constant loading.
993	// Low latency instructions usually have a predecessor (the address)
994	if (SU->Preds.size() > 0 && !DAG->IsLowLatencySU[SU->NodeNum])
995	continue;
996
997	for (SDep& SuccDep : SU->Succs) {
998	SUnit *Succ = SuccDep.getSUnit();
999	if (SuccDep.isWeak() \|\| Succ->NodeNum >= DAGSize)
1000	continue;
1001	SUColors.insert(CurrentColoring[Succ->NodeNum]);
1002	}
1003	if (SUColors.size() == 1)
1004	CurrentColoring[SU->NodeNum] = *SUColors.begin();
1005	}
1006	}
1007
1008	void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
1009	unsigned DAGSize = DAG->SUnits.size();
1010
1011	for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1012	SUnit *SU = &DAG->SUnits[SUNum];
1013	std::set<unsigned> SUColors;
1014
1015	if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1016	continue;
1017
1018	for (SDep& SuccDep : SU->Succs) {
1019	SUnit *Succ = SuccDep.getSUnit();
1020	if (SuccDep.isWeak() \|\| Succ->NodeNum >= DAGSize)
1021	continue;
1022	SUColors.insert(CurrentColoring[Succ->NodeNum]);
1023	}
1024	if (SUColors.size() == 1)
1025	CurrentColoring[SU->NodeNum] = *SUColors.begin();
1026	}
1027	}
1028
1029	void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() {
1030	unsigned DAGSize = DAG->SUnits.size();
1031
1032	for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1033	SUnit *SU = &DAG->SUnits[SUNum];
1034	std::set<unsigned> SUColors;
1035
1036	if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1037	continue;
1038
1039	for (SDep& SuccDep : SU->Succs) {
1040	SUnit *Succ = SuccDep.getSUnit();
1041	if (SuccDep.isWeak() \|\| Succ->NodeNum >= DAGSize)
1042	continue;
1043	SUColors.insert(CurrentColoring[Succ->NodeNum]);
1044	}
1045	if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize)
1046	CurrentColoring[SU->NodeNum] = *SUColors.begin();
1047	}
1048	}
1049
1050	void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
1051	unsigned DAGSize = DAG->SUnits.size();
1052	std::map<unsigned, unsigned> ColorCount;
1053
1054	for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1055	SUnit *SU = &DAG->SUnits[SUNum];
1056	unsigned color = CurrentColoring[SU->NodeNum];
1057	++ColorCount[color];
1058	}
1059
1060	for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1061	SUnit *SU = &DAG->SUnits[SUNum];
1062	unsigned color = CurrentColoring[SU->NodeNum];
1063	std::set<unsigned> SUColors;
1064
1065	if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1066	continue;
1067
1068	if (ColorCount[color] > 1)
1069	continue;
1070
1071	for (SDep& SuccDep : SU->Succs) {
1072	SUnit *Succ = SuccDep.getSUnit();
1073	if (SuccDep.isWeak() \|\| Succ->NodeNum >= DAGSize)
1074	continue;
1075	SUColors.insert(CurrentColoring[Succ->NodeNum]);
1076	}
1077	if (SUColors.size() == 1 && *SUColors.begin() != color) {
1078	--ColorCount[color];
1079	CurrentColoring[SU->NodeNum] = *SUColors.begin();
1080	++ColorCount[*SUColors.begin()];
1081	}
1082	}
1083	}
1084
1085	void SIScheduleBlockCreator::cutHugeBlocks() {
1086	// TODO
1087	}
1088
1089	void SIScheduleBlockCreator::regroupNoUserInstructions() {
1090	unsigned DAGSize = DAG->SUnits.size();
1091	int GroupID = NextNonReservedID++;
1092
1093	for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1094	SUnit *SU = &DAG->SUnits[SUNum];
1095	bool hasSuccessor = false;
1096
1097	if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1098	continue;
1099
1100	for (SDep& SuccDep : SU->Succs) {
1101	SUnit *Succ = SuccDep.getSUnit();
1102	if (SuccDep.isWeak() \|\| Succ->NodeNum >= DAGSize)
1103	continue;
1104	hasSuccessor = true;
1105	}
1106	if (!hasSuccessor)
1107	CurrentColoring[SU->NodeNum] = GroupID;
1108	}
1109	}
1110
1111	void SIScheduleBlockCreator::colorExports() {
1112	unsigned ExportColor = NextNonReservedID++;
1113	SmallVector<unsigned, 8> ExpGroup;
1114
1115	// Put all exports together in a block.
1116	// The block will naturally end up being scheduled last,
1117	// thus putting exports at the end of the schedule, which
1118	// is better for performance.
1119	// However we must ensure, for safety, the exports can be put
1120	// together in the same block without any other instruction.
1121	// This could happen, for example, when scheduling after regalloc
1122	// if reloading a spilled register from memory using the same
1123	// register than used in a previous export.
1124	// If that happens, do not regroup the exports.
1125	for (unsigned SUNum : DAG->TopDownIndex2SU) {
1126	const SUnit &SU = DAG->SUnits[SUNum];
1127	if (SIInstrInfo::isEXP(*SU.getInstr())) {
1128	// Check the EXP can be added to the group safely,
1129	// ie without needing any other instruction.
1130	// The EXP is allowed to depend on other EXP
1131	// (they will be in the same group).
1132	for (unsigned j : ExpGroup) {
1133	bool HasSubGraph;
1134	std::vector<int> SubGraph;
1135	// By construction (topological order), if SU and
1136	// DAG->SUnits[j] are linked, DAG->SUnits[j] is neccessary
1137	// in the parent graph of SU.
1138	#ifndef NDEBUG
1139	SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j],
1140	HasSubGraph);
1141	assert(!HasSubGraph)(static_cast <bool> (!HasSubGraph) ? void (0) : __assert_fail ("!HasSubGraph", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 1141, __extension__ __PRETTY_FUNCTION__));
1142	#endif
1143	SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU,
1144	HasSubGraph);
1145	if (!HasSubGraph)
1146	continue; // No dependencies between each other
1147
1148	// SubGraph contains all the instructions required
1149	// between EXP SUnits[j] and EXP SU.
1150	for (unsigned k : SubGraph) {
1151	if (!SIInstrInfo::isEXP(*DAG->SUnits[k].getInstr()))
1152	// Other instructions than EXP would be required in the group.
1153	// Abort the groupping.
1154	return;
1155	}
1156	}
1157
1158	ExpGroup.push_back(SUNum);
1159	}
1160	}
1161
1162	// The group can be formed. Give the color.
1163	for (unsigned j : ExpGroup)
1164	CurrentColoring[j] = ExportColor;
1165	}
1166
1167	void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant) {
1168	unsigned DAGSize = DAG->SUnits.size();
1169	std::map<unsigned,unsigned> RealID;
1170
1171	CurrentBlocks.clear();
1172	CurrentColoring.clear();
1173	CurrentColoring.resize(DAGSize, 0);
1174	Node2CurrentBlock.clear();
1175
1176	// Restore links previous scheduling variant has overridden.
1177	DAG->restoreSULinksLeft();
1178
1179	NextReservedID = 1;
1180	NextNonReservedID = DAGSize + 1;
1181
1182	LLVM_DEBUG(dbgs() << "Coloring the graph\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Coloring the graph\n" ; } } while (false);
1183
1184	if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesGrouped)
1185	colorHighLatenciesGroups();
1186	else
1187	colorHighLatenciesAlone();
1188	colorComputeReservedDependencies();
1189	colorAccordingToReservedDependencies();
1190	colorEndsAccordingToDependencies();
1191	if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesAlonePlusConsecutive)
1192	colorForceConsecutiveOrderInGroup();
1193	regroupNoUserInstructions();
1194	colorMergeConstantLoadsNextGroup();
1195	colorMergeIfPossibleNextGroupOnlyForReserved();
1196	colorExports();
1197
1198	// Put SUs of same color into same block
1199	Node2CurrentBlock.resize(DAGSize, -1);
1200	for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1201	SUnit *SU = &DAG->SUnits[i];
1202	unsigned Color = CurrentColoring[SU->NodeNum];
1203	if (RealID.find(Color) == RealID.end()) {
1204	int ID = CurrentBlocks.size();
1205	BlockPtrs.push_back(std::make_unique<SIScheduleBlock>(DAG, this, ID));
1206	CurrentBlocks.push_back(BlockPtrs.rbegin()->get());
1207	RealID[Color] = ID;
1208	}
1209	CurrentBlocks[RealID[Color]]->addUnit(SU);
1210	Node2CurrentBlock[SU->NodeNum] = RealID[Color];
1211	}
1212
1213	// Build dependencies between blocks.
1214	for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1215	SUnit *SU = &DAG->SUnits[i];
1216	int SUID = Node2CurrentBlock[i];
1217	for (SDep& SuccDep : SU->Succs) {
1218	SUnit *Succ = SuccDep.getSUnit();
1219	if (SuccDep.isWeak() \|\| Succ->NodeNum >= DAGSize)
1220	continue;
1221	if (Node2CurrentBlock[Succ->NodeNum] != SUID)
1222	CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]],
1223	SuccDep.isCtrl() ? NoData : Data);
1224	}
1225	for (SDep& PredDep : SU->Preds) {
1226	SUnit *Pred = PredDep.getSUnit();
1227	if (PredDep.isWeak() \|\| Pred->NodeNum >= DAGSize)
1228	continue;
1229	if (Node2CurrentBlock[Pred->NodeNum] != SUID)
1230	CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->NodeNum]]);
1231	}
1232	}
1233
1234	// Free root and leafs of all blocks to enable scheduling inside them.
1235	for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
1236	SIScheduleBlock *Block = CurrentBlocks[i];
1237	Block->finalizeUnits();
1238	}
1239	LLVM_DEBUG(dbgs() << "Blocks created:\n\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Blocks created:\n\n" ; for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) { SIScheduleBlock *Block = CurrentBlocks[i]; Block->printDebug (true); }; } } while (false)
1240	for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Blocks created:\n\n" ; for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) { SIScheduleBlock *Block = CurrentBlocks[i]; Block->printDebug (true); }; } } while (false)
1241	SIScheduleBlock Block = CurrentBlocks[i];do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Blocks created:\n\n" ; for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) { SIScheduleBlock Block = CurrentBlocks[i]; Block->printDebug (true); }; } } while (false)
1242	Block->printDebug(true);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Blocks created:\n\n" ; for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) { SIScheduleBlock *Block = CurrentBlocks[i]; Block->printDebug (true); }; } } while (false)
1243	})do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Blocks created:\n\n" ; for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) { SIScheduleBlock *Block = CurrentBlocks[i]; Block->printDebug (true); }; } } while (false);
1244	}
1245
1246	// Two functions taken from Codegen/MachineScheduler.cpp
1247
1248	/// Non-const version.
1249	static MachineBasicBlock::iterator
1250	nextIfDebug(MachineBasicBlock::iterator I,
1251	MachineBasicBlock::const_iterator End) {
1252	for (; I != End; ++I) {
1253	if (!I->isDebugInstr())
1254	break;
1255	}
1256	return I;
1257	}
1258
1259	void SIScheduleBlockCreator::topologicalSort() {
1260	unsigned DAGSize = CurrentBlocks.size();
1261	std::vector<int> WorkList;
1262
1263	LLVM_DEBUG(dbgs() << "Topological Sort\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Topological Sort\n" ; } } while (false);
1264
1265	WorkList.reserve(DAGSize);
1266	TopDownIndex2Block.resize(DAGSize);
1267	TopDownBlock2Index.resize(DAGSize);
1268	BottomUpIndex2Block.resize(DAGSize);
1269
1270	for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1271	SIScheduleBlock *Block = CurrentBlocks[i];
1272	unsigned Degree = Block->getSuccs().size();
1273	TopDownBlock2Index[i] = Degree;
1274	if (Degree == 0) {
1275	WorkList.push_back(i);
1276	}
1277	}
1278
1279	int Id = DAGSize;
1280	while (!WorkList.empty()) {
1281	int i = WorkList.back();
1282	SIScheduleBlock *Block = CurrentBlocks[i];
1283	WorkList.pop_back();
1284	TopDownBlock2Index[i] = --Id;
1285	TopDownIndex2Block[Id] = i;
1286	for (SIScheduleBlock* Pred : Block->getPreds()) {
1287	if (!--TopDownBlock2Index[Pred->getID()])
1288	WorkList.push_back(Pred->getID());
1289	}
1290	}
1291
1292	#ifndef NDEBUG
1293	// Check correctness of the ordering.
1294	for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1295	SIScheduleBlock *Block = CurrentBlocks[i];
1296	for (SIScheduleBlock* Pred : Block->getPreds()) {
1297	assert(TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] &&(static_cast <bool> (TopDownBlock2Index[i] > TopDownBlock2Index [Pred->getID()] && "Wrong Top Down topological sorting" ) ? void (0) : __assert_fail ("TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] && \"Wrong Top Down topological sorting\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 1298, __extension__ __PRETTY_FUNCTION__))
1298	"Wrong Top Down topological sorting")(static_cast <bool> (TopDownBlock2Index[i] > TopDownBlock2Index [Pred->getID()] && "Wrong Top Down topological sorting" ) ? void (0) : __assert_fail ("TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] && \"Wrong Top Down topological sorting\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 1298, __extension__ __PRETTY_FUNCTION__));
1299	}
1300	}
1301	#endif
1302
1303	BottomUpIndex2Block = std::vector<int>(TopDownIndex2Block.rbegin(),
1304	TopDownIndex2Block.rend());
1305	}
1306
1307	void SIScheduleBlockCreator::scheduleInsideBlocks() {
1308	unsigned DAGSize = CurrentBlocks.size();
1309
1310	LLVM_DEBUG(dbgs() << "\nScheduling Blocks\n\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "\nScheduling Blocks\n\n" ; } } while (false);
1311
1312	// We do schedule a valid scheduling such that a Block corresponds
1313	// to a range of instructions.
1314	LLVM_DEBUG(dbgs() << "First phase: Fast scheduling for Reg Liveness\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "First phase: Fast scheduling for Reg Liveness\n" ; } } while (false);
1315	for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1316	SIScheduleBlock *Block = CurrentBlocks[i];
1317	Block->fastSchedule();
1318	}
1319
1320	// Note: the following code, and the part restoring previous position
1321	// is by far the most expensive operation of the Scheduler.
1322
1323	// Do not update CurrentTop.
1324	MachineBasicBlock::iterator CurrentTopFastSched = DAG->getCurrentTop();
1325	std::vector<MachineBasicBlock::iterator> PosOld;
1326	std::vector<MachineBasicBlock::iterator> PosNew;
1327	PosOld.reserve(DAG->SUnits.size());
1328	PosNew.reserve(DAG->SUnits.size());
1329
1330	for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1331	int BlockIndice = TopDownIndex2Block[i];
1332	SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
1333	std::vector<SUnit*> SUs = Block->getScheduledUnits();
1334
1335	for (SUnit* SU : SUs) {
1336	MachineInstr *MI = SU->getInstr();
1337	MachineBasicBlock::iterator Pos = MI;
1338	PosOld.push_back(Pos);
1339	if (&*CurrentTopFastSched == MI) {
1340	PosNew.push_back(Pos);
1341	CurrentTopFastSched = nextIfDebug(++CurrentTopFastSched,
1342	DAG->getCurrentBottom());
1343	} else {
1344	// Update the instruction stream.
1345	DAG->getBB()->splice(CurrentTopFastSched, DAG->getBB(), MI);
1346
1347	// Update LiveIntervals.
1348	// Note: Moving all instructions and calling handleMove every time
1349	// is the most cpu intensive operation of the scheduler.
1350	// It would gain a lot if there was a way to recompute the
1351	// LiveIntervals for the entire scheduling region.
1352	DAG->getLIS()->handleMove(MI, /UpdateFlags=*/true);
1353	PosNew.push_back(CurrentTopFastSched);
1354	}
1355	}
1356	}
1357
1358	// Now we have Block of SUs == Block of MI.
1359	// We do the final schedule for the instructions inside the block.
1360	// The property that all the SUs of the Block are grouped together as MI
1361	// is used for correct reg usage tracking.
1362	for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1363	SIScheduleBlock *Block = CurrentBlocks[i];
1364	std::vector<SUnit*> SUs = Block->getScheduledUnits();
1365	Block->schedule((SUs.begin())->getInstr(), (SUs.rbegin())->getInstr());
1366	}
1367
1368	LLVM_DEBUG(dbgs() << "Restoring MI Pos\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Restoring MI Pos\n" ; } } while (false);
1369	// Restore old ordering (which prevents a LIS->handleMove bug).
1370	for (unsigned i = PosOld.size(), e = 0; i != e; --i) {
1371	MachineBasicBlock::iterator POld = PosOld[i-1];
1372	MachineBasicBlock::iterator PNew = PosNew[i-1];
1373	if (PNew != POld) {
1374	// Update the instruction stream.
1375	DAG->getBB()->splice(POld, DAG->getBB(), PNew);
1376
1377	// Update LiveIntervals.
1378	DAG->getLIS()->handleMove(POld, /UpdateFlags=*/true);
1379	}
1380	}
1381
1382	LLVM_DEBUG(for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { for (unsigned i = 0, e = CurrentBlocks .size(); i != e; ++i) { SIScheduleBlock *Block = CurrentBlocks [i]; Block->printDebug(true); }; } } while (false)
1383	SIScheduleBlock Block = CurrentBlocks[i];do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { for (unsigned i = 0, e = CurrentBlocks .size(); i != e; ++i) { SIScheduleBlock Block = CurrentBlocks [i]; Block->printDebug(true); }; } } while (false)
1384	Block->printDebug(true);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { for (unsigned i = 0, e = CurrentBlocks .size(); i != e; ++i) { SIScheduleBlock *Block = CurrentBlocks [i]; Block->printDebug(true); }; } } while (false)
1385	})do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { for (unsigned i = 0, e = CurrentBlocks .size(); i != e; ++i) { SIScheduleBlock *Block = CurrentBlocks [i]; Block->printDebug(true); }; } } while (false);
1386	}
1387
1388	void SIScheduleBlockCreator::fillStats() {
1389	unsigned DAGSize = CurrentBlocks.size();
1390
1391	for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1392	int BlockIndice = TopDownIndex2Block[i];
1393	SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
1394	if (Block->getPreds().empty())
1395	Block->Depth = 0;
1396	else {
1397	unsigned Depth = 0;
1398	for (SIScheduleBlock *Pred : Block->getPreds()) {
1399	if (Depth < Pred->Depth + Pred->getCost())
1400	Depth = Pred->Depth + Pred->getCost();
1401	}
1402	Block->Depth = Depth;
1403	}
1404	}
1405
1406	for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1407	int BlockIndice = BottomUpIndex2Block[i];
1408	SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
1409	if (Block->getSuccs().empty())
1410	Block->Height = 0;
1411	else {
1412	unsigned Height = 0;
1413	for (const auto &Succ : Block->getSuccs())
1414	Height = std::max(Height, Succ.first->Height + Succ.first->getCost());
1415	Block->Height = Height;
1416	}
1417	}
1418	}
1419
1420	// SIScheduleBlockScheduler //
1421
1422	SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
1423	SISchedulerBlockSchedulerVariant Variant,
1424	SIScheduleBlocks BlocksStruct) :
1425	DAG(DAG), Variant(Variant), Blocks(BlocksStruct.Blocks),
1426	LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0),
1427	SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) {
1428
1429	// Fill the usage of every output
1430	// Warning: while by construction we always have a link between two blocks
1431	// when one needs a result from the other, the number of users of an output
1432	// is not the sum of child blocks having as input the same virtual register.
1433	// Here is an example. A produces x and y. B eats x and produces x'.
1434	// C eats x' and y. The register coalescer may have attributed the same
1435	// virtual register to x and x'.
1436	// To count accurately, we do a topological sort. In case the register is
1437	// found for several parents, we increment the usage of the one with the
1438	// highest topological index.
1439	LiveOutRegsNumUsages.resize(Blocks.size());
1440	for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1441	SIScheduleBlock *Block = Blocks[i];
1442	for (unsigned Reg : Block->getInRegs()) {
1443	bool Found = false;
1444	int topoInd = -1;
1445	for (SIScheduleBlock* Pred: Block->getPreds()) {
1446	std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1447	std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
1448
1449	if (RegPos != PredOutRegs.end()) {
1450	Found = true;
1451	if (topoInd < BlocksStruct.TopDownBlock2Index[Pred->getID()]) {
1452	topoInd = BlocksStruct.TopDownBlock2Index[Pred->getID()];
1453	}
1454	}
1455	}
1456
1457	if (!Found)
1458	continue;
1459
1460	int PredID = BlocksStruct.TopDownIndex2Block[topoInd];
1461	++LiveOutRegsNumUsages[PredID][Reg];
1462	}
1463	}
1464
1465	LastPosHighLatencyParentScheduled.resize(Blocks.size(), 0);
1466	BlockNumPredsLeft.resize(Blocks.size());
1467	BlockNumSuccsLeft.resize(Blocks.size());
1468
1469	for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1470	SIScheduleBlock *Block = Blocks[i];
1471	BlockNumPredsLeft[i] = Block->getPreds().size();
1472	BlockNumSuccsLeft[i] = Block->getSuccs().size();
1473	}
1474
1475	#ifndef NDEBUG
1476	for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1477	SIScheduleBlock *Block = Blocks[i];
1478	assert(Block->getID() == i)(static_cast <bool> (Block->getID() == i) ? void (0) : __assert_fail ("Block->getID() == i", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 1478, __extension__ __PRETTY_FUNCTION__));
1479	}
1480	#endif
1481
1482	std::set<unsigned> InRegs = DAG->getInRegs();
1483	addLiveRegs(InRegs);
1484
1485	// Increase LiveOutRegsNumUsages for blocks
1486	// producing registers consumed in another
1487	// scheduling region.
1488	for (unsigned Reg : DAG->getOutRegs()) {
1489	for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1490	// Do reverse traversal
1491	int ID = BlocksStruct.TopDownIndex2Block[Blocks.size()-1-i];
1492	SIScheduleBlock *Block = Blocks[ID];
1493	const std::set<unsigned> &OutRegs = Block->getOutRegs();
1494
1495	if (OutRegs.find(Reg) == OutRegs.end())
1496	continue;
1497
1498	++LiveOutRegsNumUsages[ID][Reg];
1499	break;
1500	}
1501	}
1502
1503	// Fill LiveRegsConsumers for regs that were already
1504	// defined before scheduling.
1505	for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1506	SIScheduleBlock *Block = Blocks[i];
1507	for (unsigned Reg : Block->getInRegs()) {
1508	bool Found = false;
1509	for (SIScheduleBlock* Pred: Block->getPreds()) {
1510	std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1511	std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
1512
1513	if (RegPos != PredOutRegs.end()) {
1514	Found = true;
1515	break;
1516	}
1517	}
1518
1519	if (!Found)
1520	++LiveRegsConsumers[Reg];
1521	}
1522	}
1523
1524	for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1525	SIScheduleBlock *Block = Blocks[i];
1526	if (BlockNumPredsLeft[i] == 0) {
1527	ReadyBlocks.push_back(Block);
1528	}
1529	}
1530
1531	while (SIScheduleBlock *Block = pickBlock()) {
1532	BlocksScheduled.push_back(Block);
1533	blockScheduled(Block);
1534	}
1535
1536	LLVM_DEBUG(dbgs() << "Block Order:"; for (SIScheduleBlock Blockdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Block Order:"; for ( SIScheduleBlock Block : BlocksScheduled) { dbgs() << ' ' << Block->getID(); } dbgs() << '\n';; } } while (false)
1537	: BlocksScheduled) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Block Order:"; for ( SIScheduleBlock *Block : BlocksScheduled) { dbgs() << ' ' << Block->getID(); } dbgs() << '\n';; } } while (false)
1538	dbgs() << ' ' << Block->getID();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Block Order:"; for ( SIScheduleBlock *Block : BlocksScheduled) { dbgs() << ' ' << Block->getID(); } dbgs() << '\n';; } } while (false)
1539	} dbgs() << '\n';)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Block Order:"; for ( SIScheduleBlock *Block : BlocksScheduled) { dbgs() << ' ' << Block->getID(); } dbgs() << '\n';; } } while (false);
1540	}
1541
1542	bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand,
1543	SIBlockSchedCandidate &TryCand) {
1544	if (!Cand.isValid()) {
1545	TryCand.Reason = NodeOrder;
1546	return true;
1547	}
1548
1549	// Try to hide high latencies.
1550	if (SISched::tryLess(TryCand.LastPosHighLatParentScheduled,
1551	Cand.LastPosHighLatParentScheduled, TryCand, Cand, Latency))
1552	return true;
1553	// Schedule high latencies early so you can hide them better.
1554	if (SISched::tryGreater(TryCand.IsHighLatency, Cand.IsHighLatency,
1555	TryCand, Cand, Latency))
1556	return true;
1557	if (TryCand.IsHighLatency && SISched::tryGreater(TryCand.Height, Cand.Height,
1558	TryCand, Cand, Depth))
1559	return true;
1560	if (SISched::tryGreater(TryCand.NumHighLatencySuccessors,
1561	Cand.NumHighLatencySuccessors,
1562	TryCand, Cand, Successor))
1563	return true;
1564	return false;
1565	}
1566
1567	bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
1568	SIBlockSchedCandidate &TryCand) {
1569	if (!Cand.isValid()) {
1570	TryCand.Reason = NodeOrder;
1571	return true;
1572	}
1573
1574	if (SISched::tryLess(TryCand.VGPRUsageDiff > 0, Cand.VGPRUsageDiff > 0,
1575	TryCand, Cand, RegUsage))
1576	return true;
1577	if (SISched::tryGreater(TryCand.NumSuccessors > 0,
1578	Cand.NumSuccessors > 0,
1579	TryCand, Cand, Successor))
1580	return true;
1581	if (SISched::tryGreater(TryCand.Height, Cand.Height, TryCand, Cand, Depth))
1582	return true;
1583	if (SISched::tryLess(TryCand.VGPRUsageDiff, Cand.VGPRUsageDiff,
1584	TryCand, Cand, RegUsage))
1585	return true;
1586	return false;
1587	}
1588
1589	SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() {
1590	SIBlockSchedCandidate Cand;
1591	std::vector<SIScheduleBlock*>::iterator Best;
1592	SIScheduleBlock *Block;
1593	if (ReadyBlocks.empty())
1594	return nullptr;
1595
1596	DAG->fillVgprSgprCost(LiveRegs.begin(), LiveRegs.end(),
1597	VregCurrentUsage, SregCurrentUsage);
1598	if (VregCurrentUsage > maxVregUsage)
1599	maxVregUsage = VregCurrentUsage;
1600	if (SregCurrentUsage > maxSregUsage)
1601	maxSregUsage = SregCurrentUsage;
1602	LLVM_DEBUG(dbgs() << "Picking New Blocks\n"; dbgs() << "Available: ";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking New Blocks\n" ; dbgs() << "Available: "; for (SIScheduleBlock *Block : ReadyBlocks) dbgs() << Block->getID() << ' '; dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs ) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';; } } while (false)
1603	for (SIScheduleBlock Blockdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking New Blocks\n" ; dbgs() << "Available: "; for (SIScheduleBlock Block : ReadyBlocks) dbgs() << Block->getID() << ' '; dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs ) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';; } } while (false)
1604	: ReadyBlocks) dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking New Blocks\n" ; dbgs() << "Available: "; for (SIScheduleBlock *Block : ReadyBlocks) dbgs() << Block->getID() << ' '; dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs ) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';; } } while (false)
1605	<< Block->getID() << ' ';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking New Blocks\n" ; dbgs() << "Available: "; for (SIScheduleBlock *Block : ReadyBlocks) dbgs() << Block->getID() << ' '; dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs ) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';; } } while (false)
1606	dbgs() << "\nCurrent Live:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking New Blocks\n" ; dbgs() << "Available: "; for (SIScheduleBlock *Block : ReadyBlocks) dbgs() << Block->getID() << ' '; dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs ) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';; } } while (false)
1607	for (unsigned Regdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking New Blocks\n" ; dbgs() << "Available: "; for (SIScheduleBlock *Block : ReadyBlocks) dbgs() << Block->getID() << ' '; dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs ) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';; } } while (false)
1608	: LiveRegs) dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking New Blocks\n" ; dbgs() << "Available: "; for (SIScheduleBlock *Block : ReadyBlocks) dbgs() << Block->getID() << ' '; dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs ) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';; } } while (false)
1609	<< printVRegOrUnit(Reg, DAG->getTRI()) << ' ';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking New Blocks\n" ; dbgs() << "Available: "; for (SIScheduleBlock *Block : ReadyBlocks) dbgs() << Block->getID() << ' '; dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs ) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';; } } while (false)
1610	dbgs() << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking New Blocks\n" ; dbgs() << "Available: "; for (SIScheduleBlock *Block : ReadyBlocks) dbgs() << Block->getID() << ' '; dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs ) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';; } } while (false)
1611	dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking New Blocks\n" ; dbgs() << "Available: "; for (SIScheduleBlock *Block : ReadyBlocks) dbgs() << Block->getID() << ' '; dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs ) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';; } } while (false)
1612	dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking New Blocks\n" ; dbgs() << "Available: "; for (SIScheduleBlock *Block : ReadyBlocks) dbgs() << Block->getID() << ' '; dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs ) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';; } } while (false);
1613
1614	Cand.Block = nullptr;
1615	for (std::vector<SIScheduleBlock*>::iterator I = ReadyBlocks.begin(),
1616	E = ReadyBlocks.end(); I != E; ++I) {
1617	SIBlockSchedCandidate TryCand;
1618	TryCand.Block = *I;
1619	TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
1620	TryCand.VGPRUsageDiff =
1621	checkRegUsageImpact(TryCand.Block->getInRegs(),
1622	TryCand.Block->getOutRegs())[AMDGPU::RegisterPressureSets::VGPR_32];
1623	TryCand.NumSuccessors = TryCand.Block->getSuccs().size();
1624	TryCand.NumHighLatencySuccessors =
1625	TryCand.Block->getNumHighLatencySuccessors();
1626	TryCand.LastPosHighLatParentScheduled =
1627	(unsigned int) std::max<int> (0,
1628	LastPosHighLatencyParentScheduled[TryCand.Block->getID()] -
1629	LastPosWaitedHighLatency);
1630	TryCand.Height = TryCand.Block->Height;
1631	// Try not to increase VGPR usage too much, else we may spill.
1632	if (VregCurrentUsage > 120 \|\|
1633	Variant != SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage) {
1634	if (!tryCandidateRegUsage(Cand, TryCand) &&
1635	Variant != SISchedulerBlockSchedulerVariant::BlockRegUsage)
1636	tryCandidateLatency(Cand, TryCand);
1637	} else {
1638	if (!tryCandidateLatency(Cand, TryCand))
1639	tryCandidateRegUsage(Cand, TryCand);
1640	}
1641	if (TryCand.Reason != NoCand) {
1642	Cand.setBest(TryCand);
1643	Best = I;
1644	LLVM_DEBUG(dbgs() << "Best Current Choice: " << Cand.Block->getID() << ' 'do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Best Current Choice: " << Cand.Block->getID() << ' ' << getReasonStr (Cand.Reason) << '\n'; } } while (false)
1645	<< getReasonStr(Cand.Reason) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Best Current Choice: " << Cand.Block->getID() << ' ' << getReasonStr (Cand.Reason) << '\n'; } } while (false);
1646	}
1647	}
1648
1649	LLVM_DEBUG(dbgs() << "Picking: " << Cand.Block->getID() << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking: " << Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: " << (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() << "Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled << '\n'; dbgs() << "VGPRUsageDiff: " << Cand .VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while (false)
1650	dbgs() << "Is a block with high latency instruction: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking: " << Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: " << (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() << "Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled << '\n'; dbgs() << "VGPRUsageDiff: " << Cand .VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while (false)
1651	<< (Cand.IsHighLatency ? "yes\n" : "no\n");do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking: " << Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: " << (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() << "Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled << '\n'; dbgs() << "VGPRUsageDiff: " << Cand .VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while (false)
1652	dbgs() << "Position of last high latency dependency: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking: " << Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: " << (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() << "Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled << '\n'; dbgs() << "VGPRUsageDiff: " << Cand .VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while (false)
1653	<< Cand.LastPosHighLatParentScheduled << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking: " << Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: " << (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() << "Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled << '\n'; dbgs() << "VGPRUsageDiff: " << Cand .VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while (false)
1654	dbgs() << "VGPRUsageDiff: " << Cand.VGPRUsageDiff << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking: " << Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: " << (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() << "Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled << '\n'; dbgs() << "VGPRUsageDiff: " << Cand .VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while (false)
1655	dbgs() << '\n';)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Picking: " << Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: " << (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() << "Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled << '\n'; dbgs() << "VGPRUsageDiff: " << Cand .VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while (false);
1656
1657	Block = Cand.Block;
1658	ReadyBlocks.erase(Best);
1659	return Block;
1660	}
1661
1662	// Tracking of currently alive registers to determine VGPR Usage.
1663
1664	void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &Regs) {
1665	for (Register Reg : Regs) {
1666	// For now only track virtual registers.
1667	if (!Reg.isVirtual())
1668	continue;
1669	// If not already in the live set, then add it.
1670	(void) LiveRegs.insert(Reg);
1671	}
1672	}
1673
1674	void SIScheduleBlockScheduler::decreaseLiveRegs(SIScheduleBlock *Block,
1675	std::set<unsigned> &Regs) {
1676	for (unsigned Reg : Regs) {
1677	// For now only track virtual registers.
1678	std::set<unsigned>::iterator Pos = LiveRegs.find(Reg);
1679	assert (Pos != LiveRegs.end() && // Reg must be live.(static_cast <bool> (Pos != LiveRegs.end() && LiveRegsConsumers .find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers [Reg] >= 1) ? void (0) : __assert_fail ("Pos != LiveRegs.end() && LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers[Reg] >= 1" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 1681, __extension__ __PRETTY_FUNCTION__))
1680	LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() &&(static_cast <bool> (Pos != LiveRegs.end() && LiveRegsConsumers .find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers [Reg] >= 1) ? void (0) : __assert_fail ("Pos != LiveRegs.end() && LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers[Reg] >= 1" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 1681, __extension__ __PRETTY_FUNCTION__))
1681	LiveRegsConsumers[Reg] >= 1)(static_cast <bool> (Pos != LiveRegs.end() && LiveRegsConsumers .find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers [Reg] >= 1) ? void (0) : __assert_fail ("Pos != LiveRegs.end() && LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers[Reg] >= 1" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 1681, __extension__ __PRETTY_FUNCTION__));
1682	--LiveRegsConsumers[Reg];
1683	if (LiveRegsConsumers[Reg] == 0)
1684	LiveRegs.erase(Pos);
1685	}
1686	}
1687
1688	void SIScheduleBlockScheduler::releaseBlockSuccs(SIScheduleBlock *Parent) {
1689	for (const auto &Block : Parent->getSuccs()) {
1690	if (--BlockNumPredsLeft[Block.first->getID()] == 0)
1691	ReadyBlocks.push_back(Block.first);
1692
1693	if (Parent->isHighLatencyBlock() &&
1694	Block.second == SIScheduleBlockLinkKind::Data)
1695	LastPosHighLatencyParentScheduled[Block.first->getID()] = NumBlockScheduled;
1696	}
1697	}
1698
1699	void SIScheduleBlockScheduler::blockScheduled(SIScheduleBlock *Block) {
1700	decreaseLiveRegs(Block, Block->getInRegs());
1701	addLiveRegs(Block->getOutRegs());
1702	releaseBlockSuccs(Block);
1703	for (std::map<unsigned, unsigned>::iterator RegI =
1704	LiveOutRegsNumUsages[Block->getID()].begin(),
1705	E = LiveOutRegsNumUsages[Block->getID()].end(); RegI != E; ++RegI) {
1706	std::pair<unsigned, unsigned> RegP = *RegI;
1707	// We produce this register, thus it must not be previously alive.
1708	assert(LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() \|\|(static_cast <bool> (LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() \|\| LiveRegsConsumers[RegP.first] == 0) ? void (0) : __assert_fail ("LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() \|\| LiveRegsConsumers[RegP.first] == 0" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 1709, __extension__ __PRETTY_FUNCTION__))
1709	LiveRegsConsumers[RegP.first] == 0)(static_cast <bool> (LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() \|\| LiveRegsConsumers[RegP.first] == 0) ? void (0) : __assert_fail ("LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() \|\| LiveRegsConsumers[RegP.first] == 0" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 1709, __extension__ __PRETTY_FUNCTION__));
1710	LiveRegsConsumers[RegP.first] += RegP.second;
1711	}
1712	if (LastPosHighLatencyParentScheduled[Block->getID()] >
1713	(unsigned)LastPosWaitedHighLatency)
1714	LastPosWaitedHighLatency =
1715	LastPosHighLatencyParentScheduled[Block->getID()];
1716	++NumBlockScheduled;
1717	}
1718
1719	std::vector<int>
1720	SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs,
1721	std::set<unsigned> &OutRegs) {
1722	std::vector<int> DiffSetPressure;
1723	DiffSetPressure.assign(DAG->getTRI()->getNumRegPressureSets(), 0);
1724
1725	for (Register Reg : InRegs) {
1726	// For now only track virtual registers.
1727	if (!Reg.isVirtual())
1728	continue;
1729	if (LiveRegsConsumers[Reg] > 1)
1730	continue;
1731	PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
1732	for (; PSetI.isValid(); ++PSetI) {
1733	DiffSetPressure[*PSetI] -= PSetI.getWeight();
1734	}
1735	}
1736
1737	for (Register Reg : OutRegs) {
1738	// For now only track virtual registers.
1739	if (!Reg.isVirtual())
1740	continue;
1741	PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
1742	for (; PSetI.isValid(); ++PSetI) {
1743	DiffSetPressure[*PSetI] += PSetI.getWeight();
1744	}
1745	}
1746
1747	return DiffSetPressure;
1748	}
1749
1750	// SIScheduler //
1751
1752	struct SIScheduleBlockResult
1753	SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
1754	SISchedulerBlockSchedulerVariant ScheduleVariant) {
1755	SIScheduleBlocks Blocks = BlockCreator.getBlocks(BlockVariant);
1756	SIScheduleBlockScheduler Scheduler(DAG, ScheduleVariant, Blocks);
1757	std::vector<SIScheduleBlock*> ScheduledBlocks;
1758	struct SIScheduleBlockResult Res;
1759
1760	ScheduledBlocks = Scheduler.getBlocks();
1761
1762	for (unsigned b = 0; b < ScheduledBlocks.size(); ++b) {
1763	SIScheduleBlock *Block = ScheduledBlocks[b];
1764	std::vector<SUnit*> SUs = Block->getScheduledUnits();
1765
1766	for (SUnit* SU : SUs)
1767	Res.SUs.push_back(SU->NodeNum);
1768	}
1769
1770	Res.MaxSGPRUsage = Scheduler.getSGPRUsage();
1771	Res.MaxVGPRUsage = Scheduler.getVGPRUsage();
1772	return Res;
1773	}
1774
1775	// SIScheduleDAGMI //
1776
1777	SIScheduleDAGMI::SIScheduleDAGMI(MachineSchedContext *C) :
1778	ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C)) {
1779	SITII = static_cast<const SIInstrInfo*>(TII);
1780	SITRI = static_cast<const SIRegisterInfo*>(TRI);
1781	}
1782
1783	SIScheduleDAGMI::~SIScheduleDAGMI() = default;
1784
1785	// Code adapted from scheduleDAG.cpp
1786	// Does a topological sort over the SUs.
1787	// Both TopDown and BottomUp
1788	void SIScheduleDAGMI::topologicalSort() {
1789	Topo.InitDAGTopologicalSorting();
1790
1791	TopDownIndex2SU = std::vector<int>(Topo.begin(), Topo.end());
1792	BottomUpIndex2SU = std::vector<int>(Topo.rbegin(), Topo.rend());
1793	}
1794
1795	// Move low latencies further from their user without
1796	// increasing SGPR usage (in general)
1797	// This is to be replaced by a better pass that would
1798	// take into account SGPR usage (based on VGPR Usage
1799	// and the corresponding wavefront count), that would
1800	// try to merge groups of loads if it make sense, etc
1801	void SIScheduleDAGMI::moveLowLatencies() {
1802	unsigned DAGSize = SUnits.size();
1803	int LastLowLatencyUser = -1;
1804	int LastLowLatencyPos = -1;
1805
1806	for (unsigned i = 0, e = ScheduledSUnits.size(); i != e; ++i) {
1807	SUnit *SU = &SUnits[ScheduledSUnits[i]];
1808	bool IsLowLatencyUser = false;
1809	unsigned MinPos = 0;
1810
1811	for (SDep& PredDep : SU->Preds) {
1812	SUnit *Pred = PredDep.getSUnit();
1813	if (SITII->isLowLatencyInstruction(*Pred->getInstr())) {
1814	IsLowLatencyUser = true;
1815	}
1816	if (Pred->NodeNum >= DAGSize)
1817	continue;
1818	unsigned PredPos = ScheduledSUnitsInv[Pred->NodeNum];
1819	if (PredPos >= MinPos)
1820	MinPos = PredPos + 1;
1821	}
1822
1823	if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
1824	unsigned BestPos = LastLowLatencyUser + 1;
1825	if ((int)BestPos <= LastLowLatencyPos)
1826	BestPos = LastLowLatencyPos + 1;
1827	if (BestPos < MinPos)
1828	BestPos = MinPos;
1829	if (BestPos < i) {
1830	for (unsigned u = i; u > BestPos; --u) {
1831	++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
1832	ScheduledSUnits[u] = ScheduledSUnits[u-1];
1833	}
1834	ScheduledSUnits[BestPos] = SU->NodeNum;
1835	ScheduledSUnitsInv[SU->NodeNum] = BestPos;
1836	}
1837	LastLowLatencyPos = BestPos;
1838	if (IsLowLatencyUser)
1839	LastLowLatencyUser = BestPos;
1840	} else if (IsLowLatencyUser) {
1841	LastLowLatencyUser = i;
1842	// Moves COPY instructions on which depends
1843	// the low latency instructions too.
1844	} else if (SU->getInstr()->getOpcode() == AMDGPU::COPY) {
1845	bool CopyForLowLat = false;
1846	for (SDep& SuccDep : SU->Succs) {
1847	SUnit *Succ = SuccDep.getSUnit();
1848	if (SuccDep.isWeak() \|\| Succ->NodeNum >= DAGSize)
1849	continue;
1850	if (SITII->isLowLatencyInstruction(*Succ->getInstr())) {
1851	CopyForLowLat = true;
1852	}
1853	}
1854	if (!CopyForLowLat)
1855	continue;
1856	if (MinPos < i) {
1857	for (unsigned u = i; u > MinPos; --u) {
1858	++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
1859	ScheduledSUnits[u] = ScheduledSUnits[u-1];
1860	}
1861	ScheduledSUnits[MinPos] = SU->NodeNum;
1862	ScheduledSUnitsInv[SU->NodeNum] = MinPos;
1863	}
1864	}
1865	}
1866	}
1867
1868	void SIScheduleDAGMI::restoreSULinksLeft() {
1869	for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1870	SUnits[i].isScheduled = false;
1871	SUnits[i].WeakPredsLeft = SUnitsLinksBackup[i].WeakPredsLeft;
1872	SUnits[i].NumPredsLeft = SUnitsLinksBackup[i].NumPredsLeft;
1873	SUnits[i].WeakSuccsLeft = SUnitsLinksBackup[i].WeakSuccsLeft;
1874	SUnits[i].NumSuccsLeft = SUnitsLinksBackup[i].NumSuccsLeft;
1875	}
1876	}
1877
1878	// Return the Vgpr and Sgpr usage corresponding to some virtual registers.
1879	template<typename _Iterator> void
1880	SIScheduleDAGMI::fillVgprSgprCost(_Iterator First, _Iterator End,
1881	unsigned &VgprUsage, unsigned &SgprUsage) {
1882	VgprUsage = 0;
1883	SgprUsage = 0;
1884	for (_Iterator RegI = First; RegI != End; ++RegI) {
1885	Register Reg = *RegI;
1886	// For now only track virtual registers
1887	if (!Reg.isVirtual())
1888	continue;
1889	PSetIterator PSetI = MRI.getPressureSets(Reg);
1890	for (; PSetI.isValid(); ++PSetI) {
1891	if (*PSetI == AMDGPU::RegisterPressureSets::VGPR_32)
1892	VgprUsage += PSetI.getWeight();
1893	else if (*PSetI == AMDGPU::RegisterPressureSets::SReg_32)
1894	SgprUsage += PSetI.getWeight();
1895	}
1896	}
1897	}
1898
1899	void SIScheduleDAGMI::schedule()
1900	{
1901	SmallVector<SUnit*, 8> TopRoots, BotRoots;
1902	SIScheduleBlockResult Best, Temp;
1903	LLVM_DEBUG(dbgs() << "Preparing Scheduling\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Preparing Scheduling\n" ; } } while (false);
1904
1905	buildDAGWithRegPressure();
1906	LLVM_DEBUG(dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dump(); } } while (false);
1907
1908	topologicalSort();
1909	findRootsAndBiasEdges(TopRoots, BotRoots);
1910	// We reuse several ScheduleDAGMI and ScheduleDAGMILive
1911	// functions, but to make them happy we must initialize
1912	// the default Scheduler implementation (even if we do not
1913	// run it)
1914	SchedImpl->initialize(this);
1915	initQueues(TopRoots, BotRoots);
1916
1917	// Fill some stats to help scheduling.
1918
1919	SUnitsLinksBackup = SUnits;
1920	IsLowLatencySU.clear();
1921	LowLatencyOffset.clear();
1922	IsHighLatencySU.clear();
1923
1924	IsLowLatencySU.resize(SUnits.size(), 0);
1925	LowLatencyOffset.resize(SUnits.size(), 0);
1926	IsHighLatencySU.resize(SUnits.size(), 0);
1927
1928	for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
1929	SUnit *SU = &SUnits[i];
1930	const MachineOperand *BaseLatOp;
1931	int64_t OffLatReg;
1932	if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
1933	IsLowLatencySU[i] = 1;
1934	bool OffsetIsScalable;
1935	if (SITII->getMemOperandWithOffset(*SU->getInstr(), BaseLatOp, OffLatReg,
1936	OffsetIsScalable, TRI))
1937	LowLatencyOffset[i] = OffLatReg;
1938	} else if (SITII->isHighLatencyDef(SU->getInstr()->getOpcode()))
1939	IsHighLatencySU[i] = 1;
1940	}
1941
1942	SIScheduler Scheduler(this);
1943	Best = Scheduler.scheduleVariant(SISchedulerBlockCreatorVariant::LatenciesAlone,
1944	SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage);
1945
1946	// if VGPR usage is extremely high, try other good performing variants
1947	// which could lead to lower VGPR usage
1948	if (Best.MaxVGPRUsage > 180) {
1949	static const std::pair<SISchedulerBlockCreatorVariant,
1950	SISchedulerBlockSchedulerVariant>
1951	Variants[] = {
1952	{ LatenciesAlone, BlockRegUsageLatency },
1953	// { LatenciesAlone, BlockRegUsage },
1954	{ LatenciesGrouped, BlockLatencyRegUsage },
1955	// { LatenciesGrouped, BlockRegUsageLatency },
1956	// { LatenciesGrouped, BlockRegUsage },
1957	{ LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
1958	// { LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
1959	// { LatenciesAlonePlusConsecutive, BlockRegUsage }
1960	};
1961	for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1962	Temp = Scheduler.scheduleVariant(v.first, v.second);
1963	if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
1964	Best = Temp;
1965	}
1966	}
1967	// if VGPR usage is still extremely high, we may spill. Try other variants
1968	// which are less performing, but that could lead to lower VGPR usage.
1969	if (Best.MaxVGPRUsage > 200) {
1970	static const std::pair<SISchedulerBlockCreatorVariant,
1971	SISchedulerBlockSchedulerVariant>
1972	Variants[] = {
1973	// { LatenciesAlone, BlockRegUsageLatency },
1974	{ LatenciesAlone, BlockRegUsage },
1975	// { LatenciesGrouped, BlockLatencyRegUsage },
1976	{ LatenciesGrouped, BlockRegUsageLatency },
1977	{ LatenciesGrouped, BlockRegUsage },
1978	// { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
1979	{ LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
1980	{ LatenciesAlonePlusConsecutive, BlockRegUsage }
1981	};
1982	for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1983	Temp = Scheduler.scheduleVariant(v.first, v.second);
1984	if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
1985	Best = Temp;
1986	}
1987	}
1988
1989	ScheduledSUnits = Best.SUs;
1990	ScheduledSUnitsInv.resize(SUnits.size());
1991
1992	for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
1993	ScheduledSUnitsInv[ScheduledSUnits[i]] = i;
1994	}
1995
1996	moveLowLatencies();
1997
1998	// Tell the outside world about the result of the scheduling.
1999
2000	assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker")(static_cast <bool> (TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker") ? void (0) : __assert_fail ("TopRPTracker.getPos() == RegionBegin && \"bad initial Top tracker\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 2000, __extension__ __PRETTY_FUNCTION__));
2001	TopRPTracker.setPos(CurrentTop);
2002
2003	for (std::vector<unsigned>::iterator I = ScheduledSUnits.begin(),
2004	E = ScheduledSUnits.end(); I != E; ++I) {
2005	SUnit SU = &SUnits[I];
2006
2007	scheduleMI(SU, true);
2008
2009	LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr(); } } while (false)
2010	<< SU->getInstr())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << SU->getInstr(); } } while (false);
2011	}
2012
2013	assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.")(static_cast <bool> (CurrentTop == CurrentBottom && "Nonempty unscheduled zone.") ? void (0) : __assert_fail ("CurrentTop == CurrentBottom && \"Nonempty unscheduled zone.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp" , 2013, __extension__ __PRETTY_FUNCTION__));
2014
2015	placeDebugValues();
2016
2017	LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { { dbgs() << "*** Final schedule for " << printMBBReference(begin()->getParent()) << " **\n"; dumpSchedule(); dbgs() << '\n'; }; } } while (false)
2018	dbgs() << "* Final schedule for "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { { dbgs() << "* Final schedule for " << printMBBReference(begin()->getParent()) << " **\n"; dumpSchedule(); dbgs() << '\n'; }; } } while (false)
2019	<< printMBBReference(begin()->getParent()) << " \n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { { dbgs() << "* Final schedule for " << printMBBReference(begin()->getParent()) << " **\n"; dumpSchedule(); dbgs() << '\n'; }; } } while (false)
2020	dumpSchedule();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { { dbgs() << "*** Final schedule for " << printMBBReference(begin()->getParent()) << " **\n"; dumpSchedule(); dbgs() << '\n'; }; } } while (false)
2021	dbgs() << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { { dbgs() << "*** Final schedule for " << printMBBReference(begin()->getParent()) << " **\n"; dumpSchedule(); dbgs() << '\n'; }; } } while (false)
2022	})do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("machine-scheduler")) { { dbgs() << "*** Final schedule for " << printMBBReference(begin()->getParent()) << " **\n"; dumpSchedule(); dbgs() << '\n'; }; } } while (false);
2023	}