Bug Summary

File: llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
Warning: line 141, column 27
The result of the left shift is undefined due to shifting by '64', which is greater or equal to the width of type 'long long'
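
For context, the diagnostic points at a shift whose count can reach the full width of 'long long'. The following is a minimal, self-contained C++ sketch, not taken from the report, showing the hazard and one conventional guard (all names here are illustrative):

    #include <cstdint>

    // Undefined behavior when NumLanes >= 64: the shift count equals or
    // exceeds the width of the left operand, which C++ leaves undefined.
    int64_t lanesMaskUnsafe(unsigned NumLanes) {
      return (1LL << NumLanes) - 1LL;
    }

    // One common way to stay defined for the full 0..64 range: special-case
    // the all-ones mask instead of shifting by 64.
    int64_t lanesMaskGuarded(unsigned NumLanes) {
      if (NumLanes >= 64)
        return -1;                                  // all 64 lanes set
      return (int64_t)((UINT64_C(1) << NumLanes) - 1);
    }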

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name SIRegisterInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/build-llvm/lib/Target/AMDGPU -resource-dir /usr/lib/llvm-13/lib/clang/13.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/build-llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/build-llvm/include -I /build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/include -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-13/lib/clang/13.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/build-llvm/lib/Target/AMDGPU -fdebug-prefix-map=/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-07-26-235520-9401-1 -x c++ /build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

1//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// SI implementation of the TargetRegisterInfo class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "SIRegisterInfo.h"
15#include "AMDGPU.h"
16#include "AMDGPURegisterBankInfo.h"
17#include "GCNSubtarget.h"
18#include "MCTargetDesc/AMDGPUInstPrinter.h"
19#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
20#include "SIMachineFunctionInfo.h"
21#include "llvm/CodeGen/LiveIntervals.h"
22#include "llvm/CodeGen/MachineDominators.h"
23#include "llvm/CodeGen/RegisterScavenging.h"
24
25using namespace llvm;
26
27#define GET_REGINFO_TARGET_DESC
28#include "AMDGPUGenRegisterInfo.inc"
29
30static cl::opt<bool> EnableSpillSGPRToVGPR(
31 "amdgpu-spill-sgpr-to-vgpr",
 32 cl::desc("Enable spilling SGPRs to VGPRs"),
33 cl::ReallyHidden,
34 cl::init(true));
35
36std::array<std::vector<int16_t>, 16> SIRegisterInfo::RegSplitParts;
37std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;
38
39// Map numbers of DWORDs to indexes in SubRegFromChannelTable.
40// Valid indexes are shifted 1, such that a 0 mapping means unsupported.
41// e.g. for 8 DWORDs (256-bit), SubRegFromChannelTableWidthMap[8] = 8,
42// meaning index 7 in SubRegFromChannelTable.
43static const std::array<unsigned, 17> SubRegFromChannelTableWidthMap = {
44 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};
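
To make the "shifted by 1" encoding above concrete, here is a small self-contained sketch in plain C++ (names simplified; not part of the source file) that mirrors how getSubRegFromChannel later in this file consumes the map:

    #include <array>
    #include <cassert>

    // widthMap[NumDWORDs] == 0 means the width is unsupported; otherwise it
    // is the row index into the channel table plus one.
    static const std::array<unsigned, 17> widthMap = {
        0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};

    static unsigned tableRowForDWORDs(unsigned NumDWORDs) {
      assert(NumDWORDs < widthMap.size());
      unsigned Idx = widthMap[NumDWORDs];
      assert(Idx != 0 && "unsupported width");
      return Idx - 1;   // e.g. 8 DWORDs -> widthMap[8] == 8 -> row 7
    }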
45
46namespace llvm {
47
48// A temporary struct to spill SGPRs.
49// This is mostly to spill SGPRs to memory. Spilling SGPRs into VGPR lanes emits
50// just v_writelane and v_readlane.
51//
52// When spilling to memory, the SGPRs are written into VGPR lanes and the VGPR
53// is saved to scratch (or the other way around for loads).
54// For this, a VGPR is required where the needed lanes can be clobbered. The
55// RegScavenger can provide a VGPR where currently active lanes can be
56// clobbered, but we still need to save inactive lanes.
57// The high-level steps are:
58// - Try to scavenge SGPR(s) to save exec
59// - Try to scavenge VGPR
60// - Save needed, all or inactive lanes of a TmpVGPR
61// - Spill/Restore SGPRs using TmpVGPR
62// - Restore TmpVGPR
63//
64// To save all lanes of TmpVGPR, exec needs to be saved and modified. If we
65// cannot scavenge temporary SGPRs to save exec, we use the following code:
66// buffer_store_dword TmpVGPR ; only if active lanes need to be saved
67// s_not exec, exec
68// buffer_store_dword TmpVGPR ; save inactive lanes
69// s_not exec, exec
70struct SGPRSpillBuilder {
71 struct PerVGPRData {
72 unsigned PerVGPR;
73 unsigned NumVGPRs;
74 int64_t VGPRLanes;
75 };
76
77 // The SGPR to save
78 Register SuperReg;
79 MachineBasicBlock::iterator MI;
80 ArrayRef<int16_t> SplitParts;
81 unsigned NumSubRegs;
82 bool IsKill;
83 const DebugLoc &DL;
84
85 /* When spilling to stack */
86 // The SGPRs are written into this VGPR, which is then written to scratch
87 // (or vice versa for loads).
88 Register TmpVGPR = AMDGPU::NoRegister;
89 // Temporary spill slot to save TmpVGPR to.
90 int TmpVGPRIndex = 0;
91 // If TmpVGPR is live before the spill or if it is scavenged.
92 bool TmpVGPRLive = false;
93 // Scavenged SGPR to save EXEC.
94 Register SavedExecReg = AMDGPU::NoRegister;
95 // Stack index to write the SGPRs to.
96 int Index;
97 unsigned EltSize = 4;
98
99 RegScavenger *RS;
100 MachineBasicBlock &MBB;
101 MachineFunction &MF;
102 SIMachineFunctionInfo &MFI;
103 const SIInstrInfo &TII;
104 const SIRegisterInfo &TRI;
105 bool IsWave32;
106 Register ExecReg;
107 unsigned MovOpc;
108 unsigned NotOpc;
109
110 SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
111 bool IsWave32, MachineBasicBlock::iterator MI, int Index,
112 RegScavenger *RS)
113 : SuperReg(MI->getOperand(0).getReg()), MI(MI),
114 IsKill(MI->getOperand(0).isKill()), DL(MI->getDebugLoc()), Index(Index),
115 RS(RS), MBB(*MI->getParent()), MF(*MBB.getParent()),
116 MFI(*MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
117 IsWave32(IsWave32) {
118 const TargetRegisterClass *RC = TRI.getPhysRegClass(SuperReg);
119 SplitParts = TRI.getRegSplitParts(RC, EltSize);
120 NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
121
122 if (IsWave32) {
123 ExecReg = AMDGPU::EXEC_LO;
124 MovOpc = AMDGPU::S_MOV_B32;
125 NotOpc = AMDGPU::S_NOT_B32;
126 } else {
127 ExecReg = AMDGPU::EXEC;
128 MovOpc = AMDGPU::S_MOV_B64;
129 NotOpc = AMDGPU::S_NOT_B64;
130 }
131
 132 assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
 133 assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI &&
 134 SuperReg != AMDGPU::EXEC && "exec should never spill");
135 }
136
137 PerVGPRData getPerVGPRData() {
138 PerVGPRData Data;
 139 Data.PerVGPR = IsWave32 ? 32 : 64;
  15.1: Field 'IsWave32' is false
  16: '?' condition is false
  17: The value 64 is assigned to 'Data.PerVGPR'
 140 Data.NumVGPRs = (NumSubRegs + (Data.PerVGPR - 1)) / Data.PerVGPR;
 141 Data.VGPRLanes = (1LL << std::min(Data.PerVGPR, NumSubRegs)) - 1LL;
  18: Passing value via 1st parameter '__a'
  19: Calling 'min<unsigned int>'
  23: Returning from 'min<unsigned int>'
  24: The result of the left shift is undefined due to shifting by '64', which is greater or equal to the width of type 'long long'
142 return Data;
143 }
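
If the mask really can cover all 64 lanes, one defined-behavior formulation is sketched below; this is only an illustration under that assumption, not necessarily the fix the maintainers would pick (in practice NumSubRegs appears to be capped at 32 by the spill opcodes later in this file, which would make the flagged path unreachable). LLVM's own llvm::maskTrailingOnes<uint64_t>() in llvm/Support/MathExtras.h covers the same case:

    #include <algorithm>
    #include <cstdint>

    // Hypothetical standalone equivalent of the expression on line 141 that
    // stays defined even when the lane count reaches 64.
    static int64_t vgprLanesFor(unsigned PerVGPR, unsigned NumSubRegs) {
      unsigned N = std::min(PerVGPR, NumSubRegs);
      if (N >= 64)
        return -1;                                  // all 64 lanes
      return (int64_t)((UINT64_C(1) << N) - 1);
    }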
144
145 // Tries to scavenge SGPRs to save EXEC and a VGPR. Uses v0 if no VGPR is
146 // free.
147 // Writes these instructions if an SGPR can be scavenged:
148 // s_mov_b64 s[6:7], exec ; Save exec
149 // s_mov_b64 exec, 3 ; Wanted lanemask
150 // buffer_store_dword v1 ; Write scavenged VGPR to emergency slot
151 //
152 // Writes these instructions if no SGPR can be scavenged:
153 // buffer_store_dword v0 ; Only if no free VGPR was found
154 // s_not_b64 exec, exec
155 // buffer_store_dword v0 ; Save inactive lanes
156 // ; exec stays inverted, it is flipped back in
157 // ; restore.
158 void prepare() {
159 // Scavenged temporary VGPR to use. It must be scavenged once for any number
160 // of spilled subregs.
161 // FIXME: The liveness analysis is limited and does not tell if a register
162 // is in use in lanes that are currently inactive. We can never be sure if
 163 // a register is actually in use in another lane, so we need to save all
164 // used lanes of the chosen VGPR.
 165 assert(RS && "Cannot spill SGPR to memory without RegScavenger");
  8: Assuming field 'RS' is non-null
  9: '?' condition is true
166 TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0, false);
167
168 // Reserve temporary stack slot
169 TmpVGPRIndex = MFI.getScavengeFI(MF.getFrameInfo(), TRI);
170 if (TmpVGPR) {
  10: Assuming the condition is true
  11: Taking true branch
171 // Found a register that is dead in the currently active lanes, we only
172 // need to spill inactive lanes.
173 TmpVGPRLive = false;
174 } else {
175 // Pick v0 because it doesn't make a difference.
176 TmpVGPR = AMDGPU::VGPR0;
177 TmpVGPRLive = true;
178 }
179
180 // Try to scavenge SGPRs to save exec
 181 assert(!SavedExecReg && "Exec is already saved, refuse to save again");
  12: Assuming the condition is true
  13: '?' condition is true
182 const TargetRegisterClass &RC =
 183 IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass;
  13.1: Field 'IsWave32' is false
  14: '?' condition is false
184 RS->setRegUsed(SuperReg);
185 SavedExecReg = RS->scavengeRegister(&RC, MI, 0, false);
186
187 int64_t VGPRLanes = getPerVGPRData().VGPRLanes;
  15: Calling 'SGPRSpillBuilder::getPerVGPRData'
188
189 if (SavedExecReg) {
190 RS->setRegUsed(SavedExecReg);
191 // Set exec to needed lanes
192 BuildMI(MBB, MI, DL, TII.get(MovOpc), SavedExecReg).addReg(ExecReg);
193 auto I = BuildMI(MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes);
194 if (!TmpVGPRLive)
195 I.addReg(TmpVGPR, RegState::ImplicitDefine);
196 // Spill needed lanes
197 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
198 } else {
199 // Spill active lanes
200 if (TmpVGPRLive)
201 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false,
202 /*IsKill*/ false);
203 // Spill inactive lanes
204 auto I = BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
205 if (!TmpVGPRLive)
206 I.addReg(TmpVGPR, RegState::ImplicitDefine);
207 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
208 }
209 }
210
211 // Writes these instructions if an SGPR can be scavenged:
212 // buffer_load_dword v1 ; Write scavenged VGPR to emergency slot
213 // s_waitcnt vmcnt(0) ; If a free VGPR was found
214 // s_mov_b64 exec, s[6:7] ; Save exec
215 //
216 // Writes these instructions if no SGPR can be scavenged:
217 // buffer_load_dword v0 ; Restore inactive lanes
218 // s_waitcnt vmcnt(0) ; If a free VGPR was found
219 // s_not_b64 exec, exec
220 // buffer_load_dword v0 ; Only if no free VGPR was found
221 void restore() {
222 if (SavedExecReg) {
223 // Restore used lanes
224 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
225 /*IsKill*/ false);
226 // Restore exec
227 auto I = BuildMI(MBB, MI, DL, TII.get(MovOpc), ExecReg)
228 .addReg(SavedExecReg, RegState::Kill);
229 // Add an implicit use of the load so it is not dead.
230 // FIXME This inserts an unnecessary waitcnt
231 if (!TmpVGPRLive) {
232 I.addReg(TmpVGPR, RegState::ImplicitKill);
233 }
234 } else {
235 // Restore inactive lanes
236 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
237 /*IsKill*/ false);
238 auto I = BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
239 if (!TmpVGPRLive) {
240 I.addReg(TmpVGPR, RegState::ImplicitKill);
241 }
242 // Restore active lanes
243 if (TmpVGPRLive)
244 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true);
245 }
246 }
247
248 // Write TmpVGPR to memory or read TmpVGPR from memory.
249 // Either using a single buffer_load/store if exec is set to the needed mask
250 // or using
251 // buffer_load
252 // s_not exec, exec
253 // buffer_load
254 // s_not exec, exec
255 void readWriteTmpVGPR(unsigned Offset, bool IsLoad) {
256 if (SavedExecReg) {
257 // Spill needed lanes
258 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
259 } else {
260 // Spill active lanes
261 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad,
262 /*IsKill*/ false);
263 // Spill inactive lanes
264 BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
265 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
266 BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
267 }
268 }
269};
270
271} // namespace llvm
272
273SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
274 : AMDGPUGenRegisterInfo(AMDGPU::PC_REG, ST.getAMDGPUDwarfFlavour()), ST(ST),
275 SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) {
276
 277 assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
 278 getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
 279 (getSubRegIndexLaneMask(AMDGPU::lo16) |
 280 getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
 281 getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
 282 "getNumCoveredRegs() will not work with generated subreg masks!");
283
284 RegPressureIgnoredUnits.resize(getNumRegUnits());
285 RegPressureIgnoredUnits.set(
286 *MCRegUnitIterator(MCRegister::from(AMDGPU::M0), this));
287 for (auto Reg : AMDGPU::VGPR_HI16RegClass)
288 RegPressureIgnoredUnits.set(*MCRegUnitIterator(Reg, this));
289
290 // HACK: Until this is fully tablegen'd.
291 static llvm::once_flag InitializeRegSplitPartsFlag;
292
293 static auto InitializeRegSplitPartsOnce = [this]() {
294 for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) {
295 unsigned Size = getSubRegIdxSize(Idx);
296 if (Size & 31)
297 continue;
298 std::vector<int16_t> &Vec = RegSplitParts[Size / 32 - 1];
299 unsigned Pos = getSubRegIdxOffset(Idx);
300 if (Pos % Size)
301 continue;
302 Pos /= Size;
303 if (Vec.empty()) {
304 unsigned MaxNumParts = 1024 / Size; // Maximum register is 1024 bits.
305 Vec.resize(MaxNumParts);
306 }
307 Vec[Pos] = Idx;
308 }
309 };
310
311 static llvm::once_flag InitializeSubRegFromChannelTableFlag;
312
313 static auto InitializeSubRegFromChannelTableOnce = [this]() {
314 for (auto &Row : SubRegFromChannelTable)
315 Row.fill(AMDGPU::NoSubRegister);
316 for (uint16_t Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
317 unsigned Width = AMDGPUSubRegIdxRanges[Idx].Size / 32;
318 unsigned Offset = AMDGPUSubRegIdxRanges[Idx].Offset / 32;
 319 assert(Width < SubRegFromChannelTableWidthMap.size());
320 Width = SubRegFromChannelTableWidthMap[Width];
321 if (Width == 0)
322 continue;
323 unsigned TableIdx = Width - 1;
 324 assert(TableIdx < SubRegFromChannelTable.size());
 325 assert(Offset < SubRegFromChannelTable[TableIdx].size());
326 SubRegFromChannelTable[TableIdx][Offset] = Idx;
327 }
328 };
329
330 llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
331 llvm::call_once(InitializeSubRegFromChannelTableFlag,
332 InitializeSubRegFromChannelTableOnce);
333}
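
The table setup above runs through llvm::call_once, which plays the same role as std::call_once: each lambda fills its shared static table exactly once, no matter how many SIRegisterInfo instances are constructed. A minimal standalone sketch of that pattern (illustrative names only):

    #include <mutex>
    #include <vector>

    static std::once_flag TableInitFlag;
    static std::vector<int> Table;

    // The lambda runs at most once across all threads; later callers block
    // until the first initialization has finished.
    static void ensureTableInitialized() {
      std::call_once(TableInitFlag, [] { Table.assign(32, 0); });
    }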
334
335void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved,
336 MCRegister Reg) const {
337 MCRegAliasIterator R(Reg, this, true);
338
339 for (; R.isValid(); ++R)
340 Reserved.set(*R);
341}
342
343// Forced to be here by one .inc
344const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
345 const MachineFunction *MF) const {
346 CallingConv::ID CC = MF->getFunction().getCallingConv();
347 switch (CC) {
348 case CallingConv::C:
349 case CallingConv::Fast:
350 case CallingConv::Cold:
351 case CallingConv::AMDGPU_Gfx:
352 return MF->getSubtarget<GCNSubtarget>().hasGFX90AInsts()
353 ? CSR_AMDGPU_HighRegs_With_AGPRs_SaveList
354 : CSR_AMDGPU_HighRegs_SaveList;
355 default: {
356 // Dummy to not crash RegisterClassInfo.
357 static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
358 return &NoCalleeSavedReg;
359 }
360 }
361}
362
363const MCPhysReg *
364SIRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const {
365 return nullptr;
366}
367
368const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
369 CallingConv::ID CC) const {
370 switch (CC) {
371 case CallingConv::C:
372 case CallingConv::Fast:
373 case CallingConv::Cold:
374 case CallingConv::AMDGPU_Gfx:
375 return MF.getSubtarget<GCNSubtarget>().hasGFX90AInsts()
376 ? CSR_AMDGPU_HighRegs_With_AGPRs_RegMask
377 : CSR_AMDGPU_HighRegs_RegMask;
378 default:
379 return nullptr;
380 }
381}
382
383const uint32_t *SIRegisterInfo::getNoPreservedMask() const {
384 return CSR_AMDGPU_NoRegs_RegMask;
385}
386
387Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
388 const SIFrameLowering *TFI =
389 MF.getSubtarget<GCNSubtarget>().getFrameLowering();
390 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
391 // During ISel lowering we always reserve the stack pointer in entry
392 // functions, but never actually want to reference it when accessing our own
393 // frame. If we need a frame pointer we use it, but otherwise we can just use
394 // an immediate "0" which we represent by returning NoRegister.
395 if (FuncInfo->isEntryFunction()) {
396 return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg() : Register();
397 }
398 return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg()
399 : FuncInfo->getStackPtrOffsetReg();
400}
401
402bool SIRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
403 // When we need stack realignment, we can't reference off of the
404 // stack pointer, so we reserve a base pointer.
405 const MachineFrameInfo &MFI = MF.getFrameInfo();
406 return MFI.getNumFixedObjects() && shouldRealignStack(MF);
407}
408
409Register SIRegisterInfo::getBaseRegister() const { return AMDGPU::SGPR34; }
410
411const uint32_t *SIRegisterInfo::getAllVGPRRegMask() const {
412 return CSR_AMDGPU_AllVGPRs_RegMask;
413}
414
415const uint32_t *SIRegisterInfo::getAllAGPRRegMask() const {
416 return CSR_AMDGPU_AllAGPRs_RegMask;
417}
418
419const uint32_t *SIRegisterInfo::getAllVectorRegMask() const {
420 return CSR_AMDGPU_AllVectorRegs_RegMask;
421}
422
423const uint32_t *SIRegisterInfo::getAllAllocatableSRegMask() const {
424 return CSR_AMDGPU_AllAllocatableSRegs_RegMask;
425}
426
427unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel,
428 unsigned NumRegs) {
 429 assert(NumRegs < SubRegFromChannelTableWidthMap.size());
 430 unsigned NumRegIndex = SubRegFromChannelTableWidthMap[NumRegs];
 431 assert(NumRegIndex && "Not implemented");
 432 assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].size());
433 return SubRegFromChannelTable[NumRegIndex - 1][Channel];
434}
435
436MCRegister SIRegisterInfo::reservedPrivateSegmentBufferReg(
437 const MachineFunction &MF) const {
438 unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
439 MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
440 return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SGPR_128RegClass);
441}
442
443BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
444 BitVector Reserved(getNumRegs());
445 Reserved.set(AMDGPU::MODE);
446
447 // EXEC_LO and EXEC_HI could be allocated and used as regular register, but
448 // this seems likely to result in bugs, so I'm marking them as reserved.
449 reserveRegisterTuples(Reserved, AMDGPU::EXEC);
450 reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
451
452 // M0 has to be reserved so that llvm accepts it as a live-in into a block.
453 reserveRegisterTuples(Reserved, AMDGPU::M0);
454
455 // Reserve src_vccz, src_execz, src_scc.
456 reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
457 reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
458 reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
459
460 // Reserve the memory aperture registers.
461 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
462 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
463 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
464 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
465
466 // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen.
467 reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
468
469 // Reserve xnack_mask registers - support is not implemented in Codegen.
470 reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
471
472 // Reserve lds_direct register - support is not implemented in Codegen.
473 reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
474
475 // Reserve Trap Handler registers - support is not implemented in Codegen.
476 reserveRegisterTuples(Reserved, AMDGPU::TBA);
477 reserveRegisterTuples(Reserved, AMDGPU::TMA);
478 reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
479 reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
480 reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
481 reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
482 reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
483 reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
484 reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
485 reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
486
487 // Reserve null register - it shall never be allocated
488 reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL);
489
490 // Disallow vcc_hi allocation in wave32. It may be allocated but most likely
491 // will result in bugs.
492 if (isWave32) {
493 Reserved.set(AMDGPU::VCC);
494 Reserved.set(AMDGPU::VCC_HI);
495 }
496
497 unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
498 unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
499 for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
500 unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
501 reserveRegisterTuples(Reserved, Reg);
502 }
503
504 unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
505 // TODO: In an entry function without calls and AGPRs used it is possible
506 // to use the whole register budget for VGPRs. Even more it shall
507 // be possible to estimate maximum AGPR/VGPR pressure and split
508 // register file accordingly.
509 if (ST.hasGFX90AInsts())
510 MaxNumVGPRs /= 2;
511 unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
512 for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
513 unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
514 reserveRegisterTuples(Reserved, Reg);
515 Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
516 reserveRegisterTuples(Reserved, Reg);
517 }
518
519 for (auto Reg : AMDGPU::SReg_32RegClass) {
520 Reserved.set(getSubReg(Reg, AMDGPU::hi16));
521 Register Low = getSubReg(Reg, AMDGPU::lo16);
522 // This is to prevent BB vcc liveness errors.
523 if (!AMDGPU::SGPR_LO16RegClass.contains(Low))
524 Reserved.set(Low);
525 }
526
527 for (auto Reg : AMDGPU::AGPR_32RegClass) {
528 Reserved.set(getSubReg(Reg, AMDGPU::hi16));
529 }
530
 531 // Reserve all remaining AGPRs if there are no instructions to use them.
532 if (!ST.hasMAIInsts()) {
533 for (unsigned i = 0; i < MaxNumVGPRs; ++i) {
534 unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
535 reserveRegisterTuples(Reserved, Reg);
536 }
537 }
538
539 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
540
541 Register ScratchRSrcReg = MFI->getScratchRSrcReg();
542 if (ScratchRSrcReg != AMDGPU::NoRegister) {
543 // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
544 // to spill.
545 // TODO: May need to reserve a VGPR if doing LDS spilling.
546 reserveRegisterTuples(Reserved, ScratchRSrcReg);
547 }
548
549 // We have to assume the SP is needed in case there are calls in the function,
550 // which is detected after the function is lowered. If we aren't really going
551 // to need SP, don't bother reserving it.
552 MCRegister StackPtrReg = MFI->getStackPtrOffsetReg();
553
554 if (StackPtrReg) {
555 reserveRegisterTuples(Reserved, StackPtrReg);
 556 assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
557 }
558
559 MCRegister FrameReg = MFI->getFrameOffsetReg();
560 if (FrameReg) {
561 reserveRegisterTuples(Reserved, FrameReg);
 562 assert(!isSubRegister(ScratchRSrcReg, FrameReg));
563 }
564
565 if (hasBasePointer(MF)) {
566 MCRegister BasePtrReg = getBaseRegister();
567 reserveRegisterTuples(Reserved, BasePtrReg);
 568 assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));
569 }
570
571 for (auto Reg : MFI->WWMReservedRegs) {
572 reserveRegisterTuples(Reserved, Reg.first);
573 }
574
575 // Reserve VGPRs used for SGPR spilling.
576 // Note we treat freezeReservedRegs unusually because we run register
577 // allocation in two phases. It's OK to re-freeze with new registers for the
578 // second run.
579#if 0
580 for (auto &SpilledFI : MFI->sgpr_spill_vgprs()) {
581 for (auto &SpilledVGPR : SpilledFI.second)
582 reserveRegisterTuples(Reserved, SpilledVGPR.VGPR);
583 }
584#endif
585
586 // FIXME: Stop using reserved registers for this.
587 for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
588 reserveRegisterTuples(Reserved, Reg);
589
590 for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
591 reserveRegisterTuples(Reserved, Reg);
592
593 for (auto SSpill : MFI->getSGPRSpillVGPRs())
594 reserveRegisterTuples(Reserved, SSpill.VGPR);
595
596 return Reserved;
597}
598
599bool SIRegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
600 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
601 // On entry, the base address is 0, so it can't possibly need any more
602 // alignment.
603
604 // FIXME: Should be able to specify the entry frame alignment per calling
605 // convention instead.
606 if (Info->isEntryFunction())
607 return false;
608
609 return TargetRegisterInfo::shouldRealignStack(MF);
610}
611
612bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
613 const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
614 if (Info->isEntryFunction()) {
615 const MachineFrameInfo &MFI = Fn.getFrameInfo();
616 return MFI.hasStackObjects() || MFI.hasCalls();
617 }
618
619 // May need scavenger for dealing with callee saved registers.
620 return true;
621}
622
623bool SIRegisterInfo::requiresFrameIndexScavenging(
624 const MachineFunction &MF) const {
625 // Do not use frame virtual registers. They used to be used for SGPRs, but
626 // once we reach PrologEpilogInserter, we can no longer spill SGPRs. If the
627 // scavenger fails, we can increment/decrement the necessary SGPRs to avoid a
628 // spill.
629 return false;
630}
631
632bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
633 const MachineFunction &MF) const {
634 const MachineFrameInfo &MFI = MF.getFrameInfo();
635 return MFI.hasStackObjects();
636}
637
638bool SIRegisterInfo::requiresVirtualBaseRegisters(
639 const MachineFunction &) const {
640 // There are no special dedicated stack or frame pointers.
641 return true;
642}
643
644int64_t SIRegisterInfo::getScratchInstrOffset(const MachineInstr *MI) const {
 645 assert(SIInstrInfo::isMUBUF(*MI) || SIInstrInfo::isFLATScratch(*MI));
646
647 int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
648 AMDGPU::OpName::offset);
649 return MI->getOperand(OffIdx).getImm();
650}
651
652int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
653 int Idx) const {
654 if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
655 return 0;
656
 657 assert((Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
 658 AMDGPU::OpName::vaddr) ||
 659 (Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
 660 AMDGPU::OpName::saddr))) &&
 661 "Should never see frame index on non-address operand");
662
663 return getScratchInstrOffset(MI);
664}
665
666bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
667 if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
668 return false;
669
670 int64_t FullOffset = Offset + getScratchInstrOffset(MI);
671
672 if (SIInstrInfo::isMUBUF(*MI))
673 return !SIInstrInfo::isLegalMUBUFImmOffset(FullOffset);
674
675 const SIInstrInfo *TII = ST.getInstrInfo();
676 return !TII->isLegalFLATOffset(FullOffset, AMDGPUAS::PRIVATE_ADDRESS,
677 SIInstrFlags::FlatScratch);
678}
679
680Register SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
681 int FrameIdx,
682 int64_t Offset) const {
683 MachineBasicBlock::iterator Ins = MBB->begin();
684 DebugLoc DL; // Defaults to "unknown"
685
686 if (Ins != MBB->end())
687 DL = Ins->getDebugLoc();
688
689 MachineFunction *MF = MBB->getParent();
690 const SIInstrInfo *TII = ST.getInstrInfo();
691 MachineRegisterInfo &MRI = MF->getRegInfo();
692 unsigned MovOpc = ST.enableFlatScratch() ? AMDGPU::S_MOV_B32
693 : AMDGPU::V_MOV_B32_e32;
694
695 Register BaseReg = MRI.createVirtualRegister(
696 ST.enableFlatScratch() ? &AMDGPU::SReg_32_XEXEC_HIRegClass
697 : &AMDGPU::VGPR_32RegClass);
698
699 if (Offset == 0) {
700 BuildMI(*MBB, Ins, DL, TII->get(MovOpc), BaseReg)
701 .addFrameIndex(FrameIdx);
702 return BaseReg;
703 }
704
705 Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
706
707 Register FIReg = MRI.createVirtualRegister(
708 ST.enableFlatScratch() ? &AMDGPU::SReg_32_XM0RegClass
709 : &AMDGPU::VGPR_32RegClass);
710
711 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
712 .addImm(Offset);
713 BuildMI(*MBB, Ins, DL, TII->get(MovOpc), FIReg)
714 .addFrameIndex(FrameIdx);
715
716 if (ST.enableFlatScratch() ) {
717 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_ADD_I32), BaseReg)
718 .addReg(OffsetReg, RegState::Kill)
719 .addReg(FIReg);
720 return BaseReg;
721 }
722
723 TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
724 .addReg(OffsetReg, RegState::Kill)
725 .addReg(FIReg)
726 .addImm(0); // clamp bit
727
728 return BaseReg;
729}
730
731void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
732 int64_t Offset) const {
733 const SIInstrInfo *TII = ST.getInstrInfo();
734 bool IsFlat = TII->isFLATScratch(MI);
735
736#ifndef NDEBUG
737 // FIXME: Is it possible to be storing a frame index to itself?
738 bool SeenFI = false;
739 for (const MachineOperand &MO: MI.operands()) {
740 if (MO.isFI()) {
741 if (SeenFI)
 742 llvm_unreachable("should not see multiple frame indices");
743
744 SeenFI = true;
745 }
746 }
747#endif
748
749 MachineOperand *FIOp =
750 TII->getNamedOperand(MI, IsFlat ? AMDGPU::OpName::saddr
751 : AMDGPU::OpName::vaddr);
752
753 MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
754 int64_t NewOffset = OffsetOp->getImm() + Offset;
755
 756 assert(FIOp && FIOp->isFI() && "frame index must be address operand");
 757 assert(TII->isMUBUF(MI) || TII->isFLATScratch(MI));
758
759 if (IsFlat) {
 760 assert(TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
 761 SIInstrFlags::FlatScratch) &&
 762 "offset should be legal");
763 FIOp->ChangeToRegister(BaseReg, false);
764 OffsetOp->setImm(NewOffset);
765 return;
766 }
767
768#ifndef NDEBUG
769 MachineOperand *SOffset = TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
 770 assert(SOffset->isImm() && SOffset->getImm() == 0);
771#endif
772
 773 assert(SIInstrInfo::isLegalMUBUFImmOffset(NewOffset) &&
 774 "offset should be legal");
775
776 FIOp->ChangeToRegister(BaseReg, false);
777 OffsetOp->setImm(NewOffset);
778}
779
780bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
781 Register BaseReg,
782 int64_t Offset) const {
783 if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
784 return false;
785
786 int64_t NewOffset = Offset + getScratchInstrOffset(MI);
787
788 if (SIInstrInfo::isMUBUF(*MI))
789 return SIInstrInfo::isLegalMUBUFImmOffset(NewOffset);
790
791 const SIInstrInfo *TII = ST.getInstrInfo();
792 return TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
793 SIInstrFlags::FlatScratch);
794}
795
796const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
797 const MachineFunction &MF, unsigned Kind) const {
798 // This is inaccurate. It depends on the instruction and address space. The
799 // only place where we should hit this is for dealing with frame indexes /
800 // private accesses, so this is correct in that case.
801 return &AMDGPU::VGPR_32RegClass;
802}
803
804static unsigned getNumSubRegsForSpillOp(unsigned Op) {
805
806 switch (Op) {
807 case AMDGPU::SI_SPILL_S1024_SAVE:
808 case AMDGPU::SI_SPILL_S1024_RESTORE:
809 case AMDGPU::SI_SPILL_V1024_SAVE:
810 case AMDGPU::SI_SPILL_V1024_RESTORE:
811 case AMDGPU::SI_SPILL_A1024_SAVE:
812 case AMDGPU::SI_SPILL_A1024_RESTORE:
813 return 32;
814 case AMDGPU::SI_SPILL_S512_SAVE:
815 case AMDGPU::SI_SPILL_S512_RESTORE:
816 case AMDGPU::SI_SPILL_V512_SAVE:
817 case AMDGPU::SI_SPILL_V512_RESTORE:
818 case AMDGPU::SI_SPILL_A512_SAVE:
819 case AMDGPU::SI_SPILL_A512_RESTORE:
820 return 16;
821 case AMDGPU::SI_SPILL_S256_SAVE:
822 case AMDGPU::SI_SPILL_S256_RESTORE:
823 case AMDGPU::SI_SPILL_V256_SAVE:
824 case AMDGPU::SI_SPILL_V256_RESTORE:
825 case AMDGPU::SI_SPILL_A256_SAVE:
826 case AMDGPU::SI_SPILL_A256_RESTORE:
827 return 8;
828 case AMDGPU::SI_SPILL_S224_SAVE:
829 case AMDGPU::SI_SPILL_S224_RESTORE:
830 case AMDGPU::SI_SPILL_V224_SAVE:
831 case AMDGPU::SI_SPILL_V224_RESTORE:
832 case AMDGPU::SI_SPILL_A224_SAVE:
833 case AMDGPU::SI_SPILL_A224_RESTORE:
834 return 7;
835 case AMDGPU::SI_SPILL_S192_SAVE:
836 case AMDGPU::SI_SPILL_S192_RESTORE:
837 case AMDGPU::SI_SPILL_V192_SAVE:
838 case AMDGPU::SI_SPILL_V192_RESTORE:
839 case AMDGPU::SI_SPILL_A192_SAVE:
840 case AMDGPU::SI_SPILL_A192_RESTORE:
841 return 6;
842 case AMDGPU::SI_SPILL_S160_SAVE:
843 case AMDGPU::SI_SPILL_S160_RESTORE:
844 case AMDGPU::SI_SPILL_V160_SAVE:
845 case AMDGPU::SI_SPILL_V160_RESTORE:
846 case AMDGPU::SI_SPILL_A160_SAVE:
847 case AMDGPU::SI_SPILL_A160_RESTORE:
848 return 5;
849 case AMDGPU::SI_SPILL_S128_SAVE:
850 case AMDGPU::SI_SPILL_S128_RESTORE:
851 case AMDGPU::SI_SPILL_V128_SAVE:
852 case AMDGPU::SI_SPILL_V128_RESTORE:
853 case AMDGPU::SI_SPILL_A128_SAVE:
854 case AMDGPU::SI_SPILL_A128_RESTORE:
855 return 4;
856 case AMDGPU::SI_SPILL_S96_SAVE:
857 case AMDGPU::SI_SPILL_S96_RESTORE:
858 case AMDGPU::SI_SPILL_V96_SAVE:
859 case AMDGPU::SI_SPILL_V96_RESTORE:
860 case AMDGPU::SI_SPILL_A96_SAVE:
861 case AMDGPU::SI_SPILL_A96_RESTORE:
862 return 3;
863 case AMDGPU::SI_SPILL_S64_SAVE:
864 case AMDGPU::SI_SPILL_S64_RESTORE:
865 case AMDGPU::SI_SPILL_V64_SAVE:
866 case AMDGPU::SI_SPILL_V64_RESTORE:
867 case AMDGPU::SI_SPILL_A64_SAVE:
868 case AMDGPU::SI_SPILL_A64_RESTORE:
869 return 2;
870 case AMDGPU::SI_SPILL_S32_SAVE:
871 case AMDGPU::SI_SPILL_S32_RESTORE:
872 case AMDGPU::SI_SPILL_V32_SAVE:
873 case AMDGPU::SI_SPILL_V32_RESTORE:
874 case AMDGPU::SI_SPILL_A32_SAVE:
875 case AMDGPU::SI_SPILL_A32_RESTORE:
876 return 1;
 877 default: llvm_unreachable("Invalid spill opcode");
878 }
879}
880
881static int getOffsetMUBUFStore(unsigned Opc) {
882 switch (Opc) {
883 case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
884 return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
885 case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
886 return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
887 case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
888 return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
889 case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
890 return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
891 case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
892 return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
893 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
894 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
895 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
896 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
897 default:
898 return -1;
899 }
900}
901
902static int getOffsetMUBUFLoad(unsigned Opc) {
903 switch (Opc) {
904 case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
905 return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
906 case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
907 return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
908 case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
909 return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
910 case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
911 return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
912 case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
913 return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
914 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
915 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
916 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
917 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
918 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
919 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
920 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
921 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
922 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
923 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
924 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
925 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
926 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
927 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
928 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
929 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
930 default:
931 return -1;
932 }
933}
934
935static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
936 MachineBasicBlock &MBB,
937 MachineBasicBlock::iterator MI,
938 int Index, unsigned Lane,
939 unsigned ValueReg, bool IsKill) {
940 MachineFunction *MF = MBB.getParent();
941 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
942 const SIInstrInfo *TII = ST.getInstrInfo();
943
944 MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane);
945
946 if (Reg == AMDGPU::NoRegister)
947 return MachineInstrBuilder();
948
949 bool IsStore = MI->mayStore();
950 MachineRegisterInfo &MRI = MF->getRegInfo();
951 auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
952
953 unsigned Dst = IsStore ? Reg : ValueReg;
954 unsigned Src = IsStore ? ValueReg : Reg;
955 unsigned Opc = (IsStore ^ TRI->isVGPR(MRI, Reg)) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
956 : AMDGPU::V_ACCVGPR_READ_B32_e64;
957
958 auto MIB = BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
959 .addReg(Src, getKillRegState(IsKill));
960 MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
961 return MIB;
962}
963
964// This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
965// need to handle the case where an SGPR may need to be spilled while spilling.
966static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST,
967 MachineFrameInfo &MFI,
968 MachineBasicBlock::iterator MI,
969 int Index,
970 int64_t Offset) {
971 const SIInstrInfo *TII = ST.getInstrInfo();
972 MachineBasicBlock *MBB = MI->getParent();
973 const DebugLoc &DL = MI->getDebugLoc();
974 bool IsStore = MI->mayStore();
975
976 unsigned Opc = MI->getOpcode();
977 int LoadStoreOp = IsStore ?
978 getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
979 if (LoadStoreOp == -1)
980 return false;
981
982 const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
983 if (spillVGPRtoAGPR(ST, *MBB, MI, Index, 0, Reg->getReg(), false).getInstr())
984 return true;
985
986 MachineInstrBuilder NewMI =
987 BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
988 .add(*Reg)
989 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
990 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
991 .addImm(Offset)
992 .addImm(0) // cpol
993 .addImm(0) // tfe
994 .addImm(0) // swz
995 .cloneMemRefs(*MI);
996
997 const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
998 AMDGPU::OpName::vdata_in);
999 if (VDataIn)
1000 NewMI.add(*VDataIn);
1001 return true;
1002}
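As a hedged illustration of the rewrite above (registers and the exact operand order are assumed for readability), an OFFEN spill whose offset can be folded becomes the OFFSET form with no VGPR address, keeping vdata, srsrc and soffset and appending the immediate offset plus zeroed cpol/tfe/swz bits:

  BUFFER_STORE_DWORD_OFFEN  %vdata, %vaddr, %srsrc, %soffset, ...
    -->
  BUFFER_STORE_DWORD_OFFSET %vdata, %srsrc, %soffset, <Offset>, 0, 0, 0

If spillVGPRtoAGPR already handled the lane, no memory instruction is built at all.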
1003
1004static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII,
1005 unsigned LoadStoreOp,
1006 unsigned EltSize) {
1007 bool IsStore = TII->get(LoadStoreOp).mayStore();
1008 bool UseST =
1009 AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0 &&
1010 AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::saddr) < 0;
1011
1012 switch (EltSize) {
1013 case 4:
1014 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1015 : AMDGPU::SCRATCH_LOAD_DWORD_SADDR;
1016 break;
1017 case 8:
1018 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR
1019 : AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR;
1020 break;
1021 case 12:
1022 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR
1023 : AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR;
1024 break;
1025 case 16:
1026 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX4_SADDR
1027 : AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR;
1028 break;
1029 default:
1030 llvm_unreachable("Unexpected spill load/store size!");
1031 }
1032
1033 if (UseST)
1034 LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
1035
1036 return LoadStoreOp;
1037}
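A small usage sketch with assumed inputs (this call site is hypothetical, not from the file):

  // A 16-byte flat-scratch store request selects the DWORDX4 SADDR opcode; the
  // ST (no-address) form is only substituted when the incoming opcode has
  // neither a vaddr nor a saddr operand.
  unsigned Op =
      getFlatScratchSpillOpcode(TII, AMDGPU::SCRATCH_STORE_DWORD_SADDR, /*EltSize=*/16);
  // Op == AMDGPU::SCRATCH_STORE_DWORDX4_SADDR, since the SADDR input keeps UseST false.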
1038
1039void SIRegisterInfo::buildSpillLoadStore(
1040 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1041 unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
1042 MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO,
1043 RegScavenger *RS, LivePhysRegs *LiveRegs) const {
1044 assert((!RS || !LiveRegs) && "Only RS or LiveRegs can be set but not both");
1045
1046 MachineFunction *MF = MBB.getParent();
1047 const SIInstrInfo *TII = ST.getInstrInfo();
1048 const MachineFrameInfo &MFI = MF->getFrameInfo();
1049 const SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
1050
1051 const MCInstrDesc *Desc = &TII->get(LoadStoreOp);
1052 const DebugLoc &DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1053 bool IsStore = Desc->mayStore();
1054 bool IsFlat = TII->isFLATScratch(LoadStoreOp);
1055
1056 bool Scavenged = false;
1057 MCRegister SOffset = ScratchOffsetReg;
1058
1059 const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
1060 // On gfx90a+ AGPR is a regular VGPR acceptable for loads and stores.
1061 const bool IsAGPR = !ST.hasGFX90AInsts() && hasAGPRs(RC);
1062 const unsigned RegWidth = AMDGPU::getRegBitWidth(RC->getID()) / 8;
1063
1064 // Always use 4 byte operations for AGPRs because we need to scavenge
1065 // a temporary VGPR.
1066 unsigned EltSize = (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u) : 4u;
1067 unsigned NumSubRegs = RegWidth / EltSize;
1068 unsigned Size = NumSubRegs * EltSize;
1069 unsigned RemSize = RegWidth - Size;
1070 unsigned NumRemSubRegs = RemSize ? 1 : 0;
1071 int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
1072 int64_t MaxOffset = Offset + Size + RemSize - EltSize;
1073 int64_t ScratchOffsetRegDelta = 0;
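// Worked example (values assumed, not from the source): a 192-bit VGPR tuple
// spilled via flat scratch has RegWidth = 24 and EltSize = 16, giving
// NumSubRegs = 1, Size = 16, RemSize = 8 and NumRemSubRegs = 1 -- one DWORDX4
// access followed by one DWORDX2 access. MUBUF and AGPR spills keep
// EltSize = 4 and split the value into RegWidth / 4 dword accesses.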
1074
1075 if (IsFlat && EltSize > 4) {
1076 LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
1077 Desc = &TII->get(LoadStoreOp);
1078 }
1079
1080 Align Alignment = MFI.getObjectAlign(Index);
1081 const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
1082
1083 assert((IsFlat || ((Offset % EltSize) == 0)) &&
1084 "unexpected VGPR spill offset");
1085
1086 bool IsOffsetLegal =
1087 IsFlat ? TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
1088 SIInstrFlags::FlatScratch)
1089 : SIInstrInfo::isLegalMUBUFImmOffset(MaxOffset);
1090 if (!IsOffsetLegal || (IsFlat && !SOffset && !ST.hasFlatScratchSTMode())) {
1091 SOffset = MCRegister();
1092
1093 // We currently only support spilling VGPRs to EltSize boundaries, meaning
1094 // we can simplify the adjustment of Offset here to just scale with
1095 // WavefrontSize.
1096 if (!IsFlat)
1097 Offset *= ST.getWavefrontSize();
1098
1099 // We don't have access to the register scavenger if this function is called
1100 // during PEI::scavengeFrameVirtualRegs() so use LiveRegs in this case.
1101 if (RS) {
1102 SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0, false);
1103 } else if (LiveRegs) {
1104 for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {
1105 if (LiveRegs->available(MF->getRegInfo(), Reg)) {
1106 SOffset = Reg;
1107 break;
1108 }
1109 }
1110 }
1111
1112 if (!SOffset) {
1113 // There are no free SGPRs, and we are in the process of spilling
1114 // VGPRs too. Since we need a VGPR in order to spill SGPRs (this is true
1115 // on SI/CI, and on VI it is true until we implement spilling using scalar
1116 // stores), we have no way to free up an SGPR. Our solution here is to
1117 // add the offset directly to the ScratchOffset or StackPtrOffset
1118 // register, and then subtract the offset after the spill to return the
1119 // register to its original value.
1120 if (!ScratchOffsetReg)
1121 ScratchOffsetReg = FuncInfo->getStackPtrOffsetReg();
1122 SOffset = ScratchOffsetReg;
1123 ScratchOffsetRegDelta = Offset;
1124 } else {
1125 Scavenged = true;
1126 }
1127
1128 if (!SOffset)
1129 report_fatal_error("could not scavenge SGPR to spill in entry function");
1130
1131 if (ScratchOffsetReg == AMDGPU::NoRegister) {
1132 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset).addImm(Offset);
1133 } else {
1134 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
1135 .addReg(ScratchOffsetReg)
1136 .addImm(Offset);
1137 }
1138
1139 Offset = 0;
1140 }
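// Illustrative sketch of the fallback described above, assuming no SGPR could
// be scavenged and SOffset falls back to the stack pointer copy (register
// names are hypothetical):
//   s_add_i32 s32, s32, <Offset>   ; fold the offset into the register
//   ... the spill/reload below uses s32 with an immediate offset of 0 ...
//   s_add_i32 s32, s32, -<Offset>  ; ScratchOffsetRegDelta undoes the change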
1141
1142 if (IsFlat && SOffset == AMDGPU::NoRegister) {
1143 assert(AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0
1144 && "Unexpected vaddr for flat scratch with a FI operand");
1145
1146 assert(ST.hasFlatScratchSTMode());
1147 LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
1148 Desc = &TII->get(LoadStoreOp);
1149 }
1150
1151 Register TmpReg;
1152
1153 for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;
1154 ++i, RegOffset += EltSize) {
1155 if (i == NumSubRegs) {
1156 EltSize = RemSize;
1157 LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
1158 }
1159 Desc = &TII->get(LoadStoreOp);
1160
1161 unsigned NumRegs = EltSize / 4;
1162 Register SubReg = e == 1
1163 ? ValueReg
1164 : Register(getSubReg(ValueReg,
1165 getSubRegFromChannel(RegOffset / 4, NumRegs)));
1166
1167 unsigned SOffsetRegState = 0;
1168 unsigned SrcDstRegState = getDefRegState(!IsStore);
1169 if (i + 1 == e) {
1170 SOffsetRegState |= getKillRegState(Scavenged);
1171 // The last implicit use carries the "Kill" flag.
1172 SrcDstRegState |= getKillRegState(IsKill);
1173 }
1174
1175 // Make sure the whole register is defined if there are undef components by
1176 // adding an implicit def of the super-reg on the first instruction.
1177 bool NeedSuperRegDef = e > 1 && IsStore && i == 0;
1178 bool NeedSuperRegImpOperand = e > 1;
1179
1180 unsigned Lane = RegOffset / 4;
1181 unsigned LaneE = (RegOffset + EltSize) / 4;
1182 for ( ; Lane != LaneE; ++Lane) {
1183 bool IsSubReg = e > 1 || EltSize > 4;
1184 Register Sub = IsSubReg
1185 ? Register(getSubReg(ValueReg, getSubRegFromChannel(Lane)))
1186 : ValueReg;
1187 auto MIB = spillVGPRtoAGPR(ST, MBB, MI, Index, Lane, Sub, IsKill);
1188 if (!MIB.getInstr())
1189 break;
1190 if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == 0)) {
1191 MIB.addReg(ValueReg, RegState::ImplicitDefine);
1192 NeedSuperRegDef = false;
1193 }
1194 if (IsSubReg || NeedSuperRegImpOperand) {
1195 NeedSuperRegImpOperand = true;
1196 unsigned State = SrcDstRegState;
1197 if (Lane + 1 != LaneE)
1198 State &= ~RegState::Kill;
1199 MIB.addReg(ValueReg, RegState::Implicit | State);
1200 }
1201 }
1202
1203 if (Lane == LaneE) // Fully spilled into AGPRs.
1204 continue;
1205
1206 // Offset in bytes from the beginning of the ValueReg to the portion we
1207 // still need to spill. It may differ from RegOffset if a portion of the
1208 // current SubReg has already been spilled into AGPRs by the loop above.
1209 unsigned RemRegOffset = Lane * 4;
1210 unsigned RemEltSize = EltSize - (RemRegOffset - RegOffset);
1211 if (RemEltSize != EltSize) { // Partially spilled to AGPRs
1212 assert(IsFlat && EltSize > 4);
1213
1214 unsigned NumRegs = RemEltSize / 4;
1215 SubReg = Register(getSubReg(ValueReg,
1216 getSubRegFromChannel(RemRegOffset / 4, NumRegs)));
1217 unsigned Opc = getFlatScratchSpillOpcode(TII, LoadStoreOp, RemEltSize);
1218 Desc = &TII->get(Opc);
1219 }
1220
1221 unsigned FinalReg = SubReg;
1222
1223 if (IsAGPR) {
1224 assert(EltSize == 4);
1225
1226 if (!TmpReg) {
1227 assert(RS && "Needs to have RegScavenger to spill an AGPR!");
1228 // FIXME: change to scavengeRegisterBackwards()
1229 TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
1230 RS->setRegUsed(TmpReg);
1231 }
1232 if (IsStore) {
1233 auto AccRead = BuildMI(MBB, MI, DL,
1234 TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TmpReg)
1235 .addReg(SubReg, getKillRegState(IsKill));
1236 if (NeedSuperRegDef)
1237 AccRead.addReg(ValueReg, RegState::ImplicitDefine);
1238 AccRead->setAsmPrinterFlag(MachineInstr::ReloadReuse);
1239 }
1240 SubReg = TmpReg;
1241 }
1242
1243 MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(RemRegOffset);
1244 MachineMemOperand *NewMMO =
1245 MF->getMachineMemOperand(PInfo, MMO->getFlags(), RemEltSize,
1246 commonAlignment(Alignment, RemRegOffset));
1247
1248 auto MIB =
1249 BuildMI(MBB, MI, DL, *Desc)
1250 .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill));
1251 if (!IsFlat)
1252 MIB.addReg(FuncInfo->getScratchRSrcReg());
1253
1254 if (SOffset == AMDGPU::NoRegister) {
1255 if (!IsFlat)
1256 MIB.addImm(0);
1257 } else {
1258 MIB.addReg(SOffset, SOffsetRegState);
1259 }
1260 MIB.addImm(Offset + RemRegOffset)
1261 .addImm(0); // cpol
1262 if (!IsFlat)
1263 MIB.addImm(0) // tfe
1264 .addImm(0); // swz
1265 MIB.addMemOperand(NewMMO);
1266
1267 if (!IsAGPR && NeedSuperRegDef)
1268 MIB.addReg(ValueReg, RegState::ImplicitDefine);
1269
1270 if (!IsStore && TmpReg != AMDGPU::NoRegister) {
1271 MIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),
1272 FinalReg)
1273 .addReg(TmpReg, RegState::Kill);
1274 MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
1275 }
1276
1277 if (NeedSuperRegImpOperand)
1278 MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
1279 }
1280
1281 if (ScratchOffsetRegDelta != 0) {
1282 // Subtract the offset we added to the ScratchOffset register.
1283 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
1284 .addReg(SOffset)
1285 .addImm(-ScratchOffsetRegDelta);
1286 }
1287}
1288
1289void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index,
1290 int Offset, bool IsLoad,
1291 bool IsKill) const {
1292 // Load/store VGPR
1293 MachineFrameInfo &FrameInfo = SB.MF.getFrameInfo();
1294 assert(FrameInfo.getStackID(Index) != TargetStackID::SGPRSpill);
1295
1296 Register FrameReg =
1297 FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(SB.MF)
1298 ? getBaseRegister()
1299 : getFrameRegister(SB.MF);
1300
1301 Align Alignment = FrameInfo.getObjectAlign(Index);
1302 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SB.MF, Index);
1303 MachineMemOperand *MMO = SB.MF.getMachineMemOperand(
1304 PtrInfo, IsLoad ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore,
1305 SB.EltSize, Alignment);
1306
1307 if (IsLoad) {
1308 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
1309 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1310 buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg,
1311 Offset * SB.EltSize, MMO, SB.RS);
1312 } else {
1313 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1314 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1315 buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, IsKill, FrameReg,
1316 Offset * SB.EltSize, MMO, SB.RS);
1317 // This only ever adds one VGPR spill
1318 SB.MFI.addToSpilledVGPRs(1);
1319 }
1320}
1321
1322bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
1323 int Index,
1324 RegScavenger *RS,
1325 LiveIntervals *LIS,
1326 bool OnlyToVGPR) const {
1327 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
1328
1329 ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills =
1330 SB.MFI.getSGPRToVGPRSpills(Index);
1331 bool SpillToVGPR = !VGPRSpills.empty();
1332 if (OnlyToVGPR && !SpillToVGPR)
1333 return false;
1334
1335 assert(SpillToVGPR || (SB.SuperReg != SB.MFI.getStackPtrOffsetReg() &&
1336 SB.SuperReg != SB.MFI.getFrameOffsetReg()));
1337
1338 if (SpillToVGPR) {
1339 for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
1340 Register SubReg =
1341 SB.NumSubRegs == 1
1342 ? SB.SuperReg
1343 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1344 SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
1345
1346 bool UseKill = SB.IsKill && i == SB.NumSubRegs - 1;
1347
1348 // Mark the "old value of vgpr" input undef only if this is the first sgpr
1349 // spill to this specific vgpr in the first basic block.
1350 auto MIB = BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
1351 Spill.VGPR)
1352 .addReg(SubReg, getKillRegState(UseKill))
1353 .addImm(Spill.Lane)
1354 .addReg(Spill.VGPR);
1355 if (LIS) {
1356 if (i == 0)
1357 LIS->ReplaceMachineInstrInMaps(*MI, *MIB);
1358 else
1359 LIS->InsertMachineInstrInMaps(*MIB);
1360 }
1361
1362 if (i == 0 && SB.NumSubRegs > 1) {
1363 // We may be spilling a super-register which is only partially defined,
1364 // and need to ensure later spills think the value is defined.
1365 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1366 }
1367
1368 if (SB.NumSubRegs > 1)
1369 MIB.addReg(SB.SuperReg, getKillRegState(UseKill) | RegState::Implicit);
1370
1371 // FIXME: Since this spills to another register instead of an actual
1372 // frame index, we should delete the frame index when all references to
1373 // it are fixed.
1374 }
1375 } else {
1376 SB.prepare();
1377
1378 // SubReg carries the "Kill" flag when SubReg == SB.SuperReg.
1379 unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill);
1380
1381 // Per VGPR helper data
1382 auto PVD = SB.getPerVGPRData();
1383
1384 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1385 unsigned TmpVGPRFlags = RegState::Undef;
1386
1387 // Write sub registers into the VGPR
1388 for (unsigned i = Offset * PVD.PerVGPR,
1389 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
1390 i < e; ++i) {
1391 Register SubReg =
1392 SB.NumSubRegs == 1
1393 ? SB.SuperReg
1394 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1395
1396 MachineInstrBuilder WriteLane =
1397 BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
1398 SB.TmpVGPR)
1399 .addReg(SubReg, SubKillState)
1400 .addImm(i % PVD.PerVGPR)
1401 .addReg(SB.TmpVGPR, TmpVGPRFlags);
1402 TmpVGPRFlags = 0;
1403
1404 if (LIS) {
1405 if (i == 0)
1406 LIS->ReplaceMachineInstrInMaps(*MI, *WriteLane);
1407 else
1408 LIS->InsertMachineInstrInMaps(*WriteLane);
1409 }
1410
1411 // There could be undef components of a spilled super register.
1412 // TODO: Can we detect this and skip the spill?
1413 if (SB.NumSubRegs > 1) {
1414 // The last implicit use of the SB.SuperReg carries the "Kill" flag.
1415 unsigned SuperKillState = 0;
1416 if (i + 1 == SB.NumSubRegs)
1417 SuperKillState |= getKillRegState(SB.IsKill);
1418 WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState);
1419 }
1420 }
1421
1422 // Write out VGPR
1423 SB.readWriteTmpVGPR(Offset, /*IsLoad*/ false);
1424 }
1425
1426 SB.restore();
1427 }
1428
1429 MI->eraseFromParent();
1430 SB.MFI.addToSpilledSGPRs(SB.NumSubRegs);
1431
1432 if (LIS)
1433 LIS->removeAllRegUnitsForPhysReg(SB.SuperReg);
1434
1435 return true;
1436}
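A hedged example of the lane spill built in the SpillToVGPR branch above (registers and lane indices are assumed): spilling a 64-bit SGPR pair into two lanes of the reserved VGPR produces

  v_writelane_b32 v40, s30, 0   ; lane 0 holds the low half
  v_writelane_b32 v40, s31, 1   ; lane 1 holds the high half

and restoreSGPR below reads the lanes back with v_readlane_b32.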
1437
1438bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
1439 int Index,
1440 RegScavenger *RS,
1441 LiveIntervals *LIS,
1442 bool OnlyToVGPR) const {
1443 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
1444
1445 ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills =
1446 SB.MFI.getSGPRToVGPRSpills(Index);
1447 bool SpillToVGPR = !VGPRSpills.empty();
5
Assuming the condition is false
1448 if (OnlyToVGPR && !SpillToVGPR)
5.1
'OnlyToVGPR' is false
1449 return false;
1450
1451 if (SpillToVGPR) {
5.2
'SpillToVGPR' is false
6
Taking false branch
1452 for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
1453 Register SubReg =
1454 SB.NumSubRegs == 1
1455 ? SB.SuperReg
1456 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1457
1458 SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
1459 auto MIB =
1460 BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
1461 .addReg(Spill.VGPR)
1462 .addImm(Spill.Lane);
1463 if (SB.NumSubRegs > 1 && i == 0)
1464 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1465 if (LIS) {
1466 if (i == e - 1)
1467 LIS->ReplaceMachineInstrInMaps(*MI, *MIB);
1468 else
1469 LIS->InsertMachineInstrInMaps(*MIB);
1470 }
1471
1472 }
1473 } else {
1474 SB.prepare();
7
Calling 'SGPRSpillBuilder::prepare'
1475
1476 // Per VGPR helper data
1477 auto PVD = SB.getPerVGPRData();
1478
1479 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1480 // Load in VGPR data
1481 SB.readWriteTmpVGPR(Offset, /*IsLoad*/ true);
1482
1483 // Unpack lanes
1484 for (unsigned i = Offset * PVD.PerVGPR,
1485 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
1486 i < e; ++i) {
1487 Register SubReg =
1488 SB.NumSubRegs == 1
1489 ? SB.SuperReg
1490 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1491
1492 bool LastSubReg = (i + 1 == e);
1493 auto MIB = BuildMI(SB.MBB, MI, SB.DL,
1494 SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
1495 .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
1496 .addImm(i);
1497 if (SB.NumSubRegs > 1 && i == 0)
1498 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1499 if (LIS) {
1500 if (i == e - 1)
1501 LIS->ReplaceMachineInstrInMaps(*MI, *MIB);
1502 else
1503 LIS->InsertMachineInstrInMaps(*MIB);
1504 }
1505 }
1506 }
1507
1508 SB.restore();
1509 }
1510
1511 MI->eraseFromParent();
1512
1513 if (LIS)
1514 LIS->removeAllRegUnitsForPhysReg(SB.SuperReg);
1515
1516 return true;
1517}
1518
1519/// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
1520/// a VGPR and the stack slot can be safely eliminated when all other users are
1521/// handled.
1522bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
1523 MachineBasicBlock::iterator MI,
1524 int FI,
1525 RegScavenger *RS,
1526 LiveIntervals *LIS) const {
1527 switch (MI->getOpcode()) {
1528 case AMDGPU::SI_SPILL_S1024_SAVE:
1529 case AMDGPU::SI_SPILL_S512_SAVE:
1530 case AMDGPU::SI_SPILL_S256_SAVE:
1531 case AMDGPU::SI_SPILL_S224_SAVE:
1532 case AMDGPU::SI_SPILL_S192_SAVE:
1533 case AMDGPU::SI_SPILL_S160_SAVE:
1534 case AMDGPU::SI_SPILL_S128_SAVE:
1535 case AMDGPU::SI_SPILL_S96_SAVE:
1536 case AMDGPU::SI_SPILL_S64_SAVE:
1537 case AMDGPU::SI_SPILL_S32_SAVE:
1538 return spillSGPR(MI, FI, RS, LIS, true);
1539 case AMDGPU::SI_SPILL_S1024_RESTORE:
1540 case AMDGPU::SI_SPILL_S512_RESTORE:
1541 case AMDGPU::SI_SPILL_S256_RESTORE:
1542 case AMDGPU::SI_SPILL_S224_RESTORE:
1543 case AMDGPU::SI_SPILL_S192_RESTORE:
1544 case AMDGPU::SI_SPILL_S160_RESTORE:
1545 case AMDGPU::SI_SPILL_S128_RESTORE:
1546 case AMDGPU::SI_SPILL_S96_RESTORE:
1547 case AMDGPU::SI_SPILL_S64_RESTORE:
1548 case AMDGPU::SI_SPILL_S32_RESTORE:
1549 return restoreSGPR(MI, FI, RS, LIS, true);
1550 default:
1551 llvm_unreachable("not an SGPR spill instruction");
1552 }
1553}
1554
1555void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
1556 int SPAdj, unsigned FIOperandNum,
1557 RegScavenger *RS) const {
1558 MachineFunction *MF = MI->getParent()->getParent();
1559 MachineBasicBlock *MBB = MI->getParent();
1560 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
1561 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
1562 const SIInstrInfo *TII = ST.getInstrInfo();
1563 DebugLoc DL = MI->getDebugLoc();
1564
1565 assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
1
Assuming 'SPAdj' is equal to 0
2
'?' condition is true
1566
1567 MachineOperand &FIOp = MI->getOperand(FIOperandNum);
1568 int Index = MI->getOperand(FIOperandNum).getIndex();
1569
1570 Register FrameReg = FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(*MF)
1571 ? getBaseRegister()
1572 : getFrameRegister(*MF);
1573
1574 switch (MI->getOpcode()) {
3
Control jumps to 'case SI_SPILL_S32_RESTORE:' at line 1600
1575 // SGPR register spill
1576 case AMDGPU::SI_SPILL_S1024_SAVE:
1577 case AMDGPU::SI_SPILL_S512_SAVE:
1578 case AMDGPU::SI_SPILL_S256_SAVE:
1579 case AMDGPU::SI_SPILL_S224_SAVE:
1580 case AMDGPU::SI_SPILL_S192_SAVE:
1581 case AMDGPU::SI_SPILL_S160_SAVE:
1582 case AMDGPU::SI_SPILL_S128_SAVE:
1583 case AMDGPU::SI_SPILL_S96_SAVE:
1584 case AMDGPU::SI_SPILL_S64_SAVE:
1585 case AMDGPU::SI_SPILL_S32_SAVE: {
1586 spillSGPR(MI, Index, RS);
1587 break;
1588 }
1589
1590 // SGPR register restore
1591 case AMDGPU::SI_SPILL_S1024_RESTORE:
1592 case AMDGPU::SI_SPILL_S512_RESTORE:
1593 case AMDGPU::SI_SPILL_S256_RESTORE:
1594 case AMDGPU::SI_SPILL_S224_RESTORE:
1595 case AMDGPU::SI_SPILL_S192_RESTORE:
1596 case AMDGPU::SI_SPILL_S160_RESTORE:
1597 case AMDGPU::SI_SPILL_S128_RESTORE:
1598 case AMDGPU::SI_SPILL_S96_RESTORE:
1599 case AMDGPU::SI_SPILL_S64_RESTORE:
1600 case AMDGPU::SI_SPILL_S32_RESTORE: {
1601 restoreSGPR(MI, Index, RS);
4
Calling 'SIRegisterInfo::restoreSGPR'
1602 break;
1603 }
1604
1605 // VGPR register spill
1606 case AMDGPU::SI_SPILL_V1024_SAVE:
1607 case AMDGPU::SI_SPILL_V512_SAVE:
1608 case AMDGPU::SI_SPILL_V256_SAVE:
1609 case AMDGPU::SI_SPILL_V224_SAVE:
1610 case AMDGPU::SI_SPILL_V192_SAVE:
1611 case AMDGPU::SI_SPILL_V160_SAVE:
1612 case AMDGPU::SI_SPILL_V128_SAVE:
1613 case AMDGPU::SI_SPILL_V96_SAVE:
1614 case AMDGPU::SI_SPILL_V64_SAVE:
1615 case AMDGPU::SI_SPILL_V32_SAVE:
1616 case AMDGPU::SI_SPILL_A1024_SAVE:
1617 case AMDGPU::SI_SPILL_A512_SAVE:
1618 case AMDGPU::SI_SPILL_A256_SAVE:
1619 case AMDGPU::SI_SPILL_A224_SAVE:
1620 case AMDGPU::SI_SPILL_A192_SAVE:
1621 case AMDGPU::SI_SPILL_A160_SAVE:
1622 case AMDGPU::SI_SPILL_A128_SAVE:
1623 case AMDGPU::SI_SPILL_A96_SAVE:
1624 case AMDGPU::SI_SPILL_A64_SAVE:
1625 case AMDGPU::SI_SPILL_A32_SAVE: {
1626 const MachineOperand *VData = TII->getNamedOperand(*MI,
1627 AMDGPU::OpName::vdata);
1628 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
1629 MFI->getStackPtrOffsetReg());
1630
1631 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1632 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1633 auto *MBB = MI->getParent();
1634 buildSpillLoadStore(
1635 *MBB, MI, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
1636 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1637 *MI->memoperands_begin(), RS);
1638 MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
1639 MI->eraseFromParent();
1640 break;
1641 }
1642 case AMDGPU::SI_SPILL_V32_RESTORE:
1643 case AMDGPU::SI_SPILL_V64_RESTORE:
1644 case AMDGPU::SI_SPILL_V96_RESTORE:
1645 case AMDGPU::SI_SPILL_V128_RESTORE:
1646 case AMDGPU::SI_SPILL_V160_RESTORE:
1647 case AMDGPU::SI_SPILL_V192_RESTORE:
1648 case AMDGPU::SI_SPILL_V224_RESTORE:
1649 case AMDGPU::SI_SPILL_V256_RESTORE:
1650 case AMDGPU::SI_SPILL_V512_RESTORE:
1651 case AMDGPU::SI_SPILL_V1024_RESTORE:
1652 case AMDGPU::SI_SPILL_A32_RESTORE:
1653 case AMDGPU::SI_SPILL_A64_RESTORE:
1654 case AMDGPU::SI_SPILL_A96_RESTORE:
1655 case AMDGPU::SI_SPILL_A128_RESTORE:
1656 case AMDGPU::SI_SPILL_A160_RESTORE:
1657 case AMDGPU::SI_SPILL_A192_RESTORE:
1658 case AMDGPU::SI_SPILL_A224_RESTORE:
1659 case AMDGPU::SI_SPILL_A256_RESTORE:
1660 case AMDGPU::SI_SPILL_A512_RESTORE:
1661 case AMDGPU::SI_SPILL_A1024_RESTORE: {
1662 const MachineOperand *VData = TII->getNamedOperand(*MI,
1663 AMDGPU::OpName::vdata);
1664 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
1665 MFI->getStackPtrOffsetReg());
1666
1667 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
1668 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1669 auto *MBB = MI->getParent();
1670 buildSpillLoadStore(
1671 *MBB, MI, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
1672 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1673 *MI->memoperands_begin(), RS);
1674 MI->eraseFromParent();
1675 break;
1676 }
1677
1678 default: {
1679 // Other access to frame index
1680 const DebugLoc &DL = MI->getDebugLoc();
1681
1682 int64_t Offset = FrameInfo.getObjectOffset(Index);
1683 if (ST.enableFlatScratch()) {
1684 if (TII->isFLATScratch(*MI)) {
1685 assert((int16_t)FIOperandNum ==
1686 AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1687 AMDGPU::OpName::saddr));
1688
1689 // The offset is always swizzled, just replace it
1690 if (FrameReg)
1691 FIOp.ChangeToRegister(FrameReg, false);
1692
1693 if (!Offset)
1694 return;
1695
1696 MachineOperand *OffsetOp =
1697 TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
1698 int64_t NewOffset = Offset + OffsetOp->getImm();
1699 if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
1700 SIInstrFlags::FlatScratch)) {
1701 OffsetOp->setImm(NewOffset);
1702 if (FrameReg)
1703 return;
1704 Offset = 0;
1705 }
1706
1707 assert(!TII->getNamedOperand(*MI, AMDGPU::OpName::vaddr) &&
1708 "Unexpected vaddr for flat scratch with a FI operand");
1709
1710 // On GFX10 we have ST mode to use no registers for an address.
1711 // Otherwise we need to materialize 0 into an SGPR.
1712 if (!Offset && ST.hasFlatScratchSTMode()) {
1713 unsigned Opc = MI->getOpcode();
1714 unsigned NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
1715 MI->RemoveOperand(
1716 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
1717 MI->setDesc(TII->get(NewOpc));
1718 return;
1719 }
1720 }
1721
1722 if (!FrameReg) {
1723 FIOp.ChangeToImmediate(Offset);
1724 if (TII->isImmOperandLegal(*MI, FIOperandNum, FIOp))
1725 return;
1726 }
1727
1728 // We need to use a register here. Check if we can use an SGPR or need
1729 // a VGPR.
1730 FIOp.ChangeToRegister(AMDGPU::M0, false);
1731 bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, &FIOp);
1732
1733 if (!Offset && FrameReg && UseSGPR) {
1734 FIOp.setReg(FrameReg);
1735 return;
1736 }
1737
1738 const TargetRegisterClass *RC = UseSGPR ? &AMDGPU::SReg_32_XM0RegClass
1739 : &AMDGPU::VGPR_32RegClass;
1740
1741 Register TmpReg = RS->scavengeRegister(RC, MI, 0, !UseSGPR);
1742 FIOp.setReg(TmpReg);
1743 FIOp.setIsKill(true);
1744
1745 if ((!FrameReg || !Offset) && TmpReg) {
1746 unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1747 auto MIB = BuildMI(*MBB, MI, DL, TII->get(Opc), TmpReg);
1748 if (FrameReg)
1749 MIB.addReg(FrameReg);
1750 else
1751 MIB.addImm(Offset);
1752
1753 return;
1754 }
1755
1756 Register TmpSReg =
1757 UseSGPR ? TmpReg
1758 : RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0,
1759 !UseSGPR);
1760
1761 // TODO: for flat scratch another attempt can be made with a VGPR index
1762 // if no SGPRs can be scavenged.
1763 if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
1764 report_fatal_error("Cannot scavenge register in FI elimination!");
1765
1766 if (!TmpSReg) {
1767 // Use frame register and restore it after.
1768 TmpSReg = FrameReg;
1769 FIOp.setReg(FrameReg);
1770 FIOp.setIsKill(false);
1771 }
1772
1773 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg)
1774 .addReg(FrameReg)
1775 .addImm(Offset);
1776
1777 if (!UseSGPR)
1778 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1779 .addReg(TmpSReg, RegState::Kill);
1780
1781 if (TmpSReg == FrameReg) {
1782 // Undo frame register modification.
1783 BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32),
1784 FrameReg)
1785 .addReg(FrameReg)
1786 .addImm(-Offset);
1787 }
1788
1789 return;
1790 }
1791
1792 bool IsMUBUF = TII->isMUBUF(*MI);
1793
1794 if (!IsMUBUF && !MFI->isEntryFunction()) {
1795 // Convert to a swizzled stack address by scaling by the wave size.
1796 //
1797 // In an entry function/kernel the offset is already swizzled.
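// Hedged worked example (the object offset is assumed): on a wave64
// subtarget getWavefrontSizeLog2() is 6, so an object at unswizzled frame
// offset 256 is addressed as ResultReg = (FrameReg >> 6) + 256, which the
// code below materializes with V_LSHRREV_B32 followed by an add.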
1798
1799 bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
1800 Register ResultReg =
1801 IsCopy ? MI->getOperand(0).getReg()
1802 : RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
1803
1804 int64_t Offset = FrameInfo.getObjectOffset(Index);
1805 if (Offset == 0) {
1806 // XXX - This never happens because of emergency scavenging slot at 0?
1807 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
1808 .addImm(ST.getWavefrontSizeLog2())
1809 .addReg(FrameReg);
1810 } else {
1811 if (auto MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) {
1812 // Reuse ResultReg in intermediate step.
1813 Register ScaledReg = ResultReg;
1814
1815 BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
1816 ScaledReg)
1817 .addImm(ST.getWavefrontSizeLog2())
1818 .addReg(FrameReg);
1819
1820 const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
1821
1822 // TODO: Fold if use instruction is another add of a constant.
1823 if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
1824 // FIXME: This can fail
1825 MIB.addImm(Offset);
1826 MIB.addReg(ScaledReg, RegState::Kill);
1827 if (!IsVOP2)
1828 MIB.addImm(0); // clamp bit
1829 } else {
1830 assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 &&
1831 "Need to reuse carry out register");
1832
1833 // Use scavenged unused carry out as offset register.
1834 Register ConstOffsetReg;
1835 if (!isWave32)
1836 ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
1837 else
1838 ConstOffsetReg = MIB.getReg(1);
1839
1840 BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
1841 .addImm(Offset);
1842 MIB.addReg(ConstOffsetReg, RegState::Kill);
1843 MIB.addReg(ScaledReg, RegState::Kill);
1844 MIB.addImm(0); // clamp bit
1845 }
1846 } else {
1847 // We have to produce a carry out, and there isn't a free SGPR pair
1848 // for it. We can keep the whole computation on the SALU to avoid
1849 // clobbering an additional register at the cost of an extra mov.
1850
1851 // We may have 1 free scratch SGPR even though a carry out is
1852 // unavailable. Only one additional mov is needed.
1853 Register TmpScaledReg =
1854 RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
1855 Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
1856
1857 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
1858 .addReg(FrameReg)
1859 .addImm(ST.getWavefrontSizeLog2());
1860 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
1861 .addReg(ScaledReg, RegState::Kill)
1862 .addImm(Offset);
1863 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
1864 .addReg(ScaledReg, RegState::Kill);
1865
1866 // If there were truly no free SGPRs, we need to undo everything.
1867 if (!TmpScaledReg.isValid()) {
1868 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
1869 .addReg(ScaledReg, RegState::Kill)
1870 .addImm(-Offset);
1871 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
1872 .addReg(FrameReg)
1873 .addImm(ST.getWavefrontSizeLog2());
1874 }
1875 }
1876 }
1877
1878 // Don't introduce an extra copy if we're just materializing in a mov.
1879 if (IsCopy)
1880 MI->eraseFromParent();
1881 else
1882 FIOp.ChangeToRegister(ResultReg, false, false, true);
1883 return;
1884 }
1885
1886 if (IsMUBUF) {
1887 // Disable offen so we don't need a 0 vgpr base.
1888 assert(static_cast<int>(FIOperandNum) ==
1889 AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1890 AMDGPU::OpName::vaddr));
1891
1892 auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
1893 assert((SOffset.isImm() && SOffset.getImm() == 0));
1894
1895 if (FrameReg != AMDGPU::NoRegister)
1896 SOffset.ChangeToRegister(FrameReg, false);
1897
1898 int64_t Offset = FrameInfo.getObjectOffset(Index);
1899 int64_t OldImm
1900 = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
1901 int64_t NewOffset = OldImm + Offset;
1902
1903 if (SIInstrInfo::isLegalMUBUFImmOffset(NewOffset) &&
1904 buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
1905 MI->eraseFromParent();
1906 return;
1907 }
1908 }
1909
1910 // If the offset is simply too big, don't convert to a scratch wave offset
1911 // relative index.
1912
1913 FIOp.ChangeToImmediate(Offset);
1914 if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
1915 Register TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
1916 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1917 .addImm(Offset);
1918 FIOp.ChangeToRegister(TmpReg, false, false, true);
1919 }
1920 }
1921 }
1922}
1923
1924StringRef SIRegisterInfo::getRegAsmName(MCRegister Reg) const {
1925 return AMDGPUInstPrinter::getRegisterName(Reg);
1926}
1927
1928static const TargetRegisterClass *
1929getAnyVGPRClassForBitWidth(unsigned BitWidth) {
1930 if (BitWidth <= 64)
1931 return &AMDGPU::VReg_64RegClass;
1932 if (BitWidth <= 96)
1933 return &AMDGPU::VReg_96RegClass;
1934 if (BitWidth <= 128)
1935 return &AMDGPU::VReg_128RegClass;
1936 if (BitWidth <= 160)
1937 return &AMDGPU::VReg_160RegClass;
1938 if (BitWidth <= 192)
1939 return &AMDGPU::VReg_192RegClass;
1940 if (BitWidth <= 224)
1941 return &AMDGPU::VReg_224RegClass;
1942 if (BitWidth <= 256)
1943 return &AMDGPU::VReg_256RegClass;
1944 if (BitWidth <= 512)
1945 return &AMDGPU::VReg_512RegClass;
1946 if (BitWidth <= 1024)
1947 return &AMDGPU::VReg_1024RegClass;
1948
1949 return nullptr;
1950}
1951
1952static const TargetRegisterClass *
1953getAlignedVGPRClassForBitWidth(unsigned BitWidth) {
1954 if (BitWidth <= 64)
1955 return &AMDGPU::VReg_64_Align2RegClass;
1956 if (BitWidth <= 96)
1957 return &AMDGPU::VReg_96_Align2RegClass;
1958 if (BitWidth <= 128)
1959 return &AMDGPU::VReg_128_Align2RegClass;
1960 if (BitWidth <= 160)
1961 return &AMDGPU::VReg_160_Align2RegClass;
1962 if (BitWidth <= 192)
1963 return &AMDGPU::VReg_192_Align2RegClass;
1964 if (BitWidth <= 224)
1965 return &AMDGPU::VReg_224_Align2RegClass;
1966 if (BitWidth <= 256)
1967 return &AMDGPU::VReg_256_Align2RegClass;
1968 if (BitWidth <= 512)
1969 return &AMDGPU::VReg_512_Align2RegClass;
1970 if (BitWidth <= 1024)
1971 return &AMDGPU::VReg_1024_Align2RegClass;
1972
1973 return nullptr;
1974}
1975
1976const TargetRegisterClass *
1977SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) const {
1978 if (BitWidth == 1)
1979 return &AMDGPU::VReg_1RegClass;
1980 if (BitWidth <= 16)
1981 return &AMDGPU::VGPR_LO16RegClass;
1982 if (BitWidth <= 32)
1983 return &AMDGPU::VGPR_32RegClass;
1984 return ST.needsAlignedVGPRs() ? getAlignedVGPRClassForBitWidth(BitWidth)
1985 : getAnyVGPRClassForBitWidth(BitWidth);
1986}
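A brief usage sketch (the queried width and the TRI pointer are assumptions):

  // 96-bit values land in VReg_96 normally, or in the even-aligned variant on
  // subtargets where ST.needsAlignedVGPRs() is true.
  const TargetRegisterClass *RC = TRI->getVGPRClassForBitWidth(96);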
1987
1988static const TargetRegisterClass *
1989getAnyAGPRClassForBitWidth(unsigned BitWidth) {
1990 if (BitWidth <= 64)
1991 return &AMDGPU::AReg_64RegClass;
1992 if (BitWidth <= 96)
1993 return &AMDGPU::AReg_96RegClass;
1994 if (BitWidth <= 128)
1995 return &AMDGPU::AReg_128RegClass;
1996 if (BitWidth <= 160)
1997 return &AMDGPU::AReg_160RegClass;
1998 if (BitWidth <= 192)
1999 return &AMDGPU::AReg_192RegClass;
2000 if (BitWidth <= 224)
2001 return &AMDGPU::AReg_224RegClass;
2002 if (BitWidth <= 256)
2003 return &AMDGPU::AReg_256RegClass;
2004 if (BitWidth <= 512)
2005 return &AMDGPU::AReg_512RegClass;
2006 if (BitWidth <= 1024)
2007 return &AMDGPU::AReg_1024RegClass;
2008
2009 return nullptr;
2010}
2011
2012static const TargetRegisterClass *
2013getAlignedAGPRClassForBitWidth(unsigned BitWidth) {
2014 if (BitWidth <= 64)
2015 return &AMDGPU::AReg_64_Align2RegClass;
2016 if (BitWidth <= 96)
2017 return &AMDGPU::AReg_96_Align2RegClass;
2018 if (BitWidth <= 128)
2019 return &AMDGPU::AReg_128_Align2RegClass;
2020 if (BitWidth <= 160)
2021 return &AMDGPU::AReg_160_Align2RegClass;
2022 if (BitWidth <= 192)
2023 return &AMDGPU::AReg_192_Align2RegClass;
2024 if (BitWidth <= 224)
2025 return &AMDGPU::AReg_224_Align2RegClass;
2026 if (BitWidth <= 256)
2027 return &AMDGPU::AReg_256_Align2RegClass;
2028 if (BitWidth <= 512)
2029 return &AMDGPU::AReg_512_Align2RegClass;
2030 if (BitWidth <= 1024)
2031 return &AMDGPU::AReg_1024_Align2RegClass;
2032
2033 return nullptr;
2034}
2035
2036const TargetRegisterClass *
2037SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) const {
2038 if (BitWidth <= 16)
2039 return &AMDGPU::AGPR_LO16RegClass;
2040 if (BitWidth <= 32)
2041 return &AMDGPU::AGPR_32RegClass;
2042 return ST.needsAlignedVGPRs() ? getAlignedAGPRClassForBitWidth(BitWidth)
2043 : getAnyAGPRClassForBitWidth(BitWidth);
2044}
2045
2046const TargetRegisterClass *
2047SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
2048 if (BitWidth <= 16)
2049 return &AMDGPU::SGPR_LO16RegClass;
2050 if (BitWidth <= 32)
2051 return &AMDGPU::SReg_32RegClass;
2052 if (BitWidth <= 64)
2053 return &AMDGPU::SReg_64RegClass;
2054 if (BitWidth <= 96)
2055 return &AMDGPU::SGPR_96RegClass;
2056 if (BitWidth <= 128)
2057 return &AMDGPU::SGPR_128RegClass;
2058 if (BitWidth <= 160)
2059 return &AMDGPU::SGPR_160RegClass;
2060 if (BitWidth <= 192)
2061 return &AMDGPU::SGPR_192RegClass;
2062 if (BitWidth <= 224)
2063 return &AMDGPU::SGPR_224RegClass;
2064 if (BitWidth <= 256)
2065 return &AMDGPU::SGPR_256RegClass;
2066 if (BitWidth <= 512)
2067 return &AMDGPU::SGPR_512RegClass;
2068 if (BitWidth <= 1024)
2069 return &AMDGPU::SGPR_1024RegClass;
2070
2071 return nullptr;
2072}
2073
2074// FIXME: This is very slow. It might be worth creating a map from physreg to
2075// register class.
2076const TargetRegisterClass *
2077SIRegisterInfo::getPhysRegClass(MCRegister Reg) const {
2078 static const TargetRegisterClass *const BaseClasses[] = {
2079 &AMDGPU::VGPR_LO16RegClass,
2080 &AMDGPU::VGPR_HI16RegClass,
2081 &AMDGPU::SReg_LO16RegClass,
2082 &AMDGPU::AGPR_LO16RegClass,
2083 &AMDGPU::VGPR_32RegClass,
2084 &AMDGPU::SReg_32RegClass,
2085 &AMDGPU::AGPR_32RegClass,
2086 &AMDGPU::AGPR_32RegClass,
2087 &AMDGPU::VReg_64_Align2RegClass,
2088 &AMDGPU::VReg_64RegClass,
2089 &AMDGPU::SReg_64RegClass,
2090 &AMDGPU::AReg_64_Align2RegClass,
2091 &AMDGPU::AReg_64RegClass,
2092 &AMDGPU::VReg_96_Align2RegClass,
2093 &AMDGPU::VReg_96RegClass,
2094 &AMDGPU::SReg_96RegClass,
2095 &AMDGPU::AReg_96_Align2RegClass,
2096 &AMDGPU::AReg_96RegClass,
2097 &AMDGPU::VReg_128_Align2RegClass,
2098 &AMDGPU::VReg_128RegClass,
2099 &AMDGPU::SReg_128RegClass,
2100 &AMDGPU::AReg_128_Align2RegClass,
2101 &AMDGPU::AReg_128RegClass,
2102 &AMDGPU::VReg_160_Align2RegClass,
2103 &AMDGPU::VReg_160RegClass,
2104 &AMDGPU::SReg_160RegClass,
2105 &AMDGPU::AReg_160_Align2RegClass,
2106 &AMDGPU::AReg_160RegClass,
2107 &AMDGPU::VReg_192_Align2RegClass,
2108 &AMDGPU::VReg_192RegClass,
2109 &AMDGPU::SReg_192RegClass,
2110 &AMDGPU::AReg_192_Align2RegClass,
2111 &AMDGPU::AReg_192RegClass,
2112 &AMDGPU::VReg_224_Align2RegClass,
2113 &AMDGPU::VReg_224RegClass,
2114 &AMDGPU::SReg_224RegClass,
2115 &AMDGPU::AReg_224_Align2RegClass,
2116 &AMDGPU::AReg_224RegClass,
2117 &AMDGPU::VReg_256_Align2RegClass,
2118 &AMDGPU::VReg_256RegClass,
2119 &AMDGPU::SReg_256RegClass,
2120 &AMDGPU::AReg_256_Align2RegClass,
2121 &AMDGPU::AReg_256RegClass,
2122 &AMDGPU::VReg_512_Align2RegClass,
2123 &AMDGPU::VReg_512RegClass,
2124 &AMDGPU::SReg_512RegClass,
2125 &AMDGPU::AReg_512_Align2RegClass,
2126 &AMDGPU::AReg_512RegClass,
2127 &AMDGPU::SReg_1024RegClass,
2128 &AMDGPU::VReg_1024_Align2RegClass,
2129 &AMDGPU::VReg_1024RegClass,
2130 &AMDGPU::AReg_1024_Align2RegClass,
2131 &AMDGPU::AReg_1024RegClass,
2132 &AMDGPU::SCC_CLASSRegClass,
2133 &AMDGPU::Pseudo_SReg_32RegClass,
2134 &AMDGPU::Pseudo_SReg_128RegClass,
2135 };
2136
2137 for (const TargetRegisterClass *BaseClass : BaseClasses) {
2138 if (BaseClass->contains(Reg)) {
2139 return BaseClass;
2140 }
2141 }
2142 return nullptr;
2143}
2144
2145bool SIRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,
2146 Register Reg) const {
2147 const TargetRegisterClass *RC;
2148 if (Reg.isVirtual())
2149 RC = MRI.getRegClass(Reg);
2150 else
2151 RC = getPhysRegClass(Reg);
2152 return isSGPRClass(RC);
2153}
2154
2155// TODO: It might be helpful to have some target specific flags in
2156// TargetRegisterClass to mark which classes are VGPRs to make this trivial.
2157bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
2158 unsigned Size = getRegSizeInBits(*RC);
2159 if (Size == 16) {
2160 return getCommonSubClass(&AMDGPU::VGPR_LO16RegClass, RC) != nullptr ||
2161 getCommonSubClass(&AMDGPU::VGPR_HI16RegClass, RC) != nullptr;
2162 }
2163 const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size);
2164 if (!VRC) {
2165 assert(Size < 32 && "Invalid register class size");
2166 return false;
2167 }
2168 return getCommonSubClass(VRC, RC) != nullptr;
2169}
2170
2171bool SIRegisterInfo::hasAGPRs(const TargetRegisterClass *RC) const {
2172 unsigned Size = getRegSizeInBits(*RC);
2173 if (Size < 16)
2174 return false;
2175 const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size);
2176 if (!ARC) {
2177 assert(getVGPRClassForBitWidth(Size) && "Invalid register class size");
2178 return false;
2179 }
2180 return getCommonSubClass(ARC, RC) != nullptr;
2181}
2182
2183const TargetRegisterClass *
2184SIRegisterInfo::getEquivalentVGPRClass(const TargetRegisterClass *SRC) const {
2185 unsigned Size = getRegSizeInBits(*SRC);
2186 const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size);
2187 assert(VRC && "Invalid register class size");
2188 return VRC;
2189}
2190
2191const TargetRegisterClass *
2192SIRegisterInfo::getEquivalentAGPRClass(const TargetRegisterClass *SRC) const {
2193 unsigned Size = getRegSizeInBits(*SRC);
2194 const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size);
2195 assert(ARC && "Invalid register class size");
2196 return ARC;
2197}
2198
2199const TargetRegisterClass *
2200SIRegisterInfo::getEquivalentSGPRClass(const TargetRegisterClass *VRC) const {
2201 unsigned Size = getRegSizeInBits(*VRC);
2202 if (Size == 32)
2203 return &AMDGPU::SGPR_32RegClass;
2204 const TargetRegisterClass *SRC = getSGPRClassForBitWidth(Size);
2205 assert(SRC && "Invalid register class size");
2206 return SRC;
2207}
2208
2209const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
2210 const TargetRegisterClass *RC, unsigned SubIdx) const {
2211 if (SubIdx == AMDGPU::NoSubRegister)
2212 return RC;
2213
2214 // We can assume that each lane corresponds to one 32-bit register.
2215 unsigned Size = getNumChannelsFromSubReg(SubIdx) * 32;
2216 if (isSGPRClass(RC)) {
2217 if (Size == 32)
2218 RC = &AMDGPU::SGPR_32RegClass;
2219 else
2220 RC = getSGPRClassForBitWidth(Size);
2221 } else if (hasAGPRs(RC)) {
2222 RC = getAGPRClassForBitWidth(Size);
2223 } else {
2224 RC = getVGPRClassForBitWidth(Size);
2225 }
2226 assert(RC && "Invalid sub-register class size");
2227 return RC;
2228}
2229
2230const TargetRegisterClass *
2231SIRegisterInfo::getCompatibleSubRegClass(const TargetRegisterClass *SuperRC,
2232 const TargetRegisterClass *SubRC,
2233 unsigned SubIdx) const {
2234 // Ensure this subregister index is aligned in the super register.
2235 const TargetRegisterClass *MatchRC =
2236 getMatchingSuperRegClass(SuperRC, SubRC, SubIdx);
2237 return MatchRC && MatchRC->hasSubClassEq(SuperRC) ? MatchRC : nullptr;
2238}
2239
2240bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
2241 if (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
2242 OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST)
2243 return !ST.hasMFMAInlineLiteralBug();
2244
2245 return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
2246 OpType <= AMDGPU::OPERAND_SRC_LAST;
2247}
2248
2249bool SIRegisterInfo::shouldRewriteCopySrc(
2250 const TargetRegisterClass *DefRC,
2251 unsigned DefSubReg,
2252 const TargetRegisterClass *SrcRC,
2253 unsigned SrcSubReg) const {
2254 // We want to prefer the smallest register class possible, so we don't want to
2255 // stop and rewrite on anything that looks like a subregister
2256 // extract. Operations mostly don't care about the super register class, so we
2257 // only want to stop on the most basic of copies between the same register
2258 // class.
2259 //
2260 // e.g. if we have something like
2261 // %0 = ...
2262 // %1 = ...
2263 // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
2264 // %3 = COPY %2, sub0
2265 //
2266 // We want to look through the COPY to find:
2267 // => %3 = COPY %0
2268
2269 // Plain copy.
2270 return getCommonSubClass(DefRC, SrcRC) != nullptr;
2271}
2272
2273bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
2274 // TODO: 64-bit operands have extending behavior from 32-bit literal.
2275 return OpType >= AMDGPU::OPERAND_REG_IMM_FIRST &&
2276 OpType <= AMDGPU::OPERAND_REG_IMM_LAST;
2277}
2278
2279/// Returns the lowest register that is not used at any point in the function.
2280/// If all registers are used, then this function will return
2281/// AMDGPU::NoRegister. If \p ReserveHighestVGPR = true, then return the
2282/// highest unused register.
2283MCRegister SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
2284 const TargetRegisterClass *RC,
2285 const MachineFunction &MF,
2286 bool ReserveHighestVGPR) const {
2287 if (ReserveHighestVGPR) {
2288 for (MCRegister Reg : reverse(*RC))
2289 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
2290 return Reg;
2291 } else {
2292 for (MCRegister Reg : *RC)
2293 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
2294 return Reg;
2295 }
2296 return MCRegister();
2297}
2298
2299ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
2300 unsigned EltSize) const {
2301 const unsigned RegBitWidth = AMDGPU::getRegBitWidth(*RC->MC);
2302 assert(RegBitWidth >= 32 && RegBitWidth <= 1024);
2303
2304 const unsigned RegDWORDs = RegBitWidth / 32;
2305 const unsigned EltDWORDs = EltSize / 4;
2306 assert(RegSplitParts.size() + 1 >= EltDWORDs);
2307
2308 const std::vector<int16_t> &Parts = RegSplitParts[EltDWORDs - 1];
2309 const unsigned NumParts = RegDWORDs / EltDWORDs;
2310
2311 return makeArrayRef(Parts.data(), NumParts);
2312}
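Worked example of the arithmetic above: splitting a 128-bit register class into 64-bit elements (EltSize == 8 bytes) gives RegDWORDs = 128 / 32 = 4 and EltDWORDs = 8 / 4 = 2, so the returned ArrayRef holds the first RegDWORDs / EltDWORDs = 2 entries of RegSplitParts[EltDWORDs - 1], i.e. two sub-register indices each covering 64 bits.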
2313
2314const TargetRegisterClass*
2315SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
2316 Register Reg) const {
2317 return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegClass(Reg);
2318}
2319
2320bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
2321 Register Reg) const {
2322 const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
2323 // Registers without classes are unaddressable, SGPR-like registers.
2324 return RC && hasVGPRs(RC);
2325}
2326
2327bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
2328 Register Reg) const {
2329 const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
2330
2331 // Registers without classes are unaddressable, SGPR-like registers.
2332 return RC && hasAGPRs(RC);
2333}
2334
2335bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
2336 const TargetRegisterClass *SrcRC,
2337 unsigned SubReg,
2338 const TargetRegisterClass *DstRC,
2339 unsigned DstSubReg,
2340 const TargetRegisterClass *NewRC,
2341 LiveIntervals &LIS) const {
2342 unsigned SrcSize = getRegSizeInBits(*SrcRC);
2343 unsigned DstSize = getRegSizeInBits(*DstRC);
2344 unsigned NewSize = getRegSizeInBits(*NewRC);
2345
2346 // Do not increase the size of registers beyond a dword; we would need to
2347 // allocate adjacent registers and constrain regalloc more than needed.
2348
2349 // Always allow dword coalescing.
2350 if (SrcSize <= 32 || DstSize <= 32)
2351 return true;
2352
2353 return NewSize <= DstSize || NewSize <= SrcSize;
2354}
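Concretely: a copy where either side is 32 bits or narrower is always coalesced, while for a 64-bit source and 64-bit destination a 128-bit NewRC is rejected (NewSize exceeds both SrcSize and DstSize) and a 64-bit NewRC is accepted.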
2355
2356unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
2357 MachineFunction &MF) const {
2358 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
2359
2360 unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
2361 MF.getFunction());
2362 switch (RC->getID()) {
2363 default:
2364 return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
2365 case AMDGPU::VGPR_32RegClassID:
2366 case AMDGPU::VGPR_LO16RegClassID:
2367 case AMDGPU::VGPR_HI16RegClassID:
2368 return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
2369 case AMDGPU::SGPR_32RegClassID:
2370 case AMDGPU::SGPR_LO16RegClassID:
2371 return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
2372 }
2373}
2374
2375unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
2376 unsigned Idx) const {
2377 if (Idx == AMDGPU::RegisterPressureSets::VGPR_32 ||
2378 Idx == AMDGPU::RegisterPressureSets::AGPR_32)
2379 return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
2380 const_cast<MachineFunction &>(MF));
2381
2382 if (Idx == AMDGPU::RegisterPressureSets::SReg_32)
2383 return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
2384 const_cast<MachineFunction &>(MF));
2385
2386 llvm_unreachable("Unexpected register pressure set!")::llvm::llvm_unreachable_internal("Unexpected register pressure set!"
, "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp"
, 2386)
;
2387}
2388
2389const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
2390 static const int Empty[] = { -1 };
2391
2392 if (RegPressureIgnoredUnits[RegUnit])
2393 return Empty;
2394
2395 return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
2396}
2397
2398MCRegister SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
2399 // Not a callee saved register.
2400 return AMDGPU::SGPR30_SGPR31;
2401}
2402
2403const TargetRegisterClass *
2404SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
2405 const RegisterBank &RB,
2406 const MachineRegisterInfo &MRI) const {
2407 switch (RB.getID()) {
2408 case AMDGPU::VGPRRegBankID:
2409 return getVGPRClassForBitWidth(std::max(32u, Size));
2410 case AMDGPU::VCCRegBankID:
2411 assert(Size == 1);
2412 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
2413 : &AMDGPU::SReg_64_XEXECRegClass;
2414 case AMDGPU::SGPRRegBankID:
2415 return getSGPRClassForBitWidth(std::max(32u, Size));
2416 case AMDGPU::AGPRRegBankID:
2417 return getAGPRClassForBitWidth(std::max(32u, Size));
2418 default:
2419 llvm_unreachable("unknown register bank")::llvm::llvm_unreachable_internal("unknown register bank", "/build/llvm-toolchain-snapshot-13~++20210726100616+dead50d4427c/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp"
, 2419)
;
2420 }
2421}
2422
2423const TargetRegisterClass *
2424SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
2425 const MachineRegisterInfo &MRI) const {
2426 const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(MO.getReg());
2427 if (const RegisterBank *RB = RCOrRB.dyn_cast<const RegisterBank*>())
2428 return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB, MRI);
2429
2430 const TargetRegisterClass *RC = RCOrRB.get<const TargetRegisterClass*>();
2431 return getAllocatableClass(RC);
2432}
2433
2434MCRegister SIRegisterInfo::getVCC() const {
2435 return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
2436}
2437
2438const TargetRegisterClass *SIRegisterInfo::getVGPR64Class() const {
2439 // VGPR tuples have an alignment requirement on gfx90a variants.
2440 return ST.needsAlignedVGPRs() ? &AMDGPU::VReg_64_Align2RegClass
2441 : &AMDGPU::VReg_64RegClass;
2442}
2443
2444const TargetRegisterClass *
2445SIRegisterInfo::getRegClass(unsigned RCID) const {
2446 switch ((int)RCID) {
2447 case AMDGPU::SReg_1RegClassID:
2448 return getBoolRC();
2449 case AMDGPU::SReg_1_XEXECRegClassID:
2450 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
2451 : &AMDGPU::SReg_64_XEXECRegClass;
2452 case -1:
2453 return nullptr;
2454 default:
2455 return AMDGPUGenRegisterInfo::getRegClass(RCID);
2456 }
2457}
2458
2459// Find reaching register definition
2460MachineInstr *SIRegisterInfo::findReachingDef(Register Reg, unsigned SubReg,
2461 MachineInstr &Use,
2462 MachineRegisterInfo &MRI,
2463 LiveIntervals *LIS) const {
2464 auto &MDT = LIS->getAnalysis<MachineDominatorTree>();
2465 SlotIndex UseIdx = LIS->getInstructionIndex(Use);
2466 SlotIndex DefIdx;
2467
2468 if (Reg.isVirtual()) {
2469 if (!LIS->hasInterval(Reg))
2470 return nullptr;
2471 LiveInterval &LI = LIS->getInterval(Reg);
2472 LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
2473 : MRI.getMaxLaneMaskForVReg(Reg);
2474 VNInfo *V = nullptr;
2475 if (LI.hasSubRanges()) {
2476 for (auto &S : LI.subranges()) {
2477 if ((S.LaneMask & SubLanes) == SubLanes) {
2478 V = S.getVNInfoAt(UseIdx);
2479 break;
2480 }
2481 }
2482 } else {
2483 V = LI.getVNInfoAt(UseIdx);
2484 }
2485 if (!V)
2486 return nullptr;
2487 DefIdx = V->def;
2488 } else {
2489 // Find last def.
2490 for (MCRegUnitIterator Units(Reg.asMCReg(), this); Units.isValid();
2491 ++Units) {
2492 LiveRange &LR = LIS->getRegUnit(*Units);
2493 if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
2494 if (!DefIdx.isValid() ||
2495 MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
2496 LIS->getInstructionFromIndex(V->def)))
2497 DefIdx = V->def;
2498 } else {
2499 return nullptr;
2500 }
2501 }
2502 }
2503
2504 MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);
2505
2506 if (!Def || !MDT.dominates(Def, &Use))
2507 return nullptr;
2508
2509 assert(Def->modifiesRegister(Reg, this));
2510
2511 return Def;
2512}
2513
2514MCPhysReg SIRegisterInfo::get32BitRegister(MCPhysReg Reg) const {
2515 assert(getRegSizeInBits(*getPhysRegClass(Reg)) <= 32);
2516
2517 for (const TargetRegisterClass &RC : { AMDGPU::VGPR_32RegClass,
2518 AMDGPU::SReg_32RegClass,
2519 AMDGPU::AGPR_32RegClass } ) {
2520 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC))
2521 return Super;
2522 }
2523 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
2524 &AMDGPU::VGPR_32RegClass)) {
2525 return Super;
2526 }
2527
2528 return AMDGPU::NoRegister;
2529}
2530
2531bool SIRegisterInfo::isProperlyAlignedRC(const TargetRegisterClass &RC) const {
2532 if (!ST.needsAlignedVGPRs())
2533 return true;
2534
2535 if (hasVGPRs(&RC))
2536 return RC.hasSuperClassEq(getVGPRClassForBitWidth(getRegSizeInBits(RC)));
2537 if (hasAGPRs(&RC))
2538 return RC.hasSuperClassEq(getAGPRClassForBitWidth(getRegSizeInBits(RC)));
2539
2540 return true;
2541}
2542
2543bool SIRegisterInfo::isConstantPhysReg(MCRegister PhysReg) const {
2544 switch (PhysReg) {
2545 case AMDGPU::SGPR_NULL:
2546 case AMDGPU::SRC_SHARED_BASE:
2547 case AMDGPU::SRC_PRIVATE_BASE:
2548 case AMDGPU::SRC_SHARED_LIMIT:
2549 case AMDGPU::SRC_PRIVATE_LIMIT:
2550 return true;
2551 default:
2552 return false;
2553 }
2554}
2555
2556ArrayRef<MCPhysReg>
2557SIRegisterInfo::getAllSGPR128(const MachineFunction &MF) const {
2558 return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
2559 ST.getMaxNumSGPRs(MF) / 4);
2560}
2561
2562ArrayRef<MCPhysReg>
2563SIRegisterInfo::getAllSGPR64(const MachineFunction &MF) const {
2564 return makeArrayRef(AMDGPU::SGPR_64RegClass.begin(),
2565 ST.getMaxNumSGPRs(MF) / 2);
2566}
2567
2568ArrayRef<MCPhysReg>
2569SIRegisterInfo::getAllSGPR32(const MachineFunction &MF) const {
2570 return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(), ST.getMaxNumSGPRs(MF));
2571}

/usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/bits/stl_algobase.h

1// Core algorithmic facilities -*- C++ -*-
2
3// Copyright (C) 2001-2020 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25/*
26 *
27 * Copyright (c) 1994
28 * Hewlett-Packard Company
29 *
30 * Permission to use, copy, modify, distribute and sell this software
31 * and its documentation for any purpose is hereby granted without fee,
32 * provided that the above copyright notice appear in all copies and
33 * that both that copyright notice and this permission notice appear
34 * in supporting documentation. Hewlett-Packard Company makes no
35 * representations about the suitability of this software for any
36 * purpose. It is provided "as is" without express or implied warranty.
37 *
38 *
39 * Copyright (c) 1996-1998
40 * Silicon Graphics Computer Systems, Inc.
41 *
42 * Permission to use, copy, modify, distribute and sell this software
43 * and its documentation for any purpose is hereby granted without fee,
44 * provided that the above copyright notice appear in all copies and
45 * that both that copyright notice and this permission notice appear
46 * in supporting documentation. Silicon Graphics makes no
47 * representations about the suitability of this software for any
48 * purpose. It is provided "as is" without express or implied warranty.
49 */
50
51/** @file bits/stl_algobase.h
52 * This is an internal header file, included by other library headers.
53 * Do not attempt to use it directly. @headername{algorithm}
54 */
55
56#ifndef _STL_ALGOBASE_H
57#define _STL_ALGOBASE_H 1
58
59#include <bits/c++config.h>
60#include <bits/functexcept.h>
61#include <bits/cpp_type_traits.h>
62#include <ext/type_traits.h>
63#include <ext/numeric_traits.h>
64#include <bits/stl_pair.h>
65#include <bits/stl_iterator_base_types.h>
66#include <bits/stl_iterator_base_funcs.h>
67#include <bits/stl_iterator.h>
68#include <bits/concept_check.h>
69#include <debug/debug.h>
70#include <bits/move.h> // For std::swap
71#include <bits/predefined_ops.h>
72#if __cplusplus >= 201103L
73# include <type_traits>
74#endif
75#if __cplusplus > 201703L
76# include <compare>
77#endif
78
79namespace std _GLIBCXX_VISIBILITY(default)
80{
81_GLIBCXX_BEGIN_NAMESPACE_VERSION
82
83 /*
84 * A constexpr wrapper for __builtin_memcmp.
85 * @param __num The number of elements of type _Tp (not bytes).
86 */
87 template<typename _Tp, typename _Up>
88 _GLIBCXX14_CONSTEXPR
89 inline int
90 __memcmp(const _Tp* __first1, const _Up* __first2, size_t __num)
91 {
92#if __cplusplus >= 201103L
93 static_assert(sizeof(_Tp) == sizeof(_Up), "can be compared with memcmp");
94#endif
95#ifdef __cpp_lib_is_constant_evaluated
96 if (std::is_constant_evaluated())
97 {
98 for(; __num > 0; ++__first1, ++__first2, --__num)
99 if (*__first1 != *__first2)
100 return *__first1 < *__first2 ? -1 : 1;
101 return 0;
102 }
103 else
104#endif
105 return __builtin_memcmp(__first1, __first2, sizeof(_Tp) * __num);
106 }
107
108#if __cplusplus < 201103L
109 // See http://gcc.gnu.org/ml/libstdc++/2004-08/msg00167.html: in a
110 // nutshell, we are partially implementing the resolution of DR 187,
111 // when it's safe, i.e., the value_types are equal.
112 template<bool _BoolType>
113 struct __iter_swap
114 {
115 template<typename _ForwardIterator1, typename _ForwardIterator2>
116 static void
117 iter_swap(_ForwardIterator1 __a, _ForwardIterator2 __b)
118 {
119 typedef typename iterator_traits<_ForwardIterator1>::value_type
120 _ValueType1;
121 _ValueType1 __tmp = *__a;
122 *__a = *__b;
123 *__b = __tmp;
124 }
125 };
126
127 template<>
128 struct __iter_swap<true>
129 {
130 template<typename _ForwardIterator1, typename _ForwardIterator2>
131 static void
132 iter_swap(_ForwardIterator1 __a, _ForwardIterator2 __b)
133 {
134 swap(*__a, *__b);
135 }
136 };
137#endif // C++03
138
139 /**
140 * @brief Swaps the contents of two iterators.
141 * @ingroup mutating_algorithms
142 * @param __a An iterator.
143 * @param __b Another iterator.
144 * @return Nothing.
145 *
146 * This function swaps the values pointed to by two iterators, not the
147 * iterators themselves.
148 */
149 template<typename _ForwardIterator1, typename _ForwardIterator2>
150 _GLIBCXX20_CONSTEXPR
151 inline void
152 iter_swap(_ForwardIterator1 __a, _ForwardIterator2 __b)
153 {
154 // concept requirements
155 __glibcxx_function_requires(_Mutable_ForwardIteratorConcept<
156 _ForwardIterator1>)
157 __glibcxx_function_requires(_Mutable_ForwardIteratorConcept<
158 _ForwardIterator2>)
159
160#if __cplusplus < 201103L
161 typedef typename iterator_traits<_ForwardIterator1>::value_type
162 _ValueType1;
163 typedef typename iterator_traits<_ForwardIterator2>::value_type
164 _ValueType2;
165
166 __glibcxx_function_requires(_ConvertibleConcept<_ValueType1,
167 _ValueType2>)
168 __glibcxx_function_requires(_ConvertibleConcept<_ValueType2,
169 _ValueType1>)
170
171 typedef typename iterator_traits<_ForwardIterator1>::reference
172 _ReferenceType1;
173 typedef typename iterator_traits<_ForwardIterator2>::reference
174 _ReferenceType2;
175 std::__iter_swap<__are_same<_ValueType1, _ValueType2>::__value
176 && __are_same<_ValueType1&, _ReferenceType1>::__value
177 && __are_same<_ValueType2&, _ReferenceType2>::__value>::
178 iter_swap(__a, __b);
179#else
180 // _GLIBCXX_RESOLVE_LIB_DEFECTS
181 // 187. iter_swap underspecified
182 swap(*__a, *__b);
183#endif
184 }
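A minimal usage sketch (illustrative, not part of the analyzed translation unit): plain pointers are valid mutable forward iterators, so in C++11 and later the call above reduces to swap(*__a, *__b).

  #include <algorithm>
  #include <cassert>

  int main() {
    int a = 1, b = 2;
    std::iter_swap(&a, &b); // swaps the pointed-to values, not the pointers
    assert(a == 2 && b == 1);
  }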
185
186 /**
187 * @brief Swap the elements of two sequences.
188 * @ingroup mutating_algorithms
189 * @param __first1 A forward iterator.
190 * @param __last1 A forward iterator.
191 * @param __first2 A forward iterator.
192 * @return An iterator equal to @p first2+(last1-first1).
193 *
194 * Swaps each element in the range @p [first1,last1) with the
195 * corresponding element in the range @p [first2,(last1-first1)).
196 * The ranges must not overlap.
197 */
198 template<typename _ForwardIterator1, typename _ForwardIterator2>
199 _GLIBCXX20_CONSTEXPR
200 _ForwardIterator2
201 swap_ranges(_ForwardIterator1 __first1, _ForwardIterator1 __last1,
202 _ForwardIterator2 __first2)
203 {
204 // concept requirements
205 __glibcxx_function_requires(_Mutable_ForwardIteratorConcept<
206 _ForwardIterator1>)
207 __glibcxx_function_requires(_Mutable_ForwardIteratorConcept<
208 _ForwardIterator2>)
209 __glibcxx_requires_valid_range(__first1, __last1);
210
211 for (; __first1 != __last1; ++__first1, (void)++__first2)
212 std::iter_swap(__first1, __first2);
213 return __first2;
214 }
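A short illustrative use (not from the analyzed code): swap_ranges applies iter_swap element by element and returns the iterator one past the last swapped element of the second range.

  #include <algorithm>
  #include <array>
  #include <cassert>

  int main() {
    std::array<int, 3> x{1, 2, 3}, y{4, 5, 6};
    auto end2 = std::swap_ranges(x.begin(), x.end(), y.begin());
    assert(x[0] == 4 && y[2] == 3 && end2 == y.end());
  }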
215
216 /**
217 * @brief This does what you think it does.
218 * @ingroup sorting_algorithms
219 * @param __a A thing of arbitrary type.
220 * @param __b Another thing of arbitrary type.
221 * @return The lesser of the parameters.
222 *
223 * This is the simple classic generic implementation. It will work on
224 * temporary expressions, since they are only evaluated once, unlike a
225 * preprocessor macro.
226 */
227 template<typename _Tp>
228 _GLIBCXX14_CONSTEXPR
229 inline const _Tp&
230 min(const _Tp& __a, const _Tp& __b)
231 {
232 // concept requirements
233 __glibcxx_function_requires(_LessThanComparableConcept<_Tp>)
234 //return __b < __a ? __b : __a;
235 if (__b < __a)
20: Assuming '__b' is >= '__a'
21: Taking false branch
236 return __b;
237 return __a;
22: Returning the value 64 (reference to 'Data.PerVGPR')
238 }
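This is the last step of the analyzer's path through std::min: the call returns 64, the value referenced by 'Data.PerVGPR', and that value travels back into SIRegisterInfo.cpp, where the report's warning flags a left shift by 64. Shifting a 64-bit integer by a count greater than or equal to its width is undefined behavior in C++. The sketch below is illustrative only (it is not the SIRegisterInfo.cpp code, and the helper name lowBitMask is hypothetical); it shows the failure mode and one common guard for a mask computation:

  #include <cstdint>

  // Build a mask with the low N bits set. The naive (1ULL << N) - 1 is
  // undefined when N == 64, because the shift count equals the width of
  // uint64_t; special-casing the full-width request avoids that.
  uint64_t lowBitMask(unsigned N) {
    return N >= 64 ? ~uint64_t(0)            // all 64 bits set
                   : (uint64_t(1) << N) - 1; // well-defined for N in [0, 63]
  }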
239
240 /**
241 * @brief This does what you think it does.
242 * @ingroup sorting_algorithms
243 * @param __a A thing of arbitrary type.
244 * @param __b Another thing of arbitrary type.
245 * @return The greater of the parameters.
246 *
247 * This is the simple classic generic implementation. It will work on
248 * temporary expressions, since they are only evaluated once, unlike a
249 * preprocessor macro.
250 */
251 template<typename _Tp>
252 _GLIBCXX14_CONSTEXPR
253 inline const _Tp&
254 max(const _Tp& __a, const _Tp& __b)
255 {
256 // concept requirements
257 __glibcxx_function_requires(_LessThanComparableConcept<_Tp>)
258 //return __a < __b ? __b : __a;
259 if (__a < __b)
260 return __b;
261 return __a;
262 }
263
264 /**
265 * @brief This does what you think it does.
266 * @ingroup sorting_algorithms
267 * @param __a A thing of arbitrary type.
268 * @param __b Another thing of arbitrary type.
269 * @param __comp A @link comparison_functors comparison functor@endlink.
270 * @return The lesser of the parameters.
271 *
272 * This will work on temporary expressions, since they are only evaluated
273 * once, unlike a preprocessor macro.
274 */
275 template<typename _Tp, typename _Compare>
276 _GLIBCXX14_CONSTEXPR
277 inline const _Tp&
278 min(const _Tp& __a, const _Tp& __b, _Compare __comp)
279 {
280 //return __comp(__b, __a) ? __b : __a;
281 if (__comp(__b, __a))
282 return __b;
283 return __a;
284 }
285
286 /**
287 * @brief This does what you think it does.
288 * @ingroup sorting_algorithms
289 * @param __a A thing of arbitrary type.
290 * @param __b Another thing of arbitrary type.
291 * @param __comp A @link comparison_functors comparison functor@endlink.
292 * @return The greater of the parameters.
293 *
294 * This will work on temporary expressions, since they are only evaluated
295 * once, unlike a preprocessor macro.
296 */
297 template<typename _Tp, typename _Compare>
298 _GLIBCXX14_CONSTEXPR
299 inline const _Tp&
300 max(const _Tp& __a, const _Tp& __b, _Compare __comp)
301 {
302 //return __comp(__a, __b) ? __b : __a;
303 if (__comp(__a, __b))
304 return __b;
305 return __a;
306 }
307
308 // Fallback implementation of the function in bits/stl_iterator.h used to
309 // remove the __normal_iterator wrapper. See copy, fill, ...
310 template<typename _Iterator>
311 _GLIBCXX20_CONSTEXPR
312 inline _Iterator
313 __niter_base(_Iterator __it)
314 _GLIBCXX_NOEXCEPT_IF(std::is_nothrow_copy_constructible<_Iterator>::value)
315 { return __it; }
316
317 // Reverse the __niter_base transformation to get a
318 // __normal_iterator back again (this assumes that __normal_iterator
319 // is only used to wrap random access iterators, like pointers).
320 template<typename _From, typename _To>
321 _GLIBCXX20_CONSTEXPR
322 inline _From
323 __niter_wrap(_From __from, _To __res)
324 { return __from + (__res - std::__niter_base(__from)); }
325
326 // No need to wrap, iterator already has the right type.
327 template<typename _Iterator>
328 _GLIBCXX20_CONSTEXPR
329 inline _Iterator
330 __niter_wrap(const _Iterator&, _Iterator __res)
331 { return __res; }
332
333 // All of these auxiliary structs serve two purposes. (1) Replace
334 // calls to copy with memmove whenever possible. (Memmove, not memcpy,
335 // because the input and output ranges are permitted to overlap.)
336 // (2) If we're using random access iterators, then write the loop as
337 // a for loop with an explicit count.
338
339 template<bool _IsMove, bool _IsSimple, typename _Category>
340 struct __copy_move
341 {
342 template<typename _II, typename _OI>
343 _GLIBCXX20_CONSTEXPR
344 static _OI
345 __copy_m(_II __first, _II __last, _OI __result)
346 {
347 for (; __first != __last; ++__result, (void)++__first)
348 *__result = *__first;
349 return __result;
350 }
351 };
352
353#if __cplusplus201402L >= 201103L
354 template<typename _Category>
355 struct __copy_move<true, false, _Category>
356 {
357 template<typename _II, typename _OI>
358 _GLIBCXX20_CONSTEXPR
359 static _OI
360 __copy_m(_II __first, _II __last, _OI __result)
361 {
362 for (; __first != __last; ++__result, (void)++__first)
363 *__result = std::move(*__first);
364 return __result;
365 }
366 };
367#endif
368
369 template<>
370 struct __copy_move<false, false, random_access_iterator_tag>
371 {
372 template<typename _II, typename _OI>
373 _GLIBCXX20_CONSTEXPR
374 static _OI
375 __copy_m(_II __first, _II __last, _OI __result)
376 {
377 typedef typename iterator_traits<_II>::difference_type _Distance;
378 for(_Distance __n = __last - __first; __n > 0; --__n)
379 {
380 *__result = *__first;
381 ++__first;
382 ++__result;
383 }
384 return __result;
385 }
386 };
387
388#if __cplusplus201402L >= 201103L
389 template<>
390 struct __copy_move<true, false, random_access_iterator_tag>
391 {
392 template<typename _II, typename _OI>
393 _GLIBCXX20_CONSTEXPR
394 static _OI
395 __copy_m(_II __first, _II __last, _OI __result)
396 {
397 typedef typename iterator_traits<_II>::difference_type _Distance;
398 for(_Distance __n = __last - __first; __n > 0; --__n)
399 {
400 *__result = std::move(*__first);
401 ++__first;
402 ++__result;
403 }
404 return __result;
405 }
406 };
407#endif
408
409 template<bool _IsMove>
410 struct __copy_move<_IsMove, true, random_access_iterator_tag>
411 {
412 template<typename _Tp>
413 _GLIBCXX20_CONSTEXPR
414 static _Tp*
415 __copy_m(const _Tp* __first, const _Tp* __last, _Tp* __result)
416 {
417#if __cplusplus201402L >= 201103L
418 using __assignable = conditional<_IsMove,
419 is_move_assignable<_Tp>,
420 is_copy_assignable<_Tp>>;
421 // trivial types can have deleted assignment
422 static_assert( __assignable::type::value, "type is not assignable" );
423#endif
424 const ptrdiff_t _Num = __last - __first;
425 if (_Num)
426 __builtin_memmove(__result, __first, sizeof(_Tp) * _Num);
427 return __result + _Num;
428 }
429 };
430
431 // Helpers for streambuf iterators (either istream or ostream).
432 // NB: avoid including <iosfwd>, relatively large.
433 template<typename _CharT>
434 struct char_traits;
435
436 template<typename _CharT, typename _Traits>
437 class istreambuf_iterator;
438
439 template<typename _CharT, typename _Traits>
440 class ostreambuf_iterator;
441
442 template<bool _IsMove, typename _CharT>
443 typename __gnu_cxx::__enable_if<__is_char<_CharT>::__value,
444 ostreambuf_iterator<_CharT, char_traits<_CharT> > >::__type
445 __copy_move_a2(_CharT*, _CharT*,
446 ostreambuf_iterator<_CharT, char_traits<_CharT> >);
447
448 template<bool _IsMove, typename _CharT>
449 typename __gnu_cxx::__enable_if<__is_char<_CharT>::__value,
450 ostreambuf_iterator<_CharT, char_traits<_CharT> > >::__type
451 __copy_move_a2(const _CharT*, const _CharT*,
452 ostreambuf_iterator<_CharT, char_traits<_CharT> >);
453
454 template<bool _IsMove, typename _CharT>
455 typename __gnu_cxx::__enable_if<__is_char<_CharT>::__value,
456 _CharT*>::__type
457 __copy_move_a2(istreambuf_iterator<_CharT, char_traits<_CharT> >,
458 istreambuf_iterator<_CharT, char_traits<_CharT> >, _CharT*);
459
460 template<bool _IsMove, typename _II, typename _OI>
461 _GLIBCXX20_CONSTEXPR
462 inline _OI
463 __copy_move_a2(_II __first, _II __last, _OI __result)
464 {
465 typedef typename iterator_traits<_II>::iterator_category _Category;
466#ifdef __cpp_lib_is_constant_evaluated
467 if (std::is_constant_evaluated())
468 return std::__copy_move<_IsMove, false, _Category>::
469 __copy_m(__first, __last, __result);
470#endif
471 return std::__copy_move<_IsMove, __memcpyable<_OI, _II>::__value,
472 _Category>::__copy_m(__first, __last, __result);
473 }
474
475_GLIBCXX_BEGIN_NAMESPACE_CONTAINER
476
477 template<typename _Tp, typename _Ref, typename _Ptr>
478 struct _Deque_iterator;
479
480_GLIBCXX_END_NAMESPACE_CONTAINER
481
482 template<bool _IsMove,
483 typename _Tp, typename _Ref, typename _Ptr, typename _OI>
484 _OI
485 __copy_move_a1(_GLIBCXX_STD_Cstd::_Deque_iterator<_Tp, _Ref, _Ptr>,
486 _GLIBCXX_STD_Cstd::_Deque_iterator<_Tp, _Ref, _Ptr>,
487 _OI);
488
489 template<bool _IsMove,
490 typename _ITp, typename _IRef, typename _IPtr, typename _OTp>
491 _GLIBCXX_STD_Cstd::_Deque_iterator<_OTp, _OTp&, _OTp*>
492 __copy_move_a1(_GLIBCXX_STD_Cstd::_Deque_iterator<_ITp, _IRef, _IPtr>,
493 _GLIBCXX_STD_Cstd::_Deque_iterator<_ITp, _IRef, _IPtr>,
494 _GLIBCXX_STD_Cstd::_Deque_iterator<_OTp, _OTp&, _OTp*>);
495
496 template<bool _IsMove, typename _II, typename _Tp>
497 typename __gnu_cxx::__enable_if<
498 __is_random_access_iter<_II>::__value,
499 _GLIBCXX_STD_Cstd::_Deque_iterator<_Tp, _Tp&, _Tp*> >::__type
500 __copy_move_a1(_II, _II, _GLIBCXX_STD_Cstd::_Deque_iterator<_Tp, _Tp&, _Tp*>);
501
502 template<bool _IsMove, typename _II, typename _OI>
503 _GLIBCXX20_CONSTEXPR
504 inline _OI
505 __copy_move_a1(_II __first, _II __last, _OI __result)
506 { return std::__copy_move_a2<_IsMove>(__first, __last, __result); }
507
508 template<bool _IsMove, typename _II, typename _OI>
509 _GLIBCXX20_CONSTEXPR
510 inline _OI
511 __copy_move_a(_II __first, _II __last, _OI __result)
512 {
513 return std::__niter_wrap(__result,
514 std::__copy_move_a1<_IsMove>(std::__niter_base(__first),
515 std::__niter_base(__last),
516 std::__niter_base(__result)));
517 }
518
519 template<bool _IsMove,
520 typename _Ite, typename _Seq, typename _Cat, typename _OI>
521 _OI
522 __copy_move_a(const ::__gnu_debug::_Safe_iterator<_Ite, _Seq, _Cat>&,
523 const ::__gnu_debug::_Safe_iterator<_Ite, _Seq, _Cat>&,
524 _OI);
525
526 template<bool _IsMove,
527 typename _II, typename _Ite, typename _Seq, typename _Cat>
528 __gnu_debug::_Safe_iterator<_Ite, _Seq, _Cat>
529 __copy_move_a(_II, _II,
530 const ::__gnu_debug::_Safe_iterator<_Ite, _Seq, _Cat>&);
531
532 template<bool _IsMove,
533 typename _IIte, typename _ISeq, typename _ICat,
534 typename _OIte, typename _OSeq, typename _OCat>
535 ::__gnu_debug::_Safe_iterator<_OIte, _OSeq, _OCat>
536 __copy_move_a(const ::__gnu_debug::_Safe_iterator<_IIte, _ISeq, _ICat>&,
537 const ::__gnu_debug::_Safe_iterator<_IIte, _ISeq, _ICat>&,
538 const ::__gnu_debug::_Safe_iterator<_OIte, _OSeq, _OCat>&);
539
540 /**
541 * @brief Copies the range [first,last) into result.
542 * @ingroup mutating_algorithms
543 * @param __first An input iterator.
544 * @param __last An input iterator.
545 * @param __result An output iterator.
546 * @return result + (last - first)
547 *
548 * This inline function will boil down to a call to @c memmove whenever
549 * possible. Failing that, if random access iterators are passed, then the
550 * loop count will be known (and therefore a candidate for compiler
551 * optimizations such as unrolling). Result may not be contained within
552 * [first,last); the copy_backward function should be used instead.
553 *
554 * Note that the end of the output range is permitted to be contained
555 * within [first,last).
556 */
557 template<typename _II, typename _OI>
558 _GLIBCXX20_CONSTEXPR
559 inline _OI
560 copy(_II __first, _II __last, _OI __result)
561 {
562 // concept requirements
563 __glibcxx_function_requires(_InputIteratorConcept<_II>)
564 __glibcxx_function_requires(_OutputIteratorConcept<_OI,
565 typename iterator_traits<_II>::value_type>)
566 __glibcxx_requires_can_increment_range(__first, __last, __result);
567
568 return std::__copy_move_a<__is_move_iterator<_II>::__value>
569 (std::__miter_base(__first), std::__miter_base(__last), __result);
570 }
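A small illustrative example (not from the analyzed code): for contiguous, trivially copyable element types the call is routed to the memmove-based __copy_move specialization shown above.

  #include <algorithm>
  #include <vector>

  int main() {
    std::vector<int> src{1, 2, 3, 4};
    std::vector<int> dst(src.size());
    // int* is "memcpyable", so after __niter_base unwraps the vector
    // iterators this effectively becomes a single memmove.
    std::copy(src.begin(), src.end(), dst.begin());
  }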
571
572#if __cplusplus201402L >= 201103L
573 /**
574 * @brief Moves the range [first,last) into result.
575 * @ingroup mutating_algorithms
576 * @param __first An input iterator.
577 * @param __last An input iterator.
578 * @param __result An output iterator.
579 * @return result + (last - first)
580 *
581 * This inline function will boil down to a call to @c memmove whenever
582 * possible. Failing that, if random access iterators are passed, then the
583 * loop count will be known (and therefore a candidate for compiler
584 * optimizations such as unrolling). Result may not be contained within
585 * [first,last); the move_backward function should be used instead.
586 *
587 * Note that the end of the output range is permitted to be contained
588 * within [first,last).
589 */
590 template<typename _II, typename _OI>
591 _GLIBCXX20_CONSTEXPR
592 inline _OI
593 move(_II __first, _II __last, _OI __result)
594 {
595 // concept requirements
596 __glibcxx_function_requires(_InputIteratorConcept<_II>)
597 __glibcxx_function_requires(_OutputIteratorConcept<_OI,
598 typename iterator_traits<_II>::value_type>)
599 __glibcxx_requires_can_increment_range(__first, __last, __result);
600
601 return std::__copy_move_a<true>(std::__miter_base(__first),
602 std::__miter_base(__last), __result);
603 }
604
605#define _GLIBCXX_MOVE3(_Tp, _Up, _Vp) std::move(_Tp, _Up, _Vp)
606#else
607#define _GLIBCXX_MOVE3(_Tp, _Up, _Vp) std::copy(_Tp, _Up, _Vp)
608#endif
609
610 template<bool _IsMove, bool _IsSimple, typename _Category>
611 struct __copy_move_backward
612 {
613 template<typename _BI1, typename _BI2>
614 _GLIBCXX20_CONSTEXPR
615 static _BI2
616 __copy_move_b(_BI1 __first, _BI1 __last, _BI2 __result)
617 {
618 while (__first != __last)
619 *--__result = *--__last;
620 return __result;
621 }
622 };
623
624#if __cplusplus201402L >= 201103L
625 template<typename _Category>
626 struct __copy_move_backward<true, false, _Category>
627 {
628 template<typename _BI1, typename _BI2>
629 _GLIBCXX20_CONSTEXPR
630 static _BI2
631 __copy_move_b(_BI1 __first, _BI1 __last, _BI2 __result)
632 {
633 while (__first != __last)
634 *--__result = std::move(*--__last);
635 return __result;
636 }
637 };
638#endif
639
640 template<>
641 struct __copy_move_backward<false, false, random_access_iterator_tag>
642 {
643 template<typename _BI1, typename _BI2>
644 _GLIBCXX20_CONSTEXPR
645 static _BI2
646 __copy_move_b(_BI1 __first, _BI1 __last, _BI2 __result)
647 {
648 typename iterator_traits<_BI1>::difference_type
649 __n = __last - __first;
650 for (; __n > 0; --__n)
651 *--__result = *--__last;
652 return __result;
653 }
654 };
655
656#if __cplusplus201402L >= 201103L
657 template<>
658 struct __copy_move_backward<true, false, random_access_iterator_tag>
659 {
660 template<typename _BI1, typename _BI2>
661 _GLIBCXX20_CONSTEXPR
662 static _BI2
663 __copy_move_b(_BI1 __first, _BI1 __last, _BI2 __result)
664 {
665 typename iterator_traits<_BI1>::difference_type
666 __n = __last - __first;
667 for (; __n > 0; --__n)
668 *--__result = std::move(*--__last);
669 return __result;
670 }
671 };
672#endif
673
674 template<bool _IsMove>
675 struct __copy_move_backward<_IsMove, true, random_access_iterator_tag>
676 {
677 template<typename _Tp>
678 _GLIBCXX20_CONSTEXPR
679 static _Tp*
680 __copy_move_b(const _Tp* __first, const _Tp* __last, _Tp* __result)
681 {
682#if __cplusplus201402L >= 201103L
683 using __assignable = conditional<_IsMove,
684 is_move_assignable<_Tp>,
685 is_copy_assignable<_Tp>>;
686 // trivial types can have deleted assignment
687 static_assert( __assignable::type::value, "type is not assignable" );
688#endif
689 const ptrdiff_t _Num = __last - __first;
690 if (_Num)
691 __builtin_memmove(__result - _Num, __first, sizeof(_Tp) * _Num);
692 return __result - _Num;
693 }
694 };
695
696 template<bool _IsMove, typename _BI1, typename _BI2>
697 _GLIBCXX20_CONSTEXPR
698 inline _BI2
699 __copy_move_backward_a2(_BI1 __first, _BI1 __last, _BI2 __result)
700 {
701 typedef typename iterator_traits<_BI1>::iterator_category _Category;
702#ifdef __cpp_lib_is_constant_evaluated
703 if (std::is_constant_evaluated())
704 return std::__copy_move_backward<_IsMove, false, _Category>::
705 __copy_move_b(__first, __last, __result);
706#endif
707 return std::__copy_move_backward<_IsMove,
708 __memcpyable<_BI2, _BI1>::__value,
709 _Category>::__copy_move_b(__first,
710 __last,
711 __result);
712 }
713
714 template<bool _IsMove, typename _BI1, typename _BI2>
715 _GLIBCXX20_CONSTEXPR
716 inline _BI2
717 __copy_move_backward_a1(_BI1 __first, _BI1 __last, _BI2 __result)
718 { return std::__copy_move_backward_a2<_IsMove>(__first, __last, __result); }
719
720 template<bool _IsMove,
721 typename _Tp, typename _Ref, typename _Ptr, typename _OI>
722 _OI
723 __copy_move_backward_a1(_GLIBCXX_STD_Cstd::_Deque_iterator<_Tp, _Ref, _Ptr>,
724 _GLIBCXX_STD_Cstd::_Deque_iterator<_Tp, _Ref, _Ptr>,
725 _OI);
726
727 template<bool _IsMove,
728 typename _ITp, typename _IRef, typename _IPtr, typename _OTp>
729 _GLIBCXX_STD_Cstd::_Deque_iterator<_OTp, _OTp&, _OTp*>
730 __copy_move_backward_a1(
731 _GLIBCXX_STD_Cstd::_Deque_iterator<_ITp, _IRef, _IPtr>,
732 _GLIBCXX_STD_Cstd::_Deque_iterator<_ITp, _IRef, _IPtr>,
733 _GLIBCXX_STD_Cstd::_Deque_iterator<_OTp, _OTp&, _OTp*>);
734
735 template<bool _IsMove, typename _II, typename _Tp>
736 typename __gnu_cxx::__enable_if<
737 __is_random_access_iter<_II>::__value,
738 _GLIBCXX_STD_Cstd::_Deque_iterator<_Tp, _Tp&, _Tp*> >::__type
739 __copy_move_backward_a1(_II, _II,
740 _GLIBCXX_STD_Cstd::_Deque_iterator<_Tp, _Tp&, _Tp*>);
741
742 template<bool _IsMove, typename _II, typename _OI>
743 _GLIBCXX20_CONSTEXPR
744 inline _OI
745 __copy_move_backward_a(_II __first, _II __last, _OI __result)
746 {
747 return std::__niter_wrap(__result,
748 std::__copy_move_backward_a1<_IsMove>
749 (std::__niter_base(__first), std::__niter_base(__last),
750 std::__niter_base(__result)));
751 }
752
753 template<bool _IsMove,
754 typename _Ite, typename _Seq, typename _Cat, typename _OI>
755 _OI
756 __copy_move_backward_a(
757 const ::__gnu_debug::_Safe_iterator<_Ite, _Seq, _Cat>&,
758 const ::__gnu_debug::_Safe_iterator<_Ite, _Seq, _Cat>&,
759 _OI);
760
761 template<bool _IsMove,
762 typename _II, typename _Ite, typename _Seq, typename _Cat>
763 __gnu_debug::_Safe_iterator<_Ite, _Seq, _Cat>
764 __copy_move_backward_a(_II, _II,
765 const ::__gnu_debug::_Safe_iterator<_Ite, _Seq, _Cat>&);
766
767 template<bool _IsMove,
768 typename _IIte, typename _ISeq, typename _ICat,
769 typename _OIte, typename _OSeq, typename _OCat>
770 ::__gnu_debug::_Safe_iterator<_OIte, _OSeq, _OCat>
771 __copy_move_backward_a(
772 const ::__gnu_debug::_Safe_iterator<_IIte, _ISeq, _ICat>&,
773 const ::__gnu_debug::_Safe_iterator<_IIte, _ISeq, _ICat>&,
774 const ::__gnu_debug::_Safe_iterator<_OIte, _OSeq, _OCat>&);
775
776 /**
777 * @brief Copies the range [first,last) into result.
778 * @ingroup mutating_algorithms
779 * @param __first A bidirectional iterator.
780 * @param __last A bidirectional iterator.
781 * @param __result A bidirectional iterator.
782 * @return result - (last - first)
783 *
784 * The function has the same effect as copy, but starts at the end of the
785 * range and works its way to the start, returning the start of the result.
786 * This inline function will boil down to a call to @c memmove whenever
787 * possible. Failing that, if random access iterators are passed, then the
788 * loop count will be known (and therefore a candidate for compiler
789 * optimizations such as unrolling).
790 *
791 * Result may not be in the range (first,last]. Use copy instead. Note
792 * that the start of the output range may overlap [first,last).
793 */
794 template<typename _BI1, typename _BI2>
795 _GLIBCXX20_CONSTEXPR
796 inline _BI2
797 copy_backward(_BI1 __first, _BI1 __last, _BI2 __result)
798 {
799 // concept requirements
800 __glibcxx_function_requires(_BidirectionalIteratorConcept<_BI1>)
801 __glibcxx_function_requires(_Mutable_BidirectionalIteratorConcept<_BI2>)
802 __glibcxx_function_requires(_ConvertibleConcept<
803 typename iterator_traits<_BI1>::value_type,
804 typename iterator_traits<_BI2>::value_type>)
805 __glibcxx_requires_can_decrement_range(__first, __last, __result);
806
807 return std::__copy_move_backward_a<__is_move_iterator<_BI1>::__value>
808 (std::__miter_base(__first), std::__miter_base(__last), __result);
809 }
810
811#if __cplusplus201402L >= 201103L
812 /**
813 * @brief Moves the range [first,last) into result.
814 * @ingroup mutating_algorithms
815 * @param __first A bidirectional iterator.
816 * @param __last A bidirectional iterator.
817 * @param __result A bidirectional iterator.
818 * @return result - (last - first)
819 *
820 * The function has the same effect as move, but starts at the end of the
821 * range and works its way to the start, returning the start of the result.
822 * This inline function will boil down to a call to @c memmove whenever
823 * possible. Failing that, if random access iterators are passed, then the
824 * loop count will be known (and therefore a candidate for compiler
825 * optimizations such as unrolling).
826 *
827 * Result may not be in the range (first,last]. Use move instead. Note
828 * that the start of the output range may overlap [first,last).
829 */
830 template<typename _BI1, typename _BI2>
831 _GLIBCXX20_CONSTEXPR
832 inline _BI2
833 move_backward(_BI1 __first, _BI1 __last, _BI2 __result)
834 {
835 // concept requirements
836 __glibcxx_function_requires(_BidirectionalIteratorConcept<_BI1>)
837 __glibcxx_function_requires(_Mutable_BidirectionalIteratorConcept<_BI2>)
838 __glibcxx_function_requires(_ConvertibleConcept<
839 typename iterator_traits<_BI1>::value_type,
840 typename iterator_traits<_BI2>::value_type>)
841 __glibcxx_requires_can_decrement_range(__first, __last, __result);
842
843 return std::__copy_move_backward_a<true>(std::__miter_base(__first),
844 std::__miter_base(__last),
845 __result);
846 }
847
848#define _GLIBCXX_MOVE_BACKWARD3(_Tp, _Up, _Vp) std::move_backward(_Tp, _Up, _Vp)
849#else
850#define _GLIBCXX_MOVE_BACKWARD3(_Tp, _Up, _Vp) std::copy_backward(_Tp, _Up, _Vp)
851#endif
852
853 template<typename _ForwardIterator, typename _Tp>
854 _GLIBCXX20_CONSTEXPR
855 inline typename
856 __gnu_cxx::__enable_if<!__is_scalar<_Tp>::__value, void>::__type
857 __fill_a1(_ForwardIterator __first, _ForwardIterator __last,
858 const _Tp& __value)
859 {
860 for (; __first != __last; ++__first)
861 *__first = __value;
862 }
863
864 template<typename _ForwardIterator, typename _Tp>
865 _GLIBCXX20_CONSTEXPR
866 inline typename
867 __gnu_cxx::__enable_if<__is_scalar<_Tp>::__value, void>::__type
868 __fill_a1(_ForwardIterator __first, _ForwardIterator __last,
869 const _Tp& __value)
870 {
871 const _Tp __tmp = __value;
872 for (; __first != __last; ++__first)
873 *__first = __tmp;
874 }
875
876 // Specialization: for char types we can use memset.
877 template<typename _Tp>
878 _GLIBCXX20_CONSTEXPR
879 inline typename
880 __gnu_cxx::__enable_if<__is_byte<_Tp>::__value, void>::__type
881 __fill_a1(_Tp* __first, _Tp* __last, const _Tp& __c)
882 {
883 const _Tp __tmp = __c;
884#if __cpp_lib_is_constant_evaluated
885 if (std::is_constant_evaluated())
886 {
887 for (; __first != __last; ++__first)
888 *__first = __tmp;
889 return;
890 }
891#endif
892 if (const size_t __len = __last - __first)
893 __builtin_memset(__first, static_cast<unsigned char>(__tmp), __len);
894 }
895
896 template<typename _Ite, typename _Cont, typename _Tp>
897 _GLIBCXX20_CONSTEXPR
898 inline void
899 __fill_a1(::__gnu_cxx::__normal_iterator<_Ite, _Cont> __first,
900 ::__gnu_cxx::__normal_iterator<_Ite, _Cont> __last,
901 const _Tp& __value)
902 { std::__fill_a1(__first.base(), __last.base(), __value); }
903
904 template<typename _Tp, typename _VTp>
905 void
906 __fill_a1(const _GLIBCXX_STD_Cstd::_Deque_iterator<_Tp, _Tp&, _Tp*>&,
907 const _GLIBCXX_STD_Cstd::_Deque_iterator<_Tp, _Tp&, _Tp*>&,
908 const _VTp&);
909
910 template<typename _FIte, typename _Tp>
911 _GLIBCXX20_CONSTEXPR
912 inline void
913 __fill_a(_FIte __first, _FIte __last, const _Tp& __value)
914 { std::__fill_a1(__first, __last, __value); }
915
916 template<typename _Ite, typename _Seq, typename _Cat, typename _Tp>
917 void
918 __fill_a(const ::__gnu_debug::_Safe_iterator<_Ite, _Seq, _Cat>&,
919 const ::__gnu_debug::_Safe_iterator<_Ite, _Seq, _Cat>&,
920 const _Tp&);
921
922 /**
923 * @brief Fills the range [first,last) with copies of value.
924 * @ingroup mutating_algorithms
925 * @param __first A forward iterator.
926 * @param __last A forward iterator.
927 * @param __value A reference-to-const of arbitrary type.
928 * @return Nothing.
929 *
930 * This function fills a range with copies of the same value. For char
931 * types filling contiguous areas of memory, this becomes an inline call
932 * to @c memset or @c wmemset.
933 */
934 template<typename _ForwardIterator, typename _Tp>
935 _GLIBCXX20_CONSTEXPR
936 inline void
937 fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value)
938 {
939 // concept requirements
940 __glibcxx_function_requires(_Mutable_ForwardIteratorConcept<
941 _ForwardIterator>)
942 __glibcxx_requires_valid_range(__first, __last);
943
944 std::__fill_a(__first, __last, __value);
945 }
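Illustrative only: for a byte element type over contiguous storage the call reaches the __builtin_memset specialization of __fill_a1 shown above.

  #include <algorithm>

  int main() {
    char buf[16];
    std::fill(buf, buf + 16, '\0'); // byte type + pointers: becomes a memset
  }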
946
947 // Used by fill_n, generate_n, etc. to convert _Size to an integral type:
948 inline _GLIBCXX_CONSTEXPRconstexpr int
949 __size_to_integer(int __n) { return __n; }
950 inline _GLIBCXX_CONSTEXPRconstexpr unsigned
951 __size_to_integer(unsigned __n) { return __n; }
952 inline _GLIBCXX_CONSTEXPRconstexpr long
953 __size_to_integer(long __n) { return __n; }
954 inline _GLIBCXX_CONSTEXPRconstexpr unsigned long
955 __size_to_integer(unsigned long __n) { return __n; }
956 inline _GLIBCXX_CONSTEXPRconstexpr long long
957 __size_to_integer(long long __n) { return __n; }
958 inline _GLIBCXX_CONSTEXPRconstexpr unsigned long long
959 __size_to_integer(unsigned long long __n) { return __n; }
960
961#if defined(__GLIBCXX_TYPE_INT_N_0)
962 inline _GLIBCXX_CONSTEXPRconstexpr __GLIBCXX_TYPE_INT_N_0
963 __size_to_integer(__GLIBCXX_TYPE_INT_N_0 __n) { return __n; }
964 inline _GLIBCXX_CONSTEXPRconstexpr unsigned __GLIBCXX_TYPE_INT_N_0
965 __size_to_integer(unsigned __GLIBCXX_TYPE_INT_N_0 __n) { return __n; }
966#endif
967#if defined(__GLIBCXX_TYPE_INT_N_1)
968 inline _GLIBCXX_CONSTEXPRconstexpr __GLIBCXX_TYPE_INT_N_1
969 __size_to_integer(__GLIBCXX_TYPE_INT_N_1 __n) { return __n; }
970 inline _GLIBCXX_CONSTEXPRconstexpr unsigned __GLIBCXX_TYPE_INT_N_1
971 __size_to_integer(unsigned __GLIBCXX_TYPE_INT_N_1 __n) { return __n; }
972#endif
973#if defined(__GLIBCXX_TYPE_INT_N_2)
974 inline _GLIBCXX_CONSTEXPRconstexpr __GLIBCXX_TYPE_INT_N_2
975 __size_to_integer(__GLIBCXX_TYPE_INT_N_2 __n) { return __n; }
976 inline _GLIBCXX_CONSTEXPRconstexpr unsigned __GLIBCXX_TYPE_INT_N_2
977 __size_to_integer(unsigned __GLIBCXX_TYPE_INT_N_2 __n) { return __n; }
978#endif
979#if defined(__GLIBCXX_TYPE_INT_N_3)
980 inline _GLIBCXX_CONSTEXPRconstexpr unsigned __GLIBCXX_TYPE_INT_N_3
981 __size_to_integer(__GLIBCXX_TYPE_INT_N_3 __n) { return __n; }
982 inline _GLIBCXX_CONSTEXPRconstexpr __GLIBCXX_TYPE_INT_N_3
983 __size_to_integer(unsigned __GLIBCXX_TYPE_INT_N_3 __n) { return __n; }
984#endif
985
986 inline _GLIBCXX_CONSTEXPRconstexpr long long
987 __size_to_integer(float __n) { return __n; }
988 inline _GLIBCXX_CONSTEXPRconstexpr long long
989 __size_to_integer(double __n) { return __n; }
990 inline _GLIBCXX_CONSTEXPRconstexpr long long
991 __size_to_integer(long double __n) { return __n; }
992#if !defined(__STRICT_ANSI__1) && defined(_GLIBCXX_USE_FLOAT1281) && !defined(__CUDACC__)
993 inline _GLIBCXX_CONSTEXPRconstexpr long long
994 __size_to_integer(__float128 __n) { return __n; }
995#endif
996
997 template<typename _OutputIterator, typename _Size, typename _Tp>
998 _GLIBCXX20_CONSTEXPR
999 inline typename
1000 __gnu_cxx::__enable_if<!__is_scalar<_Tp>::__value, _OutputIterator>::__type
1001 __fill_n_a1(_OutputIterator __first, _Size __n, const _Tp& __value)
1002 {
1003 for (; __n > 0; --__n, (void) ++__first)
1004 *__first = __value;
1005 return __first;
1006 }
1007
1008 template<typename _OutputIterator, typename _Size, typename _Tp>
1009 _GLIBCXX20_CONSTEXPR
1010 inline typename
1011 __gnu_cxx::__enable_if<__is_scalar<_Tp>::__value, _OutputIterator>::__type
1012 __fill_n_a1(_OutputIterator __first, _Size __n, const _Tp& __value)
1013 {
1014 const _Tp __tmp = __value;
1015 for (; __n > 0; --__n, (void) ++__first)
1016 *__first = __tmp;
1017 return __first;
1018 }
1019
1020 template<typename _Ite, typename _Seq, typename _Cat, typename _Size,
1021 typename _Tp>
1022 ::__gnu_debug::_Safe_iterator<_Ite, _Seq, _Cat>
1023 __fill_n_a(const ::__gnu_debug::_Safe_iterator<_Ite, _Seq, _Cat>& __first,
1024 _Size __n, const _Tp& __value,
1025 std::input_iterator_tag);
1026
1027 template<typename _OutputIterator, typename _Size, typename _Tp>
1028 _GLIBCXX20_CONSTEXPR
1029 inline _OutputIterator
1030 __fill_n_a(_OutputIterator __first, _Size __n, const _Tp& __value,
1031 std::output_iterator_tag)
1032 {
1033#if __cplusplus201402L >= 201103L
1034 static_assert(is_integral<_Size>{}, "fill_n must pass integral size");
1035#endif
1036 return __fill_n_a1(__first, __n, __value);
1037 }
1038
1039 template<typename _OutputIterator, typename _Size, typename _Tp>
1040 _GLIBCXX20_CONSTEXPR
1041 inline _OutputIterator
1042 __fill_n_a(_OutputIterator __first, _Size __n, const _Tp& __value,
1043 std::input_iterator_tag)
1044 {
1045#if __cplusplus201402L >= 201103L
1046 static_assert(is_integral<_Size>{}, "fill_n must pass integral size");
1047#endif
1048 return __fill_n_a1(__first, __n, __value);
1049 }
1050
1051 template<typename _OutputIterator, typename _Size, typename _Tp>
1052 _GLIBCXX20_CONSTEXPR
1053 inline _OutputIterator
1054 __fill_n_a(_OutputIterator __first, _Size __n, const _Tp& __value,
1055 std::random_access_iterator_tag)
1056 {
1057#if __cplusplus201402L >= 201103L
1058 static_assert(is_integral<_Size>{}, "fill_n must pass integral size");
1059#endif
1060 if (__n <= 0)
1061 return __first;
1062
1063 __glibcxx_requires_can_increment(__first, __n);
1064
1065 std::__fill_a(__first, __first + __n, __value);
1066 return __first + __n;
1067 }
1068
1069 /**
1070 * @brief Fills the range [first,first+n) with copies of value.
1071 * @ingroup mutating_algorithms
1072 * @param __first An output iterator.
1073 * @param __n The count of copies to perform.
1074 * @param __value A reference-to-const of arbitrary type.
1075 * @return The iterator at first+n.
1076 *
1077 * This function fills a range with copies of the same value. For char
1078 * types filling contiguous areas of memory, this becomes an inline call
1079 * to @c memset or @c wmemset.
1080 *
1081 * If @p __n is negative, the function does nothing.
1082 */
1083 // _GLIBCXX_RESOLVE_LIB_DEFECTS
1084 // DR 865. More algorithms that throw away information
1085 // DR 426. search_n(), fill_n(), and generate_n() with negative n
1086 template<typename _OI, typename _Size, typename _Tp>
1087 _GLIBCXX20_CONSTEXPR
1088 inline _OI
1089 fill_n(_OI __first, _Size __n, const _Tp& __value)
1090 {
1091 // concept requirements
1092 __glibcxx_function_requires(_OutputIteratorConcept<_OI, _Tp>)
1093
1094 return std::__fill_n_a(__first, std::__size_to_integer(__n), __value,
1095 std::__iterator_category(__first));
1096 }
1097
1098 template<bool _BoolType>
1099 struct __equal
1100 {
1101 template<typename _II1, typename _II2>
1102 _GLIBCXX20_CONSTEXPR
1103 static bool
1104 equal(_II1 __first1, _II1 __last1, _II2 __first2)
1105 {
1106 for (; __first1 != __last1; ++__first1, (void) ++__first2)
1107 if (!(*__first1 == *__first2))
1108 return false;
1109 return true;
1110 }
1111 };
1112
1113 template<>
1114 struct __equal<true>
1115 {
1116 template<typename _Tp>
1117 _GLIBCXX20_CONSTEXPR
1118 static bool
1119 equal(const _Tp* __first1, const _Tp* __last1, const _Tp* __first2)
1120 {
1121 if (const size_t __len = (__last1 - __first1))
1122 return !std::__memcmp(__first1, __first2, __len);
1123 return true;
1124 }
1125 };
1126
1127 template<typename _Tp, typename _Ref, typename _Ptr, typename _II>
1128 typename __gnu_cxx::__enable_if<
1129 __is_random_access_iter<_II>::__value, bool>::__type
1130 __equal_aux1(_GLIBCXX_STD_Cstd::_Deque_iterator<_Tp, _Ref, _Ptr>,
1131 _GLIBCXX_STD_Cstd::_Deque_iterator<_Tp, _Ref, _Ptr>,
1132 _II);
1133
1134 template<typename _Tp1, typename _Ref1, typename _Ptr1,
1135 typename _Tp2, typename _Ref2, typename _Ptr2>
1136 bool
1137 __equal_aux1(_GLIBCXX_STD_Cstd::_Deque_iterator<_Tp1, _Ref1, _Ptr1>,
1138 _GLIBCXX_STD_Cstd::_Deque_iterator<_Tp1, _Ref1, _Ptr1>,
1139 _GLIBCXX_STD_Cstd::_Deque_iterator<_Tp2, _Ref2, _Ptr2>);
1140
1141 template<typename _II, typename _Tp, typename _Ref, typename _Ptr>
1142 typename __gnu_cxx::__enable_if<
1143 __is_random_access_iter<_II>::__value, bool>::__type
1144 __equal_aux1(_II, _II,
1145 _GLIBCXX_STD_Cstd::_Deque_iterator<_Tp, _Ref, _Ptr>);
1146
1147 template<typename _II1, typename _II2>
1148 _GLIBCXX20_CONSTEXPR
1149 inline bool
1150 __equal_aux1(_II1 __first1, _II1 __last1, _II2 __first2)
1151 {
1152 typedef typename iterator_traits<_II1>::value_type _ValueType1;
1153 const bool __simple = ((__is_integer<_ValueType1>::__value
1154 || __is_pointer<_ValueType1>::__value)
1155 && __memcmpable<_II1, _II2>::__value);
1156 return std::__equal<__simple>::equal(__first1, __last1, __first2);
1157 }
1158
1159 template<typename _II1, typename _II2>
1160 _GLIBCXX20_CONSTEXPR
1161 inline bool
1162 __equal_aux(_II1 __first1, _II1 __last1, _II2 __first2)
1163 {
1164 return std::__equal_aux1(std::__niter_base(__first1),
1165 std::__niter_base(__last1),
1166 std::__niter_base(__first2));
1167 }
1168
1169 template<typename _II1, typename _Seq1, typename _Cat1, typename _II2>
1170 bool
1171 __equal_aux(const ::__gnu_debug::_Safe_iterator<_II1, _Seq1, _Cat1>&,
1172 const ::__gnu_debug::_Safe_iterator<_II1, _Seq1, _Cat1>&,
1173 _II2);
1174
1175 template<typename _II1, typename _II2, typename _Seq2, typename _Cat2>
1176 bool
1177 __equal_aux(_II1, _II1,
1178 const ::__gnu_debug::_Safe_iterator<_II2, _Seq2, _Cat2>&);
1179
1180 template<typename _II1, typename _Seq1, typename _Cat1,
1181 typename _II2, typename _Seq2, typename _Cat2>
1182 bool
1183 __equal_aux(const ::__gnu_debug::_Safe_iterator<_II1, _Seq1, _Cat1>&,
1184 const ::__gnu_debug::_Safe_iterator<_II1, _Seq1, _Cat1>&,
1185 const ::__gnu_debug::_Safe_iterator<_II2, _Seq2, _Cat2>&);
1186
1187 template<typename, typename>
1188 struct __lc_rai
1189 {
1190 template<typename _II1, typename _II2>
1191 _GLIBCXX20_CONSTEXPR
1192 static _II1
1193 __newlast1(_II1, _II1 __last1, _II2, _II2)
1194 { return __last1; }
1195
1196 template<typename _II>
1197 _GLIBCXX20_CONSTEXPR
1198 static bool
1199 __cnd2(_II __first, _II __last)
1200 { return __first != __last; }
1201 };
1202
1203 template<>
1204 struct __lc_rai<random_access_iterator_tag, random_access_iterator_tag>
1205 {
1206 template<typename _RAI1, typename _RAI2>
1207 _GLIBCXX20_CONSTEXPR
1208 static _RAI1
1209 __newlast1(_RAI1 __first1, _RAI1 __last1,
1210 _RAI2 __first2, _RAI2 __last2)
1211 {
1212 const typename iterator_traits<_RAI1>::difference_type
1213 __diff1 = __last1 - __first1;
1214 const typename iterator_traits<_RAI2>::difference_type
1215 __diff2 = __last2 - __first2;
1216 return __diff2 < __diff1 ? __first1 + __diff2 : __last1;
1217 }
1218
1219 template<typename _RAI>
1220 static _GLIBCXX20_CONSTEXPR bool
1221 __cnd2(_RAI, _RAI)
1222 { return true; }
1223 };
1224
1225 template<typename _II1, typename _II2, typename _Compare>
1226 _GLIBCXX20_CONSTEXPR
1227 bool
1228 __lexicographical_compare_impl(_II1 __first1, _II1 __last1,
1229 _II2 __first2, _II2 __last2,
1230 _Compare __comp)
1231 {
1232 typedef typename iterator_traits<_II1>::iterator_category _Category1;
1233 typedef typename iterator_traits<_II2>::iterator_category _Category2;
1234 typedef std::__lc_rai<_Category1, _Category2> __rai_type;
1235
1236 __last1 = __rai_type::__newlast1(__first1, __last1, __first2, __last2);
1237 for (; __first1 != __last1 && __rai_type::__cnd2(__first2, __last2);
1238 ++__first1, (void)++__first2)
1239 {
1240 if (__comp(__first1, __first2))
1241 return true;
1242 if (__comp(__first2, __first1))
1243 return false;
1244 }
1245 return __first1 == __last1 && __first2 != __last2;
1246 }
1247
1248 template<bool _BoolType>
1249 struct __lexicographical_compare
1250 {
1251 template<typename _II1, typename _II2>
1252 _GLIBCXX20_CONSTEXPR
1253 static bool
1254 __lc(_II1 __first1, _II1 __last1, _II2 __first2, _II2 __last2)
1255 {
1256 using __gnu_cxx::__ops::__iter_less_iter;
1257 return std::__lexicographical_compare_impl(__first1, __last1,
1258 __first2, __last2,
1259 __iter_less_iter());
1260 }
1261 };
1262
1263 template<>
1264 struct __lexicographical_compare<true>
1265 {
1266 template<typename _Tp, typename _Up>
1267 _GLIBCXX20_CONSTEXPR
1268 static bool
1269 __lc(const _Tp* __first1, const _Tp* __last1,
1270 const _Up* __first2, const _Up* __last2)
1271 {
1272 const size_t __len1 = __last1 - __first1;
1273 const size_t __len2 = __last2 - __first2;
1274 if (const size_t __len = std::min(__len1, __len2))
1275 if (int __result = std::__memcmp(__first1, __first2, __len))
1276 return __result < 0;
1277 return __len1 < __len2;
1278 }
1279 };
1280
1281 template<typename _II1, typename _II2>
1282 _GLIBCXX20_CONSTEXPR
1283 inline bool
1284 __lexicographical_compare_aux(_II1 __first1, _II1 __last1,
1285 _II2 __first2, _II2 __last2)
1286 {
1287 typedef typename iterator_traits<_II1>::value_type _ValueType1;
1288 typedef typename iterator_traits<_II2>::value_type _ValueType2;
1289 const bool __simple =
1290 (__is_memcmp_ordered_with<_ValueType1, _ValueType2>::__value
1291 && __is_pointer<_II1>::__value
1292 && __is_pointer<_II2>::__value
1293#if __cplusplus > 201703L && __cpp_lib_concepts
1294 // For C++20 iterator_traits<volatile T*>::value_type is non-volatile
1295 // so __is_byte<T> could be true, but we can't use memcmp with
1296 // volatile data.
1297 && !is_volatile_v<remove_reference_t<iter_reference_t<_II1>>>
1298 && !is_volatile_v<remove_reference_t<iter_reference_t<_II2>>>
1299#endif
1300 );
1301
1302 return std::__lexicographical_compare<__simple>::__lc(__first1, __last1,
1303 __first2, __last2);
1304 }
1305
1306 template<typename _ForwardIterator, typename _Tp, typename _Compare>
1307 _GLIBCXX20_CONSTEXPR
1308 _ForwardIterator
1309 __lower_bound(_ForwardIterator __first, _ForwardIterator __last,
1310 const _Tp& __val, _Compare __comp)
1311 {
1312 typedef typename iterator_traits<_ForwardIterator>::difference_type
1313 _DistanceType;
1314
1315 _DistanceType __len = std::distance(__first, __last);
1316
1317 while (__len > 0)
1318 {
1319 _DistanceType __half = __len >> 1;
1320 _ForwardIterator __middle = __first;
1321 std::advance(__middle, __half);
1322 if (__comp(__middle, __val))
1323 {
1324 __first = __middle;
1325 ++__first;
1326 __len = __len - __half - 1;
1327 }
1328 else
1329 __len = __half;
1330 }
1331 return __first;
1332 }
1333
1334 /**
1335 * @brief Finds the first position in which @a val could be inserted
1336 * without changing the ordering.
1337 * @param __first An iterator.
1338 * @param __last Another iterator.
1339 * @param __val The search term.
1340 * @return An iterator pointing to the first element <em>not less
1341 * than</em> @a val, or end() if every element is less than
1342 * @a val.
1343 * @ingroup binary_search_algorithms
1344 */
1345 template<typename _ForwardIterator, typename _Tp>
1346 _GLIBCXX20_CONSTEXPR
1347 inline _ForwardIterator
1348 lower_bound(_ForwardIterator __first, _ForwardIterator __last,
1349 const _Tp& __val)
1350 {
1351 // concept requirements
1352 __glibcxx_function_requires(_ForwardIteratorConcept<_ForwardIterator>)
1353 __glibcxx_function_requires(_LessThanOpConcept<
1354 typename iterator_traits<_ForwardIterator>::value_type, _Tp>)
1355 __glibcxx_requires_partitioned_lower(__first, __last, __val);
1356
1357 return std::__lower_bound(__first, __last, __val,
1358 __gnu_cxx::__ops::__iter_less_val());
1359 }
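// A minimal usage sketch of lower_bound (illustrative only, not part of this
// header; the helper name below is hypothetical and assumes a standalone
// translation unit):
#include <algorithm>
#include <cstddef>
#include <vector>

// Index of the first element not less than val, or v.size() if every element
// is smaller. Precondition (as for lower_bound): v is sorted by operator<.
inline std::size_t first_not_less(const std::vector<int>& v, int val)
{
  auto it = std::lower_bound(v.begin(), v.end(), val);
  return static_cast<std::size_t>(it - v.begin());
}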
1360
1361 /// This is a helper function for the sort routines and for random.tcc.
1362 // Precondition: __n > 0.
1363   inline _GLIBCXX_CONSTEXPR int
1364   __lg(int __n)
1365   { return (int)sizeof(int) * __CHAR_BIT__ - 1 - __builtin_clz(__n); }
1366
1367   inline _GLIBCXX_CONSTEXPR unsigned
1368   __lg(unsigned __n)
1369   { return (int)sizeof(int) * __CHAR_BIT__ - 1 - __builtin_clz(__n); }
1370
1371   inline _GLIBCXX_CONSTEXPR long
1372   __lg(long __n)
1373   { return (int)sizeof(long) * __CHAR_BIT__ - 1 - __builtin_clzl(__n); }
1374
1375   inline _GLIBCXX_CONSTEXPR unsigned long
1376   __lg(unsigned long __n)
1377   { return (int)sizeof(long) * __CHAR_BIT__ - 1 - __builtin_clzl(__n); }
1378
1379   inline _GLIBCXX_CONSTEXPR long long
1380   __lg(long long __n)
1381   { return (int)sizeof(long long) * __CHAR_BIT__ - 1 - __builtin_clzll(__n); }
1382
1383   inline _GLIBCXX_CONSTEXPR unsigned long long
1384   __lg(unsigned long long __n)
1385   { return (int)sizeof(long long) * __CHAR_BIT__ - 1 - __builtin_clzll(__n); }
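// Sketch of what the __lg overloads above compute, floor(log2(n)) for n > 0,
// via "bit width minus one minus leading zero bits" (illustrative only; the
// helper name is hypothetical and uses the same GCC/Clang builtin):
#include <climits>

inline int floor_log2(unsigned n) // precondition: n > 0
{
  return static_cast<int>(sizeof(unsigned) * CHAR_BIT) - 1 - __builtin_clz(n);
}
// floor_log2(1) == 0, floor_log2(16) == 4, floor_log2(17) == 4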
1386
1387_GLIBCXX_BEGIN_NAMESPACE_ALGO
1388
1389 /**
1390 * @brief Tests a range for element-wise equality.
1391 * @ingroup non_mutating_algorithms
1392 * @param __first1 An input iterator.
1393 * @param __last1 An input iterator.
1394 * @param __first2 An input iterator.
1395 * @return A boolean true or false.
1396 *
1397 * This compares the elements of two ranges using @c == and returns true or
1398 * false depending on whether all of the corresponding elements of the
1399 * ranges are equal.
1400 */
1401 template<typename _II1, typename _II2>
1402 _GLIBCXX20_CONSTEXPR
1403 inline bool
1404 equal(_II1 __first1, _II1 __last1, _II2 __first2)
1405 {
1406 // concept requirements
1407 __glibcxx_function_requires(_InputIteratorConcept<_II1>)
1408 __glibcxx_function_requires(_InputIteratorConcept<_II2>)
1409 __glibcxx_function_requires(_EqualOpConcept<
1410 typename iterator_traits<_II1>::value_type,
1411 typename iterator_traits<_II2>::value_type>)
1412 __glibcxx_requires_can_increment_range(__first1, __last1, __first2);
1413
1414 return std::__equal_aux(__first1, __last1, __first2);
1415 }
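// A minimal usage sketch of the three-argument equal (illustrative only; the
// helper name is hypothetical). The second range must be at least as long as
// the first, since only one end iterator is given:
#include <algorithm>
#include <array>

inline bool same_prefix()
{
  std::array<int, 3> a{1, 2, 3};
  std::array<int, 4> b{1, 2, 3, 4};
  return std::equal(a.begin(), a.end(), b.begin()); // true
}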
1416
1417 /**
1418 * @brief Tests a range for element-wise equality.
1419 * @ingroup non_mutating_algorithms
1420 * @param __first1 An input iterator.
1421 * @param __last1 An input iterator.
1422 * @param __first2 An input iterator.
1423 * @param __binary_pred A binary predicate @link functors
1424 * functor@endlink.
1425 * @return A boolean true or false.
1426 *
1427 * This compares the elements of two ranges using the binary_pred
1428 * parameter, and returns true or
1429 * false depending on whether all of the corresponding elements of the
1430 * ranges are equal.
1431 */
1432 template<typename _IIter1, typename _IIter2, typename _BinaryPredicate>
1433 _GLIBCXX20_CONSTEXPR
1434 inline bool
1435 equal(_IIter1 __first1, _IIter1 __last1,
1436 _IIter2 __first2, _BinaryPredicate __binary_pred)
1437 {
1438 // concept requirements
1439 __glibcxx_function_requires(_InputIteratorConcept<_IIter1>)
1440 __glibcxx_function_requires(_InputIteratorConcept<_IIter2>)
1441 __glibcxx_requires_valid_range(__first1, __last1);
1442
1443 for (; __first1 != __last1; ++__first1, (void)++__first2)
1444 if (!bool(__binary_pred(*__first1, *__first2)))
1445 return false;
1446 return true;
1447 }
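// A usage sketch of equal with a binary predicate (illustrative only; the
// helper name is hypothetical): case-insensitive comparison of two strings of
// equal length.
#include <algorithm>
#include <cctype>
#include <string>

inline bool iequal(const std::string& a, const std::string& b)
{
  return a.size() == b.size()
         && std::equal(a.begin(), a.end(), b.begin(),
                       [](unsigned char x, unsigned char y)
                       { return std::tolower(x) == std::tolower(y); });
}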
1448
1449#if __cplusplus >= 201103L
1450 // 4-iterator version of std::equal<It1, It2> for use in C++11.
1451 template<typename _II1, typename _II2>
1452 _GLIBCXX20_CONSTEXPR
1453 inline bool
1454 __equal4(_II1 __first1, _II1 __last1, _II2 __first2, _II2 __last2)
1455 {
1456 using _RATag = random_access_iterator_tag;
1457 using _Cat1 = typename iterator_traits<_II1>::iterator_category;
1458 using _Cat2 = typename iterator_traits<_II2>::iterator_category;
1459 using _RAIters = __and_<is_same<_Cat1, _RATag>, is_same<_Cat2, _RATag>>;
1460 if (_RAIters())
1461 {
1462 auto __d1 = std::distance(__first1, __last1);
1463 auto __d2 = std::distance(__first2, __last2);
1464 if (__d1 != __d2)
1465 return false;
1466          return _GLIBCXX_STD_A::equal(__first1, __last1, __first2);
1467 }
1468
1469 for (; __first1 != __last1 && __first2 != __last2;
1470 ++__first1, (void)++__first2)
1471 if (!(*__first1 == *__first2))
1472 return false;
1473 return __first1 == __last1 && __first2 == __last2;
1474 }
1475
1476 // 4-iterator version of std::equal<It1, It2, BinaryPred> for use in C++11.
1477 template<typename _II1, typename _II2, typename _BinaryPredicate>
1478 _GLIBCXX20_CONSTEXPR
1479 inline bool
1480 __equal4(_II1 __first1, _II1 __last1, _II2 __first2, _II2 __last2,
1481 _BinaryPredicate __binary_pred)
1482 {
1483 using _RATag = random_access_iterator_tag;
1484 using _Cat1 = typename iterator_traits<_II1>::iterator_category;
1485 using _Cat2 = typename iterator_traits<_II2>::iterator_category;
1486 using _RAIters = __and_<is_same<_Cat1, _RATag>, is_same<_Cat2, _RATag>>;
1487 if (_RAIters())
1488 {
1489 auto __d1 = std::distance(__first1, __last1);
1490 auto __d2 = std::distance(__first2, __last2);
1491 if (__d1 != __d2)
1492 return false;
1493          return _GLIBCXX_STD_A::equal(__first1, __last1, __first2,
1494 __binary_pred);
1495 }
1496
1497 for (; __first1 != __last1 && __first2 != __last2;
1498 ++__first1, (void)++__first2)
1499 if (!bool(__binary_pred(*__first1, *__first2)))
1500 return false;
1501 return __first1 == __last1 && __first2 == __last2;
1502 }
1503#endif // C++11
1504
1505#if __cplusplus > 201103L
1506
1507#define __cpp_lib_robust_nonmodifying_seq_ops 201304
1508
1509 /**
1510 * @brief Tests a range for element-wise equality.
1511 * @ingroup non_mutating_algorithms
1512 * @param __first1 An input iterator.
1513 * @param __last1 An input iterator.
1514 * @param __first2 An input iterator.
1515 * @param __last2 An input iterator.
1516 * @return A boolean true or false.
1517 *
1518 * This compares the elements of two ranges using @c == and returns true or
1519 * false depending on whether all of the corresponding elements of the
1520 * ranges are equal.
1521 */
1522 template<typename _II1, typename _II2>
1523 _GLIBCXX20_CONSTEXPR
1524 inline bool
1525 equal(_II1 __first1, _II1 __last1, _II2 __first2, _II2 __last2)
1526 {
1527 // concept requirements
1528 __glibcxx_function_requires(_InputIteratorConcept<_II1>)
1529 __glibcxx_function_requires(_InputIteratorConcept<_II2>)
1530 __glibcxx_function_requires(_EqualOpConcept<
1531 typename iterator_traits<_II1>::value_type,
1532 typename iterator_traits<_II2>::value_type>)
1533 __glibcxx_requires_valid_range(__first1, __last1);
1534 __glibcxx_requires_valid_range(__first2, __last2);
1535
1536      return _GLIBCXX_STD_A::__equal4(__first1, __last1, __first2, __last2);
1537 }
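// A usage sketch of the C++14 four-iterator equal (illustrative only; the
// helper name is hypothetical). Unlike the three-argument form, ranges of
// different length are handled and simply compare unequal:
#include <algorithm>
#include <vector>

inline bool same_sequence(const std::vector<int>& a, const std::vector<int>& b)
{
  return std::equal(a.begin(), a.end(), b.begin(), b.end());
}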
1538
1539 /**
1540 * @brief Tests a range for element-wise equality.
1541 * @ingroup non_mutating_algorithms
1542 * @param __first1 An input iterator.
1543 * @param __last1 An input iterator.
1544 * @param __first2 An input iterator.
1545 * @param __last2 An input iterator.
1546 * @param __binary_pred A binary predicate @link functors
1547 * functor@endlink.
1548 * @return A boolean true or false.
1549 *
1550 * This compares the elements of two ranges using the binary_pred
1551 * parameter, and returns true or
1552 * false depending on whether all of the corresponding elements of the
1553 * ranges are equal.
1554 */
1555 template<typename _IIter1, typename _IIter2, typename _BinaryPredicate>
1556 _GLIBCXX20_CONSTEXPR
1557 inline bool
1558 equal(_IIter1 __first1, _IIter1 __last1,
1559 _IIter2 __first2, _IIter2 __last2, _BinaryPredicate __binary_pred)
1560 {
1561 // concept requirements
1562 __glibcxx_function_requires(_InputIteratorConcept<_IIter1>)
1563 __glibcxx_function_requires(_InputIteratorConcept<_IIter2>)
1564 __glibcxx_requires_valid_range(__first1, __last1);
1565 __glibcxx_requires_valid_range(__first2, __last2);
1566
1567      return _GLIBCXX_STD_A::__equal4(__first1, __last1, __first2, __last2,
1568 __binary_pred);
1569 }
1570#endif // C++14
1571
1572 /**
1573 * @brief Performs @b dictionary comparison on ranges.
1574 * @ingroup sorting_algorithms
1575 * @param __first1 An input iterator.
1576 * @param __last1 An input iterator.
1577 * @param __first2 An input iterator.
1578 * @param __last2 An input iterator.
1579 * @return A boolean true or false.
1580 *
1581 * <em>Returns true if the sequence of elements defined by the range
1582 * [first1,last1) is lexicographically less than the sequence of elements
1583 * defined by the range [first2,last2). Returns false otherwise.</em>
1584 * (Quoted from [25.3.8]/1.) If the iterators are all character pointers,
1585 * then this is an inline call to @c memcmp.
1586 */
1587 template<typename _II1, typename _II2>
1588 _GLIBCXX20_CONSTEXPR
1589 inline bool
1590 lexicographical_compare(_II1 __first1, _II1 __last1,
1591 _II2 __first2, _II2 __last2)
1592 {
1593#ifdef _GLIBCXX_CONCEPT_CHECKS
1594 // concept requirements
1595 typedef typename iterator_traits<_II1>::value_type _ValueType1;
1596 typedef typename iterator_traits<_II2>::value_type _ValueType2;
1597#endif
1598 __glibcxx_function_requires(_InputIteratorConcept<_II1>)
1599 __glibcxx_function_requires(_InputIteratorConcept<_II2>)
1600 __glibcxx_function_requires(_LessThanOpConcept<_ValueType1, _ValueType2>)
1601 __glibcxx_function_requires(_LessThanOpConcept<_ValueType2, _ValueType1>)
1602 __glibcxx_requires_valid_range(__first1, __last1);
1603 __glibcxx_requires_valid_range(__first2, __last2);
1604
1605 return std::__lexicographical_compare_aux(std::__niter_base(__first1),
1606 std::__niter_base(__last1),
1607 std::__niter_base(__first2),
1608 std::__niter_base(__last2));
1609 }
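// A usage sketch of lexicographical_compare (illustrative only; the helper
// name is hypothetical). For pointers to memcmp-orderable types the call
// dispatches to the memcmp-based specialisation above:
#include <algorithm>
#include <string>

inline bool dict_less(const std::string& a, const std::string& b)
{
  return std::lexicographical_compare(a.begin(), a.end(),
                                      b.begin(), b.end());
}
// dict_less("apple", "apples") is true; dict_less("b", "apple") is false.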
1610
1611 /**
1612 * @brief Performs @b dictionary comparison on ranges.
1613 * @ingroup sorting_algorithms
1614 * @param __first1 An input iterator.
1615 * @param __last1 An input iterator.
1616 * @param __first2 An input iterator.
1617 * @param __last2 An input iterator.
1618 * @param __comp A @link comparison_functors comparison functor@endlink.
1619 * @return A boolean true or false.
1620 *
1621 * The same as the four-parameter @c lexicographical_compare, but uses the
1622 * comp parameter instead of @c <.
1623 */
1624 template<typename _II1, typename _II2, typename _Compare>
1625 _GLIBCXX20_CONSTEXPR
1626 inline bool
1627 lexicographical_compare(_II1 __first1, _II1 __last1,
1628 _II2 __first2, _II2 __last2, _Compare __comp)
1629 {
1630 // concept requirements
1631 __glibcxx_function_requires(_InputIteratorConcept<_II1>)
1632 __glibcxx_function_requires(_InputIteratorConcept<_II2>)
1633 __glibcxx_requires_valid_range(__first1, __last1);
1634 __glibcxx_requires_valid_range(__first2, __last2);
1635
1636 return std::__lexicographical_compare_impl
1637 (__first1, __last1, __first2, __last2,
1638 __gnu_cxx::__ops::__iter_comp_iter(__comp));
1639 }
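// A usage sketch of the comparator overload (illustrative only; the helper
// name is hypothetical): the same dictionary ordering, but with > applied to
// the elements instead of <.
#include <algorithm>
#include <functional>
#include <vector>

inline bool greater_dict_less(const std::vector<int>& a,
                              const std::vector<int>& b)
{
  return std::lexicographical_compare(a.begin(), a.end(),
                                      b.begin(), b.end(),
                                      std::greater<int>());
}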
1640
1641#if __cpp_lib_three_way_comparison
1642 // Iter points to a contiguous range of unsigned narrow character type
1643 // or std::byte, suitable for comparison by memcmp.
1644 template<typename _Iter>
1645 concept __is_byte_iter = contiguous_iterator<_Iter>
1646 && __is_memcmp_ordered<iter_value_t<_Iter>>::__value;
1647
1648 // Return a struct with two members, initialized to the smaller of x and y
1649 // (or x if they compare equal) and the result of the comparison x <=> y.
1650 template<typename _Tp>
1651 constexpr auto
1652 __min_cmp(_Tp __x, _Tp __y)
1653 {
1654 struct _Res {
1655 _Tp _M_min;
1656 decltype(__x <=> __y) _M_cmp;
1657 };
1658 auto __c = __x <=> __y;
1659 if (__c > 0)
1660 return _Res{__y, __c};
1661 return _Res{__x, __c};
1662 }
1663
1664 /**
1665 * @brief Performs dictionary comparison on ranges.
1666 * @ingroup sorting_algorithms
1667 * @param __first1 An input iterator.
1668 * @param __last1 An input iterator.
1669 * @param __first2 An input iterator.
1670 * @param __last2 An input iterator.
1671 * @param __comp A @link comparison_functors comparison functor@endlink.
1672 * @return The comparison category that `__comp(*__first1, *__first2)`
1673 * returns.
1674 */
1675 template<typename _InputIter1, typename _InputIter2, typename _Comp>
1676 constexpr auto
1677 lexicographical_compare_three_way(_InputIter1 __first1,
1678 _InputIter1 __last1,
1679 _InputIter2 __first2,
1680 _InputIter2 __last2,
1681 _Comp __comp)
1682 -> decltype(__comp(*__first1, *__first2))
1683 {
1684 // concept requirements
1685 __glibcxx_function_requires(_InputIteratorConcept<_InputIter1>)
1686 __glibcxx_function_requires(_InputIteratorConcept<_InputIter2>)
1687 __glibcxx_requires_valid_range(__first1, __last1);
1688 __glibcxx_requires_valid_range(__first2, __last2);
1689
1690#if __cpp_lib_is_constant_evaluated
1691 using _Cat = decltype(__comp(*__first1, *__first2));
1692 static_assert(same_as<common_comparison_category_t<_Cat>, _Cat>);
1693
1694 if (!std::is_constant_evaluated())
1695 if constexpr (same_as<_Comp, __detail::_Synth3way>
1696 || same_as<_Comp, compare_three_way>)
1697 if constexpr (__is_byte_iter<_InputIter1>)
1698 if constexpr (__is_byte_iter<_InputIter2>)
1699 {
1700 const auto [__len, __lencmp]
1701 = std::__min_cmp(__last1 - __first1, __last2 - __first2);
1702 if (__len)
1703 {
1704 const auto __c
1705 = __builtin_memcmp(&*__first1, &*__first2, __len) <=> 0;
1706 if (__c != 0)
1707 return __c;
1708 }
1709 return __lencmp;
1710 }
1711#endif // is_constant_evaluated
1712 while (__first1 != __last1)
1713 {
1714 if (__first2 == __last2)
1715 return strong_ordering::greater;
1716 if (auto __cmp = __comp(*__first1, *__first2); __cmp != 0)
1717 return __cmp;
1718 ++__first1;
1719 ++__first2;
1720 }
1721 return (__first2 == __last2) <=> true; // See PR 94006
1722 }
1723
1724 template<typename _InputIter1, typename _InputIter2>
1725 constexpr auto
1726 lexicographical_compare_three_way(_InputIter1 __first1,
1727 _InputIter1 __last1,
1728 _InputIter2 __first2,
1729 _InputIter2 __last2)
1730 {
1731 return std::lexicographical_compare_three_way(__first1, __last1,
1732 __first2, __last2,
1733 compare_three_way{});
1734 }
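// A usage sketch of C++20 lexicographical_compare_three_way (illustrative
// only; the helper name is hypothetical). The result is a comparison
// category value rather than a bool:
#include <algorithm>
#include <compare>
#include <vector>

inline std::strong_ordering order_of(const std::vector<int>& a,
                                     const std::vector<int>& b)
{
  return std::lexicographical_compare_three_way(a.begin(), a.end(),
                                                b.begin(), b.end());
}
// order_of({1,2}, {1,3}) < 0; order_of({1,2}, {1,2}) == 0;
// order_of({1,2,3}, {1,2}) > 0.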
1735#endif // three_way_comparison
1736
1737 template<typename _InputIterator1, typename _InputIterator2,
1738 typename _BinaryPredicate>
1739 _GLIBCXX20_CONSTEXPR
1740 pair<_InputIterator1, _InputIterator2>
1741 __mismatch(_InputIterator1 __first1, _InputIterator1 __last1,
1742 _InputIterator2 __first2, _BinaryPredicate __binary_pred)
1743 {
1744 while (__first1 != __last1 && __binary_pred(__first1, __first2))
1745 {
1746 ++__first1;
1747 ++__first2;
1748 }
1749 return pair<_InputIterator1, _InputIterator2>(__first1, __first2);
1750 }
1751
1752 /**
1753 * @brief Finds the places in ranges which don't match.
1754 * @ingroup non_mutating_algorithms
1755 * @param __first1 An input iterator.
1756 * @param __last1 An input iterator.
1757 * @param __first2 An input iterator.
1758 * @return A pair of iterators pointing to the first mismatch.
1759 *
1760 * This compares the elements of two ranges using @c == and returns a pair
1761 * of iterators. The first iterator points into the first range, the
1762 * second iterator points into the second range, and the elements pointed
1763 * to by the iterators are not equal.
1764 */
1765 template<typename _InputIterator1, typename _InputIterator2>
1766 _GLIBCXX20_CONSTEXPR
1767 inline pair<_InputIterator1, _InputIterator2>
1768 mismatch(_InputIterator1 __first1, _InputIterator1 __last1,
1769 _InputIterator2 __first2)
1770 {
1771 // concept requirements
1772 __glibcxx_function_requires(_InputIteratorConcept<_InputIterator1>)
1773 __glibcxx_function_requires(_InputIteratorConcept<_InputIterator2>)
1774 __glibcxx_function_requires(_EqualOpConcept<
1775 typename iterator_traits<_InputIterator1>::value_type,
1776 typename iterator_traits<_InputIterator2>::value_type>)
1777 __glibcxx_requires_valid_range(__first1, __last1);
1778
1779      return _GLIBCXX_STD_A::__mismatch(__first1, __last1, __first2,
1780 __gnu_cxx::__ops::__iter_equal_to_iter());
1781 }
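// A usage sketch of the three-argument mismatch (illustrative only; the
// helper name is hypothetical). It returns a pair of iterators at the first
// differing position; the caller must ensure the second range is at least as
// long as the first:
#include <algorithm>
#include <cstddef>
#include <string>

inline std::size_t common_prefix_length(const std::string& a,
                                        const std::string& b)
{
  // Assumes a.size() <= b.size() for the overload used here.
  auto p = std::mismatch(a.begin(), a.end(), b.begin());
  return static_cast<std::size_t>(p.first - a.begin());
}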
1782
1783 /**
1784 * @brief Finds the places in ranges which don't match.
1785 * @ingroup non_mutating_algorithms
1786 * @param __first1 An input iterator.
1787 * @param __last1 An input iterator.
1788 * @param __first2 An input iterator.
1789 * @param __binary_pred A binary predicate @link functors
1790 * functor@endlink.
1791 * @return A pair of iterators pointing to the first mismatch.
1792 *
1793 * This compares the elements of two ranges using the binary_pred
1794 * parameter, and returns a pair
1795 * of iterators. The first iterator points into the first range, the
1796 * second iterator points into the second range, and the elements pointed
1797 * to by the iterators are not equal.
1798 */
1799 template<typename _InputIterator1, typename _InputIterator2,
1800 typename _BinaryPredicate>
1801 _GLIBCXX20_CONSTEXPR
1802 inline pair<_InputIterator1, _InputIterator2>
1803 mismatch(_InputIterator1 __first1, _InputIterator1 __last1,
1804 _InputIterator2 __first2, _BinaryPredicate __binary_pred)
1805 {
1806 // concept requirements
1807 __glibcxx_function_requires(_InputIteratorConcept<_InputIterator1>)
1808 __glibcxx_function_requires(_InputIteratorConcept<_InputIterator2>)
1809 __glibcxx_requires_valid_range(__first1, __last1);
1810
1811      return _GLIBCXX_STD_A::__mismatch(__first1, __last1, __first2,
1812 __gnu_cxx::__ops::__iter_comp_iter(__binary_pred));
1813 }
1814
1815#if __cplusplus > 201103L
1816
1817 template<typename _InputIterator1, typename _InputIterator2,
1818 typename _BinaryPredicate>
1819 _GLIBCXX20_CONSTEXPR
1820 pair<_InputIterator1, _InputIterator2>
1821 __mismatch(_InputIterator1 __first1, _InputIterator1 __last1,
1822 _InputIterator2 __first2, _InputIterator2 __last2,
1823 _BinaryPredicate __binary_pred)
1824 {
1825 while (__first1 != __last1 && __first2 != __last2
1826 && __binary_pred(__first1, __first2))
1827 {
1828 ++__first1;
1829 ++__first2;
1830 }
1831 return pair<_InputIterator1, _InputIterator2>(__first1, __first2);
1832 }
1833
1834 /**
1835 * @brief Finds the places in ranges which don't match.
1836 * @ingroup non_mutating_algorithms
1837 * @param __first1 An input iterator.
1838 * @param __last1 An input iterator.
1839 * @param __first2 An input iterator.
1840 * @param __last2 An input iterator.
1841 * @return A pair of iterators pointing to the first mismatch.
1842 *
1843 * This compares the elements of two ranges using @c == and returns a pair
1844 * of iterators. The first iterator points into the first range, the
1845 * second iterator points into the second range, and the elements pointed
1846 * to by the iterators are not equal.
1847 */
1848 template<typename _InputIterator1, typename _InputIterator2>
1849 _GLIBCXX20_CONSTEXPR
1850 inline pair<_InputIterator1, _InputIterator2>
1851 mismatch(_InputIterator1 __first1, _InputIterator1 __last1,
1852 _InputIterator2 __first2, _InputIterator2 __last2)
1853 {
1854 // concept requirements
1855 __glibcxx_function_requires(_InputIteratorConcept<_InputIterator1>)
1856 __glibcxx_function_requires(_InputIteratorConcept<_InputIterator2>)
1857 __glibcxx_function_requires(_EqualOpConcept<
1858 typename iterator_traits<_InputIterator1>::value_type,
1859 typename iterator_traits<_InputIterator2>::value_type>)
1860 __glibcxx_requires_valid_range(__first1, __last1);
1861 __glibcxx_requires_valid_range(__first2, __last2);
1862
1863      return _GLIBCXX_STD_A::__mismatch(__first1, __last1, __first2, __last2,
1864 __gnu_cxx::__ops::__iter_equal_to_iter());
1865 }
1866
1867 /**
1868 * @brief Finds the places in ranges which don't match.
1869 * @ingroup non_mutating_algorithms
1870 * @param __first1 An input iterator.
1871 * @param __last1 An input iterator.
1872 * @param __first2 An input iterator.
1873 * @param __last2 An input iterator.
1874 * @param __binary_pred A binary predicate @link functors
1875 * functor@endlink.
1876 * @return A pair of iterators pointing to the first mismatch.
1877 *
1878 * This compares the elements of two ranges using the binary_pred
1879 * parameter, and returns a pair
1880 * of iterators. The first iterator points into the first range, the
1881 * second iterator points into the second range, and the elements pointed
1882 * to by the iterators are not equal.
1883 */
1884 template<typename _InputIterator1, typename _InputIterator2,
1885 typename _BinaryPredicate>
1886 _GLIBCXX20_CONSTEXPR
1887 inline pair<_InputIterator1, _InputIterator2>
1888 mismatch(_InputIterator1 __first1, _InputIterator1 __last1,
1889 _InputIterator2 __first2, _InputIterator2 __last2,
1890 _BinaryPredicate __binary_pred)
1891 {
1892 // concept requirements
1893 __glibcxx_function_requires(_InputIteratorConcept<_InputIterator1>)
1894 __glibcxx_function_requires(_InputIteratorConcept<_InputIterator2>)
1895 __glibcxx_requires_valid_range(__first1, __last1);
1896 __glibcxx_requires_valid_range(__first2, __last2);
1897
1898      return _GLIBCXX_STD_A::__mismatch(__first1, __last1, __first2, __last2,
1899 __gnu_cxx::__ops::__iter_comp_iter(__binary_pred));
1900 }
1901#endif
1902
1903_GLIBCXX_END_NAMESPACE_ALGO
1904
1905 /// This is an overload used by find algos for the Input Iterator case.
1906 template<typename _InputIterator, typename _Predicate>
1907 _GLIBCXX20_CONSTEXPR
1908 inline _InputIterator
1909 __find_if(_InputIterator __first, _InputIterator __last,
1910 _Predicate __pred, input_iterator_tag)
1911 {
1912 while (__first != __last && !__pred(__first))
1913 ++__first;
1914 return __first;
1915 }
1916
1917 /// This is an overload used by find algos for the RAI case.
1918 template<typename _RandomAccessIterator, typename _Predicate>
1919 _GLIBCXX20_CONSTEXPR
1920 _RandomAccessIterator
1921 __find_if(_RandomAccessIterator __first, _RandomAccessIterator __last,
1922 _Predicate __pred, random_access_iterator_tag)
1923 {
1924 typename iterator_traits<_RandomAccessIterator>::difference_type
1925 __trip_count = (__last - __first) >> 2;
1926
1927 for (; __trip_count > 0; --__trip_count)
1928 {
1929 if (__pred(__first))
1930 return __first;
1931 ++__first;
1932
1933 if (__pred(__first))
1934 return __first;
1935 ++__first;
1936
1937 if (__pred(__first))
1938 return __first;
1939 ++__first;
1940
1941 if (__pred(__first))
1942 return __first;
1943 ++__first;
1944 }
1945
1946 switch (__last - __first)
1947 {
1948 case 3:
1949 if (__pred(__first))
1950 return __first;
1951 ++__first;
1952 // FALLTHRU
1953 case 2:
1954 if (__pred(__first))
1955 return __first;
1956 ++__first;
1957 // FALLTHRU
1958 case 1:
1959 if (__pred(__first))
1960 return __first;
1961 ++__first;
1962 // FALLTHRU
1963 case 0:
1964 default:
1965 return __last;
1966 }
1967 }
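// The random-access overload above unrolls the scan four elements per trip
// (__trip_count = (__last - __first) >> 2) and finishes the remaining 0-3
// elements in the switch. User code reaches it through find_if; a minimal
// sketch (illustrative only; the helper name is hypothetical):
#include <algorithm>
#include <vector>

inline bool has_negative(const std::vector<int>& v)
{
  return std::find_if(v.begin(), v.end(),
                      [](int x) { return x < 0; }) != v.end();
}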
1968
1969 template<typename _Iterator, typename _Predicate>
1970 _GLIBCXX20_CONSTEXPR
1971 inline _Iterator
1972 __find_if(_Iterator __first, _Iterator __last, _Predicate __pred)
1973 {
1974 return __find_if(__first, __last, __pred,
1975 std::__iterator_category(__first));
1976 }
1977
1978 template<typename _InputIterator, typename _Predicate>
1979 _GLIBCXX20_CONSTEXPR
1980 typename iterator_traits<_InputIterator>::difference_type
1981 __count_if(_InputIterator __first, _InputIterator __last, _Predicate __pred)
1982 {
1983 typename iterator_traits<_InputIterator>::difference_type __n = 0;
1984 for (; __first != __last; ++__first)
1985 if (__pred(__first))
1986 ++__n;
1987 return __n;
1988 }
1989
1990#if __cplusplus >= 201103L
1991 template<typename _ForwardIterator1, typename _ForwardIterator2,
1992 typename _BinaryPredicate>
1993 _GLIBCXX20_CONSTEXPR
1994 bool
1995 __is_permutation(_ForwardIterator1 __first1, _ForwardIterator1 __last1,
1996 _ForwardIterator2 __first2, _BinaryPredicate __pred)
1997 {
1998 // Efficiently compare identical prefixes: O(N) if sequences
1999 // have the same elements in the same order.
2000 for (; __first1 != __last1; ++__first1, (void)++__first2)
2001 if (!__pred(__first1, __first2))
2002 break;
2003
2004 if (__first1 == __last1)
2005 return true;
2006
2007 // Establish __last2 assuming equal ranges by iterating over the
2008 // rest of the list.
2009 _ForwardIterator2 __last2 = __first2;
2010 std::advance(__last2, std::distance(__first1, __last1));
2011 for (_ForwardIterator1 __scan = __first1; __scan != __last1; ++__scan)
2012 {
2013 if (__scan != std::__find_if(__first1, __scan,
2014 __gnu_cxx::__ops::__iter_comp_iter(__pred, __scan)))
2015 continue; // We've seen this one before.
2016
2017 auto __matches
2018 = std::__count_if(__first2, __last2,
2019 __gnu_cxx::__ops::__iter_comp_iter(__pred, __scan));
2020 if (0 == __matches ||
2021 std::__count_if(__scan, __last1,
2022 __gnu_cxx::__ops::__iter_comp_iter(__pred, __scan))
2023 != __matches)
2024 return false;
2025 }
2026 return true;
2027 }
2028
2029 /**
2030 * @brief Checks whether a permutation of the second sequence is equal
2031 * to the first sequence.
2032 * @ingroup non_mutating_algorithms
2033 * @param __first1 Start of first range.
2034 * @param __last1 End of first range.
2035 * @param __first2 Start of second range.
2036 * @return true if there exists a permutation of the elements in the range
2037 * [__first2, __first2 + (__last1 - __first1)), beginning with
2038 * ForwardIterator2 begin, such that equal(__first1, __last1, begin)
2039 * returns true; otherwise, returns false.
2040 */
2041 template<typename _ForwardIterator1, typename _ForwardIterator2>
2042 _GLIBCXX20_CONSTEXPR
2043 inline bool
2044 is_permutation(_ForwardIterator1 __first1, _ForwardIterator1 __last1,
2045 _ForwardIterator2 __first2)
2046 {
2047 // concept requirements
2048 __glibcxx_function_requires(_ForwardIteratorConcept<_ForwardIterator1>)
2049 __glibcxx_function_requires(_ForwardIteratorConcept<_ForwardIterator2>)
2050 __glibcxx_function_requires(_EqualOpConcept<
2051 typename iterator_traits<_ForwardIterator1>::value_type,
2052 typename iterator_traits<_ForwardIterator2>::value_type>)
2053 __glibcxx_requires_valid_range(__first1, __last1);
2054
2055 return std::__is_permutation(__first1, __last1, __first2,
2056 __gnu_cxx::__ops::__iter_equal_to_iter());
2057 }
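// A usage sketch of is_permutation (illustrative only; the helper name is
// hypothetical). The helper above first skips a common prefix, then counts
// occurrences of each remaining value in both ranges, so the worst case is
// quadratic:
#include <algorithm>
#include <vector>

inline bool same_multiset(const std::vector<int>& a, const std::vector<int>& b)
{
  return a.size() == b.size()
         && std::is_permutation(a.begin(), a.end(), b.begin());
}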
2058#endif // C++11
2059
2060_GLIBCXX_END_NAMESPACE_VERSION
2061} // namespace std
2062
2063// NB: This file is included within many other C++ includes, as a way
2064// of getting the base algorithms. So, make sure that parallel bits
2065// come in too if requested.
2066#ifdef _GLIBCXX_PARALLEL
2067# include <parallel/algobase.h>
2068#endif
2069
2070#endif