Bug Summary

File: llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
Warning: line 141, column 27
The result of the left shift is undefined due to shifting by '64', which is greater or equal to the width of type 'long long'
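For context: in C++ a shift is undefined whenever the shift count is greater than or equal to the bit width of the promoted left operand, and the analyzer's path below reaches a count of exactly 64 for the 64-bit 'long long' at line 141. A minimal stand-alone sketch of the flagged pattern follows; the parameter names mirror the SGPRSpillBuilder fields, but the function itself is hypothetical and only an illustration, not the LLVM code:

  // Hypothetical reduction of the computation at SIRegisterInfo.cpp:139-141.
  #include <algorithm>
  #include <cstdint>

  int64_t computeVGPRLanes(unsigned PerVGPR, unsigned NumSubRegs) {
    // With PerVGPR == 64 (wave64) and NumSubRegs >= 64, the shift count is 64,
    // which equals the width of 'long long', so the shift is undefined behavior.
    return (1LL << std::min(PerVGPR, NumSubRegs)) - 1LL;
  }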

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name SIRegisterInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/build-llvm/lib/Target/AMDGPU -resource-dir /usr/lib/llvm-13/lib/clang/13.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/build-llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/build-llvm/include -I /build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/include -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-13/lib/clang/13.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/build-llvm/lib/Target/AMDGPU -fdebug-prefix-map=/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-05-07-005843-9350-1 -x c++ /build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

1//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// SI implementation of the TargetRegisterInfo class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "SIRegisterInfo.h"
15#include "AMDGPU.h"
16#include "AMDGPURegisterBankInfo.h"
17#include "GCNSubtarget.h"
18#include "MCTargetDesc/AMDGPUInstPrinter.h"
19#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
20#include "SIMachineFunctionInfo.h"
21#include "llvm/CodeGen/LiveIntervals.h"
22#include "llvm/CodeGen/MachineDominators.h"
23#include "llvm/CodeGen/RegisterScavenging.h"
24
25using namespace llvm;
26
27#define GET_REGINFO_TARGET_DESC
28#include "AMDGPUGenRegisterInfo.inc"
29
30static cl::opt<bool> EnableSpillSGPRToVGPR(
31 "amdgpu-spill-sgpr-to-vgpr",
32 cl::desc("Enable spilling VGPRs to SGPRs"),
33 cl::ReallyHidden,
34 cl::init(true));
35
36std::array<std::vector<int16_t>, 16> SIRegisterInfo::RegSplitParts;
37std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;
38
39// Map numbers of DWORDs to indexes in SubRegFromChannelTable.
40// Valid indexes are shifted 1, such that a 0 mapping means unsupported.
41// e.g. for 8 DWORDs (256-bit), SubRegFromChannelTableWidthMap[8] = 8,
42// meaning index 7 in SubRegFromChannelTable.
43static const std::array<unsigned, 17> SubRegFromChannelTableWidthMap = {
44 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};
45
46namespace llvm {
47
48// A temporary struct to spill SGPRs.
49// This is mostly to spill SGPRs to memory. Spilling SGPRs into VGPR lanes emits
50// just v_writelane and v_readlane.
51//
52// When spilling to memory, the SGPRs are written into VGPR lanes and the VGPR
53// is saved to scratch (or the other way around for loads).
54// For this, a VGPR is required where the needed lanes can be clobbered. The
55// RegScavenger can provide a VGPR where currently active lanes can be
56// clobbered, but we still need to save inactive lanes.
57// The high-level steps are:
58// - Try to scavenge SGPR(s) to save exec
59// - Try to scavenge VGPR
60// - Save needed, all or inactive lanes of a TmpVGPR
61// - Spill/Restore SGPRs using TmpVGPR
62// - Restore TmpVGPR
63//
64// To save all lanes of TmpVGPR, exec needs to be saved and modified. If we
65// cannot scavenge temporary SGPRs to save exec, we use the following code:
66// buffer_store_dword TmpVGPR ; only if active lanes need to be saved
67// s_not exec, exec
68// buffer_store_dword TmpVGPR ; save inactive lanes
69// s_not exec, exec
70struct SGPRSpillBuilder {
71 struct PerVGPRData {
72 unsigned PerVGPR;
73 unsigned NumVGPRs;
74 int64_t VGPRLanes;
75 };
76
77 // The SGPR to save
78 Register SuperReg;
79 MachineBasicBlock::iterator MI;
80 ArrayRef<int16_t> SplitParts;
81 unsigned NumSubRegs;
82 bool IsKill;
83 const DebugLoc &DL;
84
85 /* When spilling to stack */
86 // The SGPRs are written into this VGPR, which is then written to scratch
87 // (or vice versa for loads).
88 Register TmpVGPR = AMDGPU::NoRegister;
89 // Temporary spill slot to save TmpVGPR to.
90 int TmpVGPRIndex = 0;
91 // If TmpVGPR is live before the spill or if it is scavenged.
92 bool TmpVGPRLive = false;
93 // Scavenged SGPR to save EXEC.
94 Register SavedExecReg = AMDGPU::NoRegister;
95 // Stack index to write the SGPRs to.
96 int Index;
97 unsigned EltSize = 4;
98
99 RegScavenger *RS;
100 MachineBasicBlock &MBB;
101 MachineFunction &MF;
102 SIMachineFunctionInfo &MFI;
103 const SIInstrInfo &TII;
104 const SIRegisterInfo &TRI;
105 bool IsWave32;
106 Register ExecReg;
107 unsigned MovOpc;
108 unsigned NotOpc;
109
110 SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
111 bool IsWave32, MachineBasicBlock::iterator MI, int Index,
112 RegScavenger *RS)
113 : SuperReg(MI->getOperand(0).getReg()), MI(MI),
114 IsKill(MI->getOperand(0).isKill()), DL(MI->getDebugLoc()), Index(Index),
115 RS(RS), MBB(*MI->getParent()), MF(*MBB.getParent()),
116 MFI(*MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
117 IsWave32(IsWave32) {
118 const TargetRegisterClass *RC = TRI.getPhysRegClass(SuperReg);
119 SplitParts = TRI.getRegSplitParts(RC, EltSize);
120 NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
121
122 if (IsWave32) {
123 ExecReg = AMDGPU::EXEC_LO;
124 MovOpc = AMDGPU::S_MOV_B32;
125 NotOpc = AMDGPU::S_NOT_B32;
126 } else {
127 ExecReg = AMDGPU::EXEC;
128 MovOpc = AMDGPU::S_MOV_B64;
129 NotOpc = AMDGPU::S_NOT_B64;
130 }
131
132 assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
133 assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI &&
134 SuperReg != AMDGPU::EXEC && "exec should never spill");
135 }
136
137 PerVGPRData getPerVGPRData() {
138 PerVGPRData Data;
139 Data.PerVGPR = IsWave32 ? 32 : 64;
15.1 Field 'IsWave32' is false
16 '?' condition is false
17 The value 64 is assigned to 'Data.PerVGPR'
140 Data.NumVGPRs = (NumSubRegs + (Data.PerVGPR - 1)) / Data.PerVGPR;
141 Data.VGPRLanes = (1LL << std::min(Data.PerVGPR, NumSubRegs)) - 1LL;
18 Passing value via 1st parameter '__a'
19 Calling 'min<unsigned int>'
23 Returning from 'min<unsigned int>'
24 The result of the left shift is undefined due to shifting by '64', which is greater or equal to the width of type 'long long'
142 return Data;
143 }
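As an aside, one defensive way to build such a "low N bits set" lane mask without ever shifting by the full register width is to special-case N >= 64 first. This is a minimal sketch under the assumption that the intent is a mask of the low min(PerVGPR, NumSubRegs) bits; it is an illustration, not the fix adopted upstream:

  // Hypothetical guarded variant of the line-141 computation (illustration only).
  #include <cstdint>

  uint64_t makeLaneMask(unsigned NumLanes) {
    if (NumLanes >= 64)
      return ~0ull;                 // all lanes set; avoids a 64-bit-wide shift
    return (1ull << NumLanes) - 1;  // well-defined for NumLanes in [0, 63]
  }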
144
145 // Tries to scavenge SGPRs to save EXEC and a VGPR. Uses v0 if no VGPR is
146 // free.
147 // Writes these instructions if an SGPR can be scavenged:
148 // s_mov_b64 s[6:7], exec ; Save exec
149 // s_mov_b64 exec, 3 ; Wanted lanemask
150 // buffer_store_dword v1 ; Write scavenged VGPR to emergency slot
151 //
152 // Writes these instructions if no SGPR can be scavenged:
153 // buffer_store_dword v0 ; Only if no free VGPR was found
154 // s_not_b64 exec, exec
155 // buffer_store_dword v0 ; Save inactive lanes
156 // ; exec stays inverted, it is flipped back in
157 // ; restore.
158 void prepare() {
159 // Scavenged temporary VGPR to use. It must be scavenged once for any number
160 // of spilled subregs.
161 // FIXME: The liveness analysis is limited and does not tell if a register
162 // is in use in lanes that are currently inactive. We can never be sure if
163 // a register as actually in use in another lane, so we need to save all
164 // used lanes of the chosen VGPR.
165 assert(RS && "Cannot spill SGPR to memory without RegScavenger");
8 Assuming field 'RS' is non-null
9 '?' condition is true
166 TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0, false);
167
168 // Reserve temporary stack slot
169 TmpVGPRIndex = MFI.getScavengeFI(MF.getFrameInfo(), TRI);
170 if (TmpVGPR) {
10 Assuming the condition is true
11 Taking true branch
171 // Found a register that is dead in the currently active lanes, we only
172 // need to spill inactive lanes.
173 TmpVGPRLive = false;
174 } else {
175 // Pick v0 because it doesn't make a difference.
176 TmpVGPR = AMDGPU::VGPR0;
177 TmpVGPRLive = true;
178 }
179
180 // Try to scavenge SGPRs to save exec
181 assert(!SavedExecReg && "Exec is already saved, refuse to save again");
12 Assuming the condition is true
13 '?' condition is true
182 const TargetRegisterClass &RC =
183 IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass;
13.1 Field 'IsWave32' is false
14 '?' condition is false
184 RS->setRegUsed(SuperReg);
185 SavedExecReg = RS->scavengeRegister(&RC, MI, 0, false);
186
187 int64_t VGPRLanes = getPerVGPRData().VGPRLanes;
15 Calling 'SGPRSpillBuilder::getPerVGPRData'
188
189 if (SavedExecReg) {
190 RS->setRegUsed(SavedExecReg);
191 // Set exec to needed lanes
192 BuildMI(MBB, MI, DL, TII.get(MovOpc), SavedExecReg).addReg(ExecReg);
193 auto I = BuildMI(MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes);
194 if (!TmpVGPRLive)
195 I.addReg(TmpVGPR, RegState::ImplicitDefine);
196 // Spill needed lanes
197 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
198 } else {
199 // Spill active lanes
200 if (TmpVGPRLive)
201 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false,
202 /*IsKill*/ false);
203 // Spill inactive lanes
204 auto I = BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
205 if (!TmpVGPRLive)
206 I.addReg(TmpVGPR, RegState::ImplicitDefine);
207 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
208 }
209 }
210
211 // Writes these instructions if an SGPR can be scavenged:
212 // buffer_load_dword v1 ; Write scavenged VGPR to emergency slot
213 // s_waitcnt vmcnt(0) ; If a free VGPR was found
214 // s_mov_b64 exec, s[6:7] ; Save exec
215 //
216 // Writes these instructions if no SGPR can be scavenged:
217 // buffer_load_dword v0 ; Restore inactive lanes
218 // s_waitcnt vmcnt(0) ; If a free VGPR was found
219 // s_not_b64 exec, exec
220 // buffer_load_dword v0 ; Only if no free VGPR was found
221 void restore() {
222 if (SavedExecReg) {
223 // Restore used lanes
224 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
225 /*IsKill*/ false);
226 // Restore exec
227 auto I = BuildMI(MBB, MI, DL, TII.get(MovOpc), ExecReg)
228 .addReg(SavedExecReg, RegState::Kill);
229 // Add an implicit use of the load so it is not dead.
230 // FIXME This inserts an unnecessary waitcnt
231 if (!TmpVGPRLive) {
232 I.addReg(TmpVGPR, RegState::ImplicitKill);
233 }
234 } else {
235 // Restore inactive lanes
236 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
237 /*IsKill*/ false);
238 auto I = BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
239 if (!TmpVGPRLive) {
240 I.addReg(TmpVGPR, RegState::ImplicitKill);
241 }
242 // Restore active lanes
243 if (TmpVGPRLive)
244 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true);
245 }
246 }
247
248 // Write TmpVGPR to memory or read TmpVGPR from memory.
249 // Either using a single buffer_load/store if exec is set to the needed mask
250 // or using
251 // buffer_load
252 // s_not exec, exec
253 // buffer_load
254 // s_not exec, exec
255 void readWriteTmpVGPR(unsigned Offset, bool IsLoad) {
256 if (SavedExecReg) {
257 // Spill needed lanes
258 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
259 } else {
260 // Spill active lanes
261 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad,
262 /*IsKill*/ false);
263 // Spill inactive lanes
264 BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
265 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
266 BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
267 }
268 }
269};
270
271} // namespace llvm
272
273SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
274 : AMDGPUGenRegisterInfo(AMDGPU::PC_REG, ST.getAMDGPUDwarfFlavour()), ST(ST),
275 SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) {
276
277 assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
278 getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
279 (getSubRegIndexLaneMask(AMDGPU::lo16) |
280 getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
281 getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
282 "getNumCoveredRegs() will not work with generated subreg masks!");
283
284 RegPressureIgnoredUnits.resize(getNumRegUnits());
285 RegPressureIgnoredUnits.set(
286 *MCRegUnitIterator(MCRegister::from(AMDGPU::M0), this));
287 for (auto Reg : AMDGPU::VGPR_HI16RegClass)
288 RegPressureIgnoredUnits.set(*MCRegUnitIterator(Reg, this));
289
290 // HACK: Until this is fully tablegen'd.
291 static llvm::once_flag InitializeRegSplitPartsFlag;
292
293 static auto InitializeRegSplitPartsOnce = [this]() {
294 for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) {
295 unsigned Size = getSubRegIdxSize(Idx);
296 if (Size & 31)
297 continue;
298 std::vector<int16_t> &Vec = RegSplitParts[Size / 32 - 1];
299 unsigned Pos = getSubRegIdxOffset(Idx);
300 if (Pos % Size)
301 continue;
302 Pos /= Size;
303 if (Vec.empty()) {
304 unsigned MaxNumParts = 1024 / Size; // Maximum register is 1024 bits.
305 Vec.resize(MaxNumParts);
306 }
307 Vec[Pos] = Idx;
308 }
309 };
310
311 static llvm::once_flag InitializeSubRegFromChannelTableFlag;
312
313 static auto InitializeSubRegFromChannelTableOnce = [this]() {
314 for (auto &Row : SubRegFromChannelTable)
315 Row.fill(AMDGPU::NoSubRegister);
316 for (uint16_t Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
317 unsigned Width = AMDGPUSubRegIdxRanges[Idx].Size / 32;
318 unsigned Offset = AMDGPUSubRegIdxRanges[Idx].Offset / 32;
319 assert(Width < SubRegFromChannelTableWidthMap.size());
320 Width = SubRegFromChannelTableWidthMap[Width];
321 if (Width == 0)
322 continue;
323 unsigned TableIdx = Width - 1;
324 assert(TableIdx < SubRegFromChannelTable.size());
325 assert(Offset < SubRegFromChannelTable[TableIdx].size());
326 SubRegFromChannelTable[TableIdx][Offset] = Idx;
327 }
328 };
329
330 llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
331 llvm::call_once(InitializeSubRegFromChannelTableFlag,
332 InitializeSubRegFromChannelTableOnce);
333}
334
335void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved,
336 MCRegister Reg) const {
337 MCRegAliasIterator R(Reg, this, true);
338
339 for (; R.isValid(); ++R)
340 Reserved.set(*R);
341}
342
343// Forced to be here by one .inc
344const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
345 const MachineFunction *MF) const {
346 CallingConv::ID CC = MF->getFunction().getCallingConv();
347 switch (CC) {
348 case CallingConv::C:
349 case CallingConv::Fast:
350 case CallingConv::Cold:
351 case CallingConv::AMDGPU_Gfx:
352 return MF->getSubtarget<GCNSubtarget>().hasGFX90AInsts()
353 ? CSR_AMDGPU_HighRegs_With_AGPRs_SaveList
354 : CSR_AMDGPU_HighRegs_SaveList;
355 default: {
356 // Dummy to not crash RegisterClassInfo.
357 static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
358 return &NoCalleeSavedReg;
359 }
360 }
361}
362
363const MCPhysReg *
364SIRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const {
365 return nullptr;
366}
367
368const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
369 CallingConv::ID CC) const {
370 switch (CC) {
371 case CallingConv::C:
372 case CallingConv::Fast:
373 case CallingConv::Cold:
374 case CallingConv::AMDGPU_Gfx:
375 return MF.getSubtarget<GCNSubtarget>().hasGFX90AInsts()
376 ? CSR_AMDGPU_HighRegs_With_AGPRs_RegMask
377 : CSR_AMDGPU_HighRegs_RegMask;
378 default:
379 return nullptr;
380 }
381}
382
383const uint32_t *SIRegisterInfo::getNoPreservedMask() const {
384 return CSR_AMDGPU_NoRegs_RegMask;
385}
386
387Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
388 const SIFrameLowering *TFI =
389 MF.getSubtarget<GCNSubtarget>().getFrameLowering();
390 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
391 // During ISel lowering we always reserve the stack pointer in entry
392 // functions, but never actually want to reference it when accessing our own
393 // frame. If we need a frame pointer we use it, but otherwise we can just use
394 // an immediate "0" which we represent by returning NoRegister.
395 if (FuncInfo->isEntryFunction()) {
396 return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg() : Register();
397 }
398 return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg()
399 : FuncInfo->getStackPtrOffsetReg();
400}
401
402bool SIRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
403 // When we need stack realignment, we can't reference off of the
404 // stack pointer, so we reserve a base pointer.
405 const MachineFrameInfo &MFI = MF.getFrameInfo();
406 return MFI.getNumFixedObjects() && shouldRealignStack(MF);
407}
408
409Register SIRegisterInfo::getBaseRegister() const { return AMDGPU::SGPR34; }
410
411const uint32_t *SIRegisterInfo::getAllVGPRRegMask() const {
412 return CSR_AMDGPU_AllVGPRs_RegMask;
413}
414
415const uint32_t *SIRegisterInfo::getAllAGPRRegMask() const {
416 return CSR_AMDGPU_AllAGPRs_RegMask;
417}
418
419const uint32_t *SIRegisterInfo::getAllVectorRegMask() const {
420 return CSR_AMDGPU_AllVectorRegs_RegMask;
421}
422
423const uint32_t *SIRegisterInfo::getAllAllocatableSRegMask() const {
424 return CSR_AMDGPU_AllAllocatableSRegs_RegMask;
425}
426
427unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel,
428 unsigned NumRegs) {
429 assert(NumRegs < SubRegFromChannelTableWidthMap.size());
430 unsigned NumRegIndex = SubRegFromChannelTableWidthMap[NumRegs];
431 assert(NumRegIndex && "Not implemented");
432 assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].size());
433 return SubRegFromChannelTable[NumRegIndex - 1][Channel];
434}
435
436MCRegister SIRegisterInfo::reservedPrivateSegmentBufferReg(
437 const MachineFunction &MF) const {
438 unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
439 MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
440 return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SGPR_128RegClass);
441}
442
443BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
444 BitVector Reserved(getNumRegs());
445 Reserved.set(AMDGPU::MODE);
446
447 // EXEC_LO and EXEC_HI could be allocated and used as regular register, but
448 // this seems likely to result in bugs, so I'm marking them as reserved.
449 reserveRegisterTuples(Reserved, AMDGPU::EXEC);
450 reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
451
452 // M0 has to be reserved so that llvm accepts it as a live-in into a block.
453 reserveRegisterTuples(Reserved, AMDGPU::M0);
454
455 // Reserve src_vccz, src_execz, src_scc.
456 reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
457 reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
458 reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
459
460 // Reserve the memory aperture registers.
461 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
462 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
463 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
464 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
465
466 // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen.
467 reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
468
469 // Reserve xnack_mask registers - support is not implemented in Codegen.
470 reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
471
472 // Reserve lds_direct register - support is not implemented in Codegen.
473 reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
474
475 // Reserve Trap Handler registers - support is not implemented in Codegen.
476 reserveRegisterTuples(Reserved, AMDGPU::TBA);
477 reserveRegisterTuples(Reserved, AMDGPU::TMA);
478 reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
479 reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
480 reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
481 reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
482 reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
483 reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
484 reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
485 reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
486
487 // Reserve null register - it shall never be allocated
488 reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL);
489
490 // Disallow vcc_hi allocation in wave32. It may be allocated but most likely
491 // will result in bugs.
492 if (isWave32) {
493 Reserved.set(AMDGPU::VCC);
494 Reserved.set(AMDGPU::VCC_HI);
495 }
496
497 unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
498 unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
499 for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
500 unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
501 reserveRegisterTuples(Reserved, Reg);
502 }
503
504 unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
505 // TODO: In an entry function without calls and AGPRs used it is possible
506 // to use the whole register budget for VGPRs. Even more it shall
507 // be possible to estimate maximum AGPR/VGPR pressure and split
508 // register file accordingly.
509 if (ST.hasGFX90AInsts())
510 MaxNumVGPRs /= 2;
511 unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
512 for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
513 unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
514 reserveRegisterTuples(Reserved, Reg);
515 Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
516 reserveRegisterTuples(Reserved, Reg);
517 }
518
519 for (auto Reg : AMDGPU::SReg_32RegClass) {
520 Reserved.set(getSubReg(Reg, AMDGPU::hi16));
521 Register Low = getSubReg(Reg, AMDGPU::lo16);
522 // This is to prevent BB vcc liveness errors.
523 if (!AMDGPU::SGPR_LO16RegClass.contains(Low))
524 Reserved.set(Low);
525 }
526
527 for (auto Reg : AMDGPU::AGPR_32RegClass) {
528 Reserved.set(getSubReg(Reg, AMDGPU::hi16));
529 }
530
531 // Reserve all the rest AGPRs if there are no instructions to use it.
532 if (!ST.hasMAIInsts()) {
533 for (unsigned i = 0; i < MaxNumVGPRs; ++i) {
534 unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
535 reserveRegisterTuples(Reserved, Reg);
536 }
537 }
538
539 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
540
541 Register ScratchRSrcReg = MFI->getScratchRSrcReg();
542 if (ScratchRSrcReg != AMDGPU::NoRegister) {
543 // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
544 // to spill.
545 // TODO: May need to reserve a VGPR if doing LDS spilling.
546 reserveRegisterTuples(Reserved, ScratchRSrcReg);
547 }
548
549 // We have to assume the SP is needed in case there are calls in the function,
550 // which is detected after the function is lowered. If we aren't really going
551 // to need SP, don't bother reserving it.
552 MCRegister StackPtrReg = MFI->getStackPtrOffsetReg();
553
554 if (StackPtrReg) {
555 reserveRegisterTuples(Reserved, StackPtrReg);
556 assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
557 }
558
559 MCRegister FrameReg = MFI->getFrameOffsetReg();
560 if (FrameReg) {
561 reserveRegisterTuples(Reserved, FrameReg);
562 assert(!isSubRegister(ScratchRSrcReg, FrameReg));
563 }
564
565 if (hasBasePointer(MF)) {
566 MCRegister BasePtrReg = getBaseRegister();
567 reserveRegisterTuples(Reserved, BasePtrReg);
568 assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));
569 }
570
571 for (auto Reg : MFI->WWMReservedRegs) {
572 reserveRegisterTuples(Reserved, Reg.first);
573 }
574
575 // FIXME: Stop using reserved registers for this.
576 for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
577 reserveRegisterTuples(Reserved, Reg);
578
579 for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
580 reserveRegisterTuples(Reserved, Reg);
581
582 for (auto SSpill : MFI->getSGPRSpillVGPRs())
583 reserveRegisterTuples(Reserved, SSpill.VGPR);
584
585 return Reserved;
586}
587
588bool SIRegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
589 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
590 // On entry, the base address is 0, so it can't possibly need any more
591 // alignment.
592
593 // FIXME: Should be able to specify the entry frame alignment per calling
594 // convention instead.
595 if (Info->isEntryFunction())
596 return false;
597
598 return TargetRegisterInfo::shouldRealignStack(MF);
599}
600
601bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
602 const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
603 if (Info->isEntryFunction()) {
604 const MachineFrameInfo &MFI = Fn.getFrameInfo();
605 return MFI.hasStackObjects() || MFI.hasCalls();
606 }
607
608 // May need scavenger for dealing with callee saved registers.
609 return true;
610}
611
612bool SIRegisterInfo::requiresFrameIndexScavenging(
613 const MachineFunction &MF) const {
614 // Do not use frame virtual registers. They used to be used for SGPRs, but
615 // once we reach PrologEpilogInserter, we can no longer spill SGPRs. If the
616 // scavenger fails, we can increment/decrement the necessary SGPRs to avoid a
617 // spill.
618 return false;
619}
620
621bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
622 const MachineFunction &MF) const {
623 const MachineFrameInfo &MFI = MF.getFrameInfo();
624 return MFI.hasStackObjects();
625}
626
627bool SIRegisterInfo::requiresVirtualBaseRegisters(
628 const MachineFunction &) const {
629 // There are no special dedicated stack or frame pointers.
630 return true;
631}
632
633int64_t SIRegisterInfo::getScratchInstrOffset(const MachineInstr *MI) const {
634 assert(SIInstrInfo::isMUBUF(*MI) || SIInstrInfo::isFLATScratch(*MI));
635
636 int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
637 AMDGPU::OpName::offset);
638 return MI->getOperand(OffIdx).getImm();
639}
640
641int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
642 int Idx) const {
643 if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
644 return 0;
645
646 assert((Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
647 AMDGPU::OpName::vaddr) ||
648 (Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
649 AMDGPU::OpName::saddr))) &&
650 "Should never see frame index on non-address operand");
651
652 return getScratchInstrOffset(MI);
653}
654
655bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
656 if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
657 return false;
658
659 int64_t FullOffset = Offset + getScratchInstrOffset(MI);
660
661 if (SIInstrInfo::isMUBUF(*MI))
662 return !SIInstrInfo::isLegalMUBUFImmOffset(FullOffset);
663
664 const SIInstrInfo *TII = ST.getInstrInfo();
665 return !TII->isLegalFLATOffset(FullOffset, AMDGPUAS::PRIVATE_ADDRESS,
666 SIInstrFlags::FlatScratch);
667}
668
669Register SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
670 int FrameIdx,
671 int64_t Offset) const {
672 MachineBasicBlock::iterator Ins = MBB->begin();
673 DebugLoc DL; // Defaults to "unknown"
674
675 if (Ins != MBB->end())
676 DL = Ins->getDebugLoc();
677
678 MachineFunction *MF = MBB->getParent();
679 const SIInstrInfo *TII = ST.getInstrInfo();
680 MachineRegisterInfo &MRI = MF->getRegInfo();
681 unsigned MovOpc = ST.enableFlatScratch() ? AMDGPU::S_MOV_B32
682 : AMDGPU::V_MOV_B32_e32;
683
684 Register BaseReg = MRI.createVirtualRegister(
685 ST.enableFlatScratch() ? &AMDGPU::SReg_32_XEXEC_HIRegClass
686 : &AMDGPU::VGPR_32RegClass);
687
688 if (Offset == 0) {
689 BuildMI(*MBB, Ins, DL, TII->get(MovOpc), BaseReg)
690 .addFrameIndex(FrameIdx);
691 return BaseReg;
692 }
693
694 Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
695
696 Register FIReg = MRI.createVirtualRegister(
697 ST.enableFlatScratch() ? &AMDGPU::SReg_32_XM0RegClass
698 : &AMDGPU::VGPR_32RegClass);
699
700 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
701 .addImm(Offset);
702 BuildMI(*MBB, Ins, DL, TII->get(MovOpc), FIReg)
703 .addFrameIndex(FrameIdx);
704
705 if (ST.enableFlatScratch() ) {
706 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_ADD_U32), BaseReg)
707 .addReg(OffsetReg, RegState::Kill)
708 .addReg(FIReg);
709 return BaseReg;
710 }
711
712 TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
713 .addReg(OffsetReg, RegState::Kill)
714 .addReg(FIReg)
715 .addImm(0); // clamp bit
716
717 return BaseReg;
718}
719
720void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
721 int64_t Offset) const {
722 const SIInstrInfo *TII = ST.getInstrInfo();
723 bool IsFlat = TII->isFLATScratch(MI);
724
725#ifndef NDEBUG
726 // FIXME: Is it possible to be storing a frame index to itself?
727 bool SeenFI = false;
728 for (const MachineOperand &MO: MI.operands()) {
729 if (MO.isFI()) {
730 if (SeenFI)
731 llvm_unreachable("should not see multiple frame indices")::llvm::llvm_unreachable_internal("should not see multiple frame indices"
, "/build/llvm-toolchain-snapshot-13~++20210506100649+6304c0836a4d/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp"
, 731)
;
732
733 SeenFI = true;
734 }
735 }
736#endif
737
738 MachineOperand *FIOp =
739 TII->getNamedOperand(MI, IsFlat ? AMDGPU::OpName::saddr
740 : AMDGPU::OpName::vaddr);
741
742 MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
743 int64_t NewOffset = OffsetOp->getImm() + Offset;
744
745 assert(FIOp && FIOp->isFI() && "frame index must be address operand");
746 assert(TII->isMUBUF(MI) || TII->isFLATScratch(MI));
747
748 if (IsFlat) {
749 assert(TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
750 SIInstrFlags::FlatScratch) &&
751 "offset should be legal");
752 FIOp->ChangeToRegister(BaseReg, false);
753 OffsetOp->setImm(NewOffset);
754 return;
755 }
756
757#ifndef NDEBUG
758 MachineOperand *SOffset = TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
759 assert(SOffset->isImm() && SOffset->getImm() == 0);
760#endif
761
762 assert(SIInstrInfo::isLegalMUBUFImmOffset(NewOffset) &&
763 "offset should be legal");
764
765 FIOp->ChangeToRegister(BaseReg, false);
766 OffsetOp->setImm(NewOffset);
767}
768
769bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
770 Register BaseReg,
771 int64_t Offset) const {
772 if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
773 return false;
774
775 int64_t NewOffset = Offset + getScratchInstrOffset(MI);
776
777 if (SIInstrInfo::isMUBUF(*MI))
778 return SIInstrInfo::isLegalMUBUFImmOffset(NewOffset);
779
780 const SIInstrInfo *TII = ST.getInstrInfo();
781 return TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
782 SIInstrFlags::FlatScratch);
783}
784
785const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
786 const MachineFunction &MF, unsigned Kind) const {
787 // This is inaccurate. It depends on the instruction and address space. The
788 // only place where we should hit this is for dealing with frame indexes /
789 // private accesses, so this is correct in that case.
790 return &AMDGPU::VGPR_32RegClass;
791}
792
793static unsigned getNumSubRegsForSpillOp(unsigned Op) {
794
795 switch (Op) {
796 case AMDGPU::SI_SPILL_S1024_SAVE:
797 case AMDGPU::SI_SPILL_S1024_RESTORE:
798 case AMDGPU::SI_SPILL_V1024_SAVE:
799 case AMDGPU::SI_SPILL_V1024_RESTORE:
800 case AMDGPU::SI_SPILL_A1024_SAVE:
801 case AMDGPU::SI_SPILL_A1024_RESTORE:
802 return 32;
803 case AMDGPU::SI_SPILL_S512_SAVE:
804 case AMDGPU::SI_SPILL_S512_RESTORE:
805 case AMDGPU::SI_SPILL_V512_SAVE:
806 case AMDGPU::SI_SPILL_V512_RESTORE:
807 case AMDGPU::SI_SPILL_A512_SAVE:
808 case AMDGPU::SI_SPILL_A512_RESTORE:
809 return 16;
810 case AMDGPU::SI_SPILL_S256_SAVE:
811 case AMDGPU::SI_SPILL_S256_RESTORE:
812 case AMDGPU::SI_SPILL_V256_SAVE:
813 case AMDGPU::SI_SPILL_V256_RESTORE:
814 case AMDGPU::SI_SPILL_A256_SAVE:
815 case AMDGPU::SI_SPILL_A256_RESTORE:
816 return 8;
817 case AMDGPU::SI_SPILL_S192_SAVE:
818 case AMDGPU::SI_SPILL_S192_RESTORE:
819 case AMDGPU::SI_SPILL_V192_SAVE:
820 case AMDGPU::SI_SPILL_V192_RESTORE:
821 case AMDGPU::SI_SPILL_A192_SAVE:
822 case AMDGPU::SI_SPILL_A192_RESTORE:
823 return 6;
824 case AMDGPU::SI_SPILL_S160_SAVE:
825 case AMDGPU::SI_SPILL_S160_RESTORE:
826 case AMDGPU::SI_SPILL_V160_SAVE:
827 case AMDGPU::SI_SPILL_V160_RESTORE:
828 case AMDGPU::SI_SPILL_A160_SAVE:
829 case AMDGPU::SI_SPILL_A160_RESTORE:
830 return 5;
831 case AMDGPU::SI_SPILL_S128_SAVE:
832 case AMDGPU::SI_SPILL_S128_RESTORE:
833 case AMDGPU::SI_SPILL_V128_SAVE:
834 case AMDGPU::SI_SPILL_V128_RESTORE:
835 case AMDGPU::SI_SPILL_A128_SAVE:
836 case AMDGPU::SI_SPILL_A128_RESTORE:
837 return 4;
838 case AMDGPU::SI_SPILL_S96_SAVE:
839 case AMDGPU::SI_SPILL_S96_RESTORE:
840 case AMDGPU::SI_SPILL_V96_SAVE:
841 case AMDGPU::SI_SPILL_V96_RESTORE:
842 case AMDGPU::SI_SPILL_A96_SAVE:
843 case AMDGPU::SI_SPILL_A96_RESTORE:
844 return 3;
845 case AMDGPU::SI_SPILL_S64_SAVE:
846 case AMDGPU::SI_SPILL_S64_RESTORE:
847 case AMDGPU::SI_SPILL_V64_SAVE:
848 case AMDGPU::SI_SPILL_V64_RESTORE:
849 case AMDGPU::SI_SPILL_A64_SAVE:
850 case AMDGPU::SI_SPILL_A64_RESTORE:
851 return 2;
852 case AMDGPU::SI_SPILL_S32_SAVE:
853 case AMDGPU::SI_SPILL_S32_RESTORE:
854 case AMDGPU::SI_SPILL_V32_SAVE:
855 case AMDGPU::SI_SPILL_V32_RESTORE:
856 case AMDGPU::SI_SPILL_A32_SAVE:
857 case AMDGPU::SI_SPILL_A32_RESTORE:
858 return 1;
859 default: llvm_unreachable("Invalid spill opcode");
860 }
861}
862
863static int getOffsetMUBUFStore(unsigned Opc) {
864 switch (Opc) {
865 case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
866 return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
867 case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
868 return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
869 case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
870 return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
871 case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
872 return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
873 case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
874 return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
875 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
876 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
877 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
878 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
879 default:
880 return -1;
881 }
882}
883
884static int getOffsetMUBUFLoad(unsigned Opc) {
885 switch (Opc) {
886 case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
887 return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
888 case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
889 return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
890 case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
891 return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
892 case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
893 return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
894 case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
895 return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
896 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
897 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
898 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
899 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
900 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
901 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
902 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
903 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
904 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
905 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
906 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
907 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
908 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
909 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
910 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
911 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
912 default:
913 return -1;
914 }
915}
916
917static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
918 MachineBasicBlock &MBB,
919 MachineBasicBlock::iterator MI,
920 int Index, unsigned Lane,
921 unsigned ValueReg, bool IsKill) {
922 MachineFunction *MF = MBB.getParent();
923 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
924 const SIInstrInfo *TII = ST.getInstrInfo();
925
926 MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane);
927
928 if (Reg == AMDGPU::NoRegister)
929 return MachineInstrBuilder();
930
931 bool IsStore = MI->mayStore();
932 MachineRegisterInfo &MRI = MF->getRegInfo();
933 auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
934
935 unsigned Dst = IsStore ? Reg : ValueReg;
936 unsigned Src = IsStore ? ValueReg : Reg;
937 unsigned Opc = (IsStore ^ TRI->isVGPR(MRI, Reg)) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
938 : AMDGPU::V_ACCVGPR_READ_B32_e64;
939
940 auto MIB = BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
941 .addReg(Src, getKillRegState(IsKill));
942 MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
943 return MIB;
944}
945
946// This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
947// need to handle the case where an SGPR may need to be spilled while spilling.
948static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST,
949 MachineFrameInfo &MFI,
950 MachineBasicBlock::iterator MI,
951 int Index,
952 int64_t Offset) {
953 const SIInstrInfo *TII = ST.getInstrInfo();
954 MachineBasicBlock *MBB = MI->getParent();
955 const DebugLoc &DL = MI->getDebugLoc();
956 bool IsStore = MI->mayStore();
957
958 unsigned Opc = MI->getOpcode();
959 int LoadStoreOp = IsStore ?
960 getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
961 if (LoadStoreOp == -1)
962 return false;
963
964 const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
965 if (spillVGPRtoAGPR(ST, *MBB, MI, Index, 0, Reg->getReg(), false).getInstr())
966 return true;
967
968 MachineInstrBuilder NewMI =
969 BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
970 .add(*Reg)
971 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
972 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
973 .addImm(Offset)
974 .addImm(0) // cpol
975 .addImm(0) // tfe
976 .addImm(0) // swz
977 .cloneMemRefs(*MI);
978
979 const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
980 AMDGPU::OpName::vdata_in);
981 if (VDataIn)
982 NewMI.add(*VDataIn);
983 return true;
984}
985
986static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII,
987 unsigned LoadStoreOp,
988 unsigned EltSize) {
989 bool IsStore = TII->get(LoadStoreOp).mayStore();
990 bool UseST =
991 AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0 &&
992 AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::saddr) < 0;
993
994 switch (EltSize) {
995 case 4:
996 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
997 : AMDGPU::SCRATCH_LOAD_DWORD_SADDR;
998 break;
999 case 8:
1000 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR
1001 : AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR;
1002 break;
1003 case 12:
1004 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR
1005 : AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR;
1006 break;
1007 case 16:
1008 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX4_SADDR
1009 : AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR;
1010 break;
1011 default:
1012 llvm_unreachable("Unexpected spill load/store size!");
1013 }
1014
1015 if (UseST)
1016 LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
1017
1018 return LoadStoreOp;
1019}
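
getFlatScratchSpillOpcode above dispatches on the element size in bytes to a DWORD, DWORDX2, DWORDX3 or DWORDX4 flat-scratch opcode, then optionally switches to the ST (no address register) form. A minimal sketch of the size dispatch only, with a hypothetical FakeScratchOp enum in place of the generated opcode table:

#include <cassert>

enum FakeScratchOp { StoreDword, StoreDwordX2, StoreDwordX3, StoreDwordX4 };

// Flat-scratch spills are emitted in 4/8/12/16-byte chunks; anything else is
// a programming error, just as in the switch above.
static FakeScratchOp selectScratchStore(unsigned EltSizeBytes) {
  switch (EltSizeBytes) {
  case 4:  return StoreDword;
  case 8:  return StoreDwordX2;
  case 12: return StoreDwordX3;
  case 16: return StoreDwordX4;
  }
  assert(false && "Unexpected spill load/store size!");
  return StoreDword; // not reached with valid input
}

int main() {
  assert(selectScratchStore(12) == StoreDwordX3);
}
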
1020
1021void SIRegisterInfo::buildSpillLoadStore(
1022 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1023 unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
1024 MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO,
1025 RegScavenger *RS, LivePhysRegs *LiveRegs) const {
1026 assert((!RS || !LiveRegs) && "Only RS or LiveRegs can be set but not both");
1027
1028 MachineFunction *MF = MBB.getParent();
1029 const SIInstrInfo *TII = ST.getInstrInfo();
1030 const MachineFrameInfo &MFI = MF->getFrameInfo();
1031 const SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
1032
1033 const MCInstrDesc *Desc = &TII->get(LoadStoreOp);
1034 const DebugLoc &DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1035 bool IsStore = Desc->mayStore();
1036 bool IsFlat = TII->isFLATScratch(LoadStoreOp);
1037
1038 bool Scavenged = false;
1039 MCRegister SOffset = ScratchOffsetReg;
1040
1041 const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
1042 // On gfx90a+ AGPR is a regular VGPR acceptable for loads and stores.
1043 const bool IsAGPR = !ST.hasGFX90AInsts() && hasAGPRs(RC);
1044 const unsigned RegWidth = AMDGPU::getRegBitWidth(RC->getID()) / 8;
1045
1046 // Always use 4 byte operations for AGPRs because we need to scavenge
1047 // a temporary VGPR.
1048 unsigned EltSize = (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u) : 4u;
1049 unsigned NumSubRegs = RegWidth / EltSize;
1050 unsigned Size = NumSubRegs * EltSize;
1051 unsigned RemSize = RegWidth - Size;
1052 unsigned NumRemSubRegs = RemSize ? 1 : 0;
1053 int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
1054 int64_t MaxOffset = Offset + Size + RemSize - EltSize;
1055 int64_t ScratchOffsetRegDelta = 0;
1056
1057 if (IsFlat && EltSize > 4) {
1058 LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
1059 Desc = &TII->get(LoadStoreOp);
1060 }
1061
1062 Align Alignment = MFI.getObjectAlign(Index);
1063 const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
1064
1065 assert((IsFlat || ((Offset % EltSize) == 0)) &&
1066 "unexpected VGPR spill offset");
1067
1068 bool IsOffsetLegal =
1069 IsFlat ? TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
1070 SIInstrFlags::FlatScratch)
1071 : SIInstrInfo::isLegalMUBUFImmOffset(MaxOffset);
1072 if (!IsOffsetLegal || (IsFlat && !SOffset && !ST.hasFlatScratchSTMode())) {
1073 SOffset = MCRegister();
1074
1075 // We currently only support spilling VGPRs to EltSize boundaries, meaning
1076 // we can simplify the adjustment of Offset here to just scale with
1077 // WavefrontSize.
1078 if (!IsFlat)
1079 Offset *= ST.getWavefrontSize();
1080
1081 // We don't have access to the register scavenger if this function is called
1082 // during PEI::scavengeFrameVirtualRegs(), so use LiveRegs in this case.
1083 if (RS) {
1084 SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0, false);
1085 } else if (LiveRegs) {
1086 for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {
1087 if (LiveRegs->available(MF->getRegInfo(), Reg)) {
1088 SOffset = Reg;
1089 break;
1090 }
1091 }
1092 }
1093
1094 if (!SOffset) {
1095 // There are no free SGPRs, and we are in the process of spilling
1096 // VGPRs too. Since we need a VGPR in order to spill SGPRs (this is true
1097 // on SI/CI, and on VI it is true until we implement spilling using scalar
1098 // stores), we have no way to free up an SGPR. Our solution here is to
1099 // add the offset directly to the ScratchOffset or StackPtrOffset
1100 // register, and then subtract the offset after the spill to return the
1101 // register to its original value.
1102 if (!ScratchOffsetReg)
1103 ScratchOffsetReg = FuncInfo->getStackPtrOffsetReg();
1104 SOffset = ScratchOffsetReg;
1105 ScratchOffsetRegDelta = Offset;
1106 } else {
1107 Scavenged = true;
1108 }
1109
1110 if (!SOffset)
1111 report_fatal_error("could not scavenge SGPR to spill in entry function");
1112
1113 if (ScratchOffsetReg == AMDGPU::NoRegister) {
1114 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset).addImm(Offset);
1115 } else {
1116 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
1117 .addReg(ScratchOffsetReg)
1118 .addImm(Offset);
1119 }
1120
1121 Offset = 0;
1122 }
1123
1124 if (IsFlat && SOffset == AMDGPU::NoRegister) {
1125 assert(AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0
1126 && "Unexpected vaddr for flat scratch with a FI operand");
1127
1128 assert(ST.hasFlatScratchSTMode());
1129 LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
1130 Desc = &TII->get(LoadStoreOp);
1131 }
1132
1133 Register TmpReg;
1134
1135 for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;
1136 ++i, RegOffset += EltSize) {
1137 if (i == NumSubRegs) {
1138 EltSize = RemSize;
1139 LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
1140 }
1141 Desc = &TII->get(LoadStoreOp);
1142
1143 unsigned NumRegs = EltSize / 4;
1144 Register SubReg = e == 1
1145 ? ValueReg
1146 : Register(getSubReg(ValueReg,
1147 getSubRegFromChannel(RegOffset / 4, NumRegs)));
1148
1149 unsigned SOffsetRegState = 0;
1150 unsigned SrcDstRegState = getDefRegState(!IsStore);
1151 if (i + 1 == e) {
1152 SOffsetRegState |= getKillRegState(Scavenged);
1153 // The last implicit use carries the "Kill" flag.
1154 SrcDstRegState |= getKillRegState(IsKill);
1155 }
1156
1157 // Make sure the whole register is defined if there are undef components by
1158 // adding an implicit def of the super-reg on the first instruction.
1159 bool NeedSuperRegDef = e > 1 && IsStore && i == 0;
1160 bool NeedSuperRegImpOperand = e > 1;
1161
1162 unsigned Lane = RegOffset / 4;
1163 unsigned LaneE = (RegOffset + EltSize) / 4;
1164 for ( ; Lane != LaneE; ++Lane) {
1165 bool IsSubReg = e > 1 || EltSize > 4;
1166 Register Sub = IsSubReg
1167 ? Register(getSubReg(ValueReg, getSubRegFromChannel(Lane)))
1168 : ValueReg;
1169 auto MIB = spillVGPRtoAGPR(ST, MBB, MI, Index, Lane, Sub, IsKill);
1170 if (!MIB.getInstr())
1171 break;
1172 if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == 0)) {
1173 MIB.addReg(ValueReg, RegState::ImplicitDefine);
1174 NeedSuperRegDef = false;
1175 }
1176 if (IsSubReg || NeedSuperRegImpOperand) {
1177 NeedSuperRegImpOperand = true;
1178 unsigned State = SrcDstRegState;
1179 if (Lane + 1 != LaneE)
1180 State &= ~RegState::Kill;
1181 MIB.addReg(ValueReg, RegState::Implicit | State);
1182 }
1183 }
1184
1185 if (Lane == LaneE) // Fully spilled into AGPRs.
1186 continue;
1187
1188 // Offset in bytes from the beginning of the ValueReg to the portion we
1189 // still need to spill. It may differ from RegOffset if a portion of the
1190 // current SubReg has already been spilled into AGPRs by the loop above.
1191 unsigned RemRegOffset = Lane * 4;
1192 unsigned RemEltSize = EltSize - (RemRegOffset - RegOffset);
1193 if (RemEltSize != EltSize) { // Partially spilled to AGPRs
1194 assert(IsFlat && EltSize > 4);
1195
1196 unsigned NumRegs = RemEltSize / 4;
1197 SubReg = Register(getSubReg(ValueReg,
1198 getSubRegFromChannel(RemRegOffset / 4, NumRegs)));
1199 unsigned Opc = getFlatScratchSpillOpcode(TII, LoadStoreOp, RemEltSize);
1200 Desc = &TII->get(Opc);
1201 }
1202
1203 unsigned FinalReg = SubReg;
1204
1205 if (IsAGPR) {
1206 assert(EltSize == 4);
1207
1208 if (!TmpReg) {
1209 assert(RS && "Needs to have RegScavenger to spill an AGPR!");
1210 // FIXME: change to scavengeRegisterBackwards()
1211 TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
1212 RS->setRegUsed(TmpReg);
1213 }
1214 if (IsStore) {
1215 auto AccRead = BuildMI(MBB, MI, DL,
1216 TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TmpReg)
1217 .addReg(SubReg, getKillRegState(IsKill));
1218 if (NeedSuperRegDef)
1219 AccRead.addReg(ValueReg, RegState::ImplicitDefine);
1220 AccRead->setAsmPrinterFlag(MachineInstr::ReloadReuse);
1221 }
1222 SubReg = TmpReg;
1223 }
1224
1225 MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(RemRegOffset);
1226 MachineMemOperand *NewMMO =
1227 MF->getMachineMemOperand(PInfo, MMO->getFlags(), RemEltSize,
1228 commonAlignment(Alignment, RemRegOffset));
1229
1230 auto MIB =
1231 BuildMI(MBB, MI, DL, *Desc)
1232 .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill));
1233 if (!IsFlat)
1234 MIB.addReg(FuncInfo->getScratchRSrcReg());
1235
1236 if (SOffset == AMDGPU::NoRegister) {
1237 if (!IsFlat)
1238 MIB.addImm(0);
1239 } else {
1240 MIB.addReg(SOffset, SOffsetRegState);
1241 }
1242 MIB.addImm(Offset + RemRegOffset)
1243 .addImm(0); // cpol
1244 if (!IsFlat)
1245 MIB.addImm(0) // tfe
1246 .addImm(0); // swz
1247 MIB.addMemOperand(NewMMO);
1248
1249 if (!IsAGPR && NeedSuperRegDef)
1250 MIB.addReg(ValueReg, RegState::ImplicitDefine);
1251
1252 if (!IsStore && TmpReg != AMDGPU::NoRegister) {
1253 MIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),
1254 FinalReg)
1255 .addReg(TmpReg, RegState::Kill);
1256 MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
1257 }
1258
1259 if (NeedSuperRegImpOperand)
1260 MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
1261 }
1262
1263 if (ScratchOffsetRegDelta != 0) {
1264 // Subtract the offset we added to the ScratchOffset register.
1265 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), SOffset)
1266 .addReg(SOffset)
1267 .addImm(ScratchOffsetRegDelta);
1268 }
1269}
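
When buildSpillLoadStore cannot scavenge an SGPR for the scratch offset, it folds the frame offset into the scratch/stack-pointer offset register with S_ADD_U32, performs the access, and undoes the change with S_SUB_U32 (the ScratchOffsetRegDelta path above). A plain C++ sketch of that temporarily-adjust-then-restore pattern; 'base' and 'accessAt' are illustrative stand-ins, not compiler code:

#include <cassert>
#include <cstdint>

// Stand-in for the MUBUF/flat-scratch access; returns the effective address.
static uint32_t accessAt(uint32_t base, uint32_t imm) {
  return base + imm;
}

int main() {
  uint32_t base = 0x1000;      // original scratch offset register value
  const uint32_t offset = 256; // frame-object offset we could not materialize

  base += offset;              // S_ADD_U32  SOffset, ScratchOffsetReg, Offset
  uint32_t addr = accessAt(base, /*imm=*/0);
  base -= offset;              // S_SUB_U32  SOffset, SOffset, Delta (restore)

  assert(addr == 0x1100);
  assert(base == 0x1000);      // register is back to its original value
}
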
1270
1271void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index,
1272 int Offset, bool IsLoad,
1273 bool IsKill) const {
1274 // Load/store VGPR
1275 MachineFrameInfo &FrameInfo = SB.MF.getFrameInfo();
1276 assert(FrameInfo.getStackID(Index) != TargetStackID::SGPRSpill);
1277
1278 Register FrameReg =
1279 FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(SB.MF)
1280 ? getBaseRegister()
1281 : getFrameRegister(SB.MF);
1282
1283 Align Alignment = FrameInfo.getObjectAlign(Index);
1284 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SB.MF, Index);
1285 MachineMemOperand *MMO = SB.MF.getMachineMemOperand(
1286 PtrInfo, IsLoad ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore,
1287 SB.EltSize, Alignment);
1288
1289 if (IsLoad) {
1290 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
1291 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1292 buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg,
1293 Offset * SB.EltSize, MMO, SB.RS);
1294 } else {
1295 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1296 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1297 buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, IsKill, FrameReg,
1298 Offset * SB.EltSize, MMO, SB.RS);
1299 // This only ever adds one VGPR spill
1300 SB.MFI.addToSpilledVGPRs(1);
1301 }
1302}
1303
1304bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
1305 int Index,
1306 RegScavenger *RS,
1307 bool OnlyToVGPR) const {
1308 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
1309
1310 ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills =
1311 SB.MFI.getSGPRToVGPRSpills(Index);
1312 bool SpillToVGPR = !VGPRSpills.empty();
1313 if (OnlyToVGPR && !SpillToVGPR)
1314 return false;
1315
1316 assert(SpillToVGPR || (SB.SuperReg != SB.MFI.getStackPtrOffsetReg() &&
1317 SB.SuperReg != SB.MFI.getFrameOffsetReg()));
1318
1319 if (SpillToVGPR) {
1320 for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
1321 Register SubReg =
1322 SB.NumSubRegs == 1
1323 ? SB.SuperReg
1324 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1325 SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
1326
1327 bool UseKill = SB.IsKill && i == SB.NumSubRegs - 1;
1328
1329 // Mark the "old value of vgpr" input undef only if this is the first sgpr
1330 // spill to this specific vgpr in the first basic block.
1331 auto MIB = BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
1332 Spill.VGPR)
1333 .addReg(SubReg, getKillRegState(UseKill))
1334 .addImm(Spill.Lane)
1335 .addReg(Spill.VGPR);
1336
1337 if (i == 0 && SB.NumSubRegs > 1) {
1338 // We may be spilling a super-register which is only partially defined,
1339 // and need to ensure later spills think the value is defined.
1340 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1341 }
1342
1343 if (SB.NumSubRegs > 1)
1344 MIB.addReg(SB.SuperReg, getKillRegState(UseKill) | RegState::Implicit);
1345
1346 // FIXME: Since this spills to another register instead of an actual
1347 // frame index, we should delete the frame index when all references to
1348 // it are fixed.
1349 }
1350 } else {
1351 SB.prepare();
1352
1353 // SubReg carries the "Kill" flag when SubReg == SB.SuperReg.
1354 unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill);
1355
1356 // Per VGPR helper data
1357 auto PVD = SB.getPerVGPRData();
1358
1359 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1360 unsigned TmpVGPRFlags = RegState::Undef;
1361
1362 // Write sub registers into the VGPR
1363 for (unsigned i = Offset * PVD.PerVGPR,
1364 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
1365 i < e; ++i) {
1366 Register SubReg =
1367 SB.NumSubRegs == 1
1368 ? SB.SuperReg
1369 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1370
1371 MachineInstrBuilder WriteLane =
1372 BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
1373 SB.TmpVGPR)
1374 .addReg(SubReg, SubKillState)
1375 .addImm(i % PVD.PerVGPR)
1376 .addReg(SB.TmpVGPR, TmpVGPRFlags);
1377 TmpVGPRFlags = 0;
1378
1379 // There could be undef components of a spilled super register.
1380 // TODO: Can we detect this and skip the spill?
1381 if (SB.NumSubRegs > 1) {
1382 // The last implicit use of the SB.SuperReg carries the "Kill" flag.
1383 unsigned SuperKillState = 0;
1384 if (i + 1 == SB.NumSubRegs)
1385 SuperKillState |= getKillRegState(SB.IsKill);
1386 WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState);
1387 }
1388 }
1389
1390 // Write out VGPR
1391 SB.readWriteTmpVGPR(Offset, /*IsLoad*/ false);
1392 }
1393
1394 SB.restore();
1395 }
1396
1397 MI->eraseFromParent();
1398 SB.MFI.addToSpilledSGPRs(SB.NumSubRegs);
1399 return true;
1400}
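
In the memory-spill path of spillSGPR above, sub-registers are packed PerVGPR lanes at a time into a temporary VGPR with V_WRITELANE_B32 (lane index i % PerVGPR), and the VGPR is written out once per chunk. A sketch of just that chunk/lane arithmetic, with small made-up values for NumSubRegs and PerVGPR so the output stays readable:

#include <algorithm>
#include <cstdio>

int main() {
  const unsigned NumSubRegs = 5; // e.g. five SGPR dwords to spill (made up)
  const unsigned PerVGPR = 2;    // lanes per temporary VGPR (made up)
  const unsigned NumVGPRs = (NumSubRegs + PerVGPR - 1) / PerVGPR;

  for (unsigned Chunk = 0; Chunk < NumVGPRs; ++Chunk) {
    // Pack this chunk's sub-registers into lanes of the temporary VGPR.
    for (unsigned i = Chunk * PerVGPR,
                  e = std::min((Chunk + 1) * PerVGPR, NumSubRegs);
         i < e; ++i)
      std::printf("sub%u -> tmp VGPR %u, lane %u\n", i, Chunk, i % PerVGPR);
    // One store of the temporary VGPR per chunk.
    std::printf("flush tmp VGPR %u to the stack slot\n", Chunk);
  }
}
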
1401
1402bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
1403 int Index,
1404 RegScavenger *RS,
1405 bool OnlyToVGPR) const {
1406 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
1407
1408 ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills =
1409 SB.MFI.getSGPRToVGPRSpills(Index);
1410 bool SpillToVGPR = !VGPRSpills.empty();
5: Assuming the condition is false
1411 if (OnlyToVGPR && !SpillToVGPR)
5.1: 'OnlyToVGPR' is false
1412 return false;
1413
1414 if (SpillToVGPR) {
5.2: 'SpillToVGPR' is false
6: Taking false branch
1415 for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
1416 Register SubReg =
1417 SB.NumSubRegs == 1
1418 ? SB.SuperReg
1419 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1420
1421 SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
1422 auto MIB =
1423 BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
1424 .addReg(Spill.VGPR)
1425 .addImm(Spill.Lane);
1426 if (SB.NumSubRegs > 1 && i == 0)
1427 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1428 }
1429 } else {
1430 SB.prepare();
7: Calling 'SGPRSpillBuilder::prepare'
1431
1432 // Per VGPR helper data
1433 auto PVD = SB.getPerVGPRData();
1434
1435 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1436 // Load in VGPR data
1437 SB.readWriteTmpVGPR(Offset, /*IsLoad*/ true);
1438
1439 // Unpack lanes
1440 for (unsigned i = Offset * PVD.PerVGPR,
1441 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
1442 i < e; ++i) {
1443 Register SubReg =
1444 SB.NumSubRegs == 1
1445 ? SB.SuperReg
1446 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1447
1448 bool LastSubReg = (i + 1 == e);
1449 auto MIB = BuildMI(SB.MBB, MI, SB.DL,
1450 SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
1451 .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
1452 .addImm(i);
1453 if (SB.NumSubRegs > 1 && i == 0)
1454 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1455 }
1456 }
1457
1458 SB.restore();
1459 }
1460
1461 MI->eraseFromParent();
1462 return true;
1463}
1464
1465/// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
1466/// a VGPR and the stack slot can be safely eliminated when all other users are
1467/// handled.
1468bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
1469 MachineBasicBlock::iterator MI,
1470 int FI,
1471 RegScavenger *RS) const {
1472 switch (MI->getOpcode()) {
1473 case AMDGPU::SI_SPILL_S1024_SAVE:
1474 case AMDGPU::SI_SPILL_S512_SAVE:
1475 case AMDGPU::SI_SPILL_S256_SAVE:
1476 case AMDGPU::SI_SPILL_S192_SAVE:
1477 case AMDGPU::SI_SPILL_S160_SAVE:
1478 case AMDGPU::SI_SPILL_S128_SAVE:
1479 case AMDGPU::SI_SPILL_S96_SAVE:
1480 case AMDGPU::SI_SPILL_S64_SAVE:
1481 case AMDGPU::SI_SPILL_S32_SAVE:
1482 return spillSGPR(MI, FI, RS, true);
1483 case AMDGPU::SI_SPILL_S1024_RESTORE:
1484 case AMDGPU::SI_SPILL_S512_RESTORE:
1485 case AMDGPU::SI_SPILL_S256_RESTORE:
1486 case AMDGPU::SI_SPILL_S192_RESTORE:
1487 case AMDGPU::SI_SPILL_S160_RESTORE:
1488 case AMDGPU::SI_SPILL_S128_RESTORE:
1489 case AMDGPU::SI_SPILL_S96_RESTORE:
1490 case AMDGPU::SI_SPILL_S64_RESTORE:
1491 case AMDGPU::SI_SPILL_S32_RESTORE:
1492 return restoreSGPR(MI, FI, RS, true);
1493 default:
1494 llvm_unreachable("not an SGPR spill instruction");
1495 }
1496}
1497
1498void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
1499 int SPAdj, unsigned FIOperandNum,
1500 RegScavenger *RS) const {
1501 MachineFunction *MF = MI->getParent()->getParent();
1502 MachineBasicBlock *MBB = MI->getParent();
1503 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
1504 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
1505 const SIInstrInfo *TII = ST.getInstrInfo();
1506 DebugLoc DL = MI->getDebugLoc();
1507
1508 assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
1: Assuming 'SPAdj' is equal to 0
2: '?' condition is true
1509
1510 MachineOperand &FIOp = MI->getOperand(FIOperandNum);
1511 int Index = MI->getOperand(FIOperandNum).getIndex();
1512
1513 Register FrameReg = FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(*MF)
1514 ? getBaseRegister()
1515 : getFrameRegister(*MF);
1516
1517 switch (MI->getOpcode()) {
3: Control jumps to 'case SI_SPILL_S32_RESTORE:' at line 1541
1518 // SGPR register spill
1519 case AMDGPU::SI_SPILL_S1024_SAVE:
1520 case AMDGPU::SI_SPILL_S512_SAVE:
1521 case AMDGPU::SI_SPILL_S256_SAVE:
1522 case AMDGPU::SI_SPILL_S192_SAVE:
1523 case AMDGPU::SI_SPILL_S160_SAVE:
1524 case AMDGPU::SI_SPILL_S128_SAVE:
1525 case AMDGPU::SI_SPILL_S96_SAVE:
1526 case AMDGPU::SI_SPILL_S64_SAVE:
1527 case AMDGPU::SI_SPILL_S32_SAVE: {
1528 spillSGPR(MI, Index, RS);
1529 break;
1530 }
1531
1532 // SGPR register restore
1533 case AMDGPU::SI_SPILL_S1024_RESTORE:
1534 case AMDGPU::SI_SPILL_S512_RESTORE:
1535 case AMDGPU::SI_SPILL_S256_RESTORE:
1536 case AMDGPU::SI_SPILL_S192_RESTORE:
1537 case AMDGPU::SI_SPILL_S160_RESTORE:
1538 case AMDGPU::SI_SPILL_S128_RESTORE:
1539 case AMDGPU::SI_SPILL_S96_RESTORE:
1540 case AMDGPU::SI_SPILL_S64_RESTORE:
1541 case AMDGPU::SI_SPILL_S32_RESTORE: {
1542 restoreSGPR(MI, Index, RS);
4: Calling 'SIRegisterInfo::restoreSGPR'
1543 break;
1544 }
1545
1546 // VGPR register spill
1547 case AMDGPU::SI_SPILL_V1024_SAVE:
1548 case AMDGPU::SI_SPILL_V512_SAVE:
1549 case AMDGPU::SI_SPILL_V256_SAVE:
1550 case AMDGPU::SI_SPILL_V192_SAVE:
1551 case AMDGPU::SI_SPILL_V160_SAVE:
1552 case AMDGPU::SI_SPILL_V128_SAVE:
1553 case AMDGPU::SI_SPILL_V96_SAVE:
1554 case AMDGPU::SI_SPILL_V64_SAVE:
1555 case AMDGPU::SI_SPILL_V32_SAVE:
1556 case AMDGPU::SI_SPILL_A1024_SAVE:
1557 case AMDGPU::SI_SPILL_A512_SAVE:
1558 case AMDGPU::SI_SPILL_A256_SAVE:
1559 case AMDGPU::SI_SPILL_A192_SAVE:
1560 case AMDGPU::SI_SPILL_A160_SAVE:
1561 case AMDGPU::SI_SPILL_A128_SAVE:
1562 case AMDGPU::SI_SPILL_A96_SAVE:
1563 case AMDGPU::SI_SPILL_A64_SAVE:
1564 case AMDGPU::SI_SPILL_A32_SAVE: {
1565 const MachineOperand *VData = TII->getNamedOperand(*MI,
1566 AMDGPU::OpName::vdata);
1567 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
1568 MFI->getStackPtrOffsetReg());
1569
1570 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1571 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1572 auto *MBB = MI->getParent();
1573 buildSpillLoadStore(
1574 *MBB, MI, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
1575 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1576 *MI->memoperands_begin(), RS);
1577 MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
1578 MI->eraseFromParent();
1579 break;
1580 }
1581 case AMDGPU::SI_SPILL_V32_RESTORE:
1582 case AMDGPU::SI_SPILL_V64_RESTORE:
1583 case AMDGPU::SI_SPILL_V96_RESTORE:
1584 case AMDGPU::SI_SPILL_V128_RESTORE:
1585 case AMDGPU::SI_SPILL_V160_RESTORE:
1586 case AMDGPU::SI_SPILL_V192_RESTORE:
1587 case AMDGPU::SI_SPILL_V256_RESTORE:
1588 case AMDGPU::SI_SPILL_V512_RESTORE:
1589 case AMDGPU::SI_SPILL_V1024_RESTORE:
1590 case AMDGPU::SI_SPILL_A32_RESTORE:
1591 case AMDGPU::SI_SPILL_A64_RESTORE:
1592 case AMDGPU::SI_SPILL_A96_RESTORE:
1593 case AMDGPU::SI_SPILL_A128_RESTORE:
1594 case AMDGPU::SI_SPILL_A160_RESTORE:
1595 case AMDGPU::SI_SPILL_A192_RESTORE:
1596 case AMDGPU::SI_SPILL_A256_RESTORE:
1597 case AMDGPU::SI_SPILL_A512_RESTORE:
1598 case AMDGPU::SI_SPILL_A1024_RESTORE: {
1599 const MachineOperand *VData = TII->getNamedOperand(*MI,
1600 AMDGPU::OpName::vdata);
1601 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
1602 MFI->getStackPtrOffsetReg());
1603
1604 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
1605 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1606 auto *MBB = MI->getParent();
1607 buildSpillLoadStore(
1608 *MBB, MI, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
1609 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1610 *MI->memoperands_begin(), RS);
1611 MI->eraseFromParent();
1612 break;
1613 }
1614
1615 default: {
1616 // Other access to frame index
1617 const DebugLoc &DL = MI->getDebugLoc();
1618
1619 int64_t Offset = FrameInfo.getObjectOffset(Index);
1620 if (ST.enableFlatScratch()) {
1621 if (TII->isFLATScratch(*MI)) {
1622 assert((int16_t)FIOperandNum ==
1623 AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1624 AMDGPU::OpName::saddr));
1625
1626 // The offset is always swizzled, just replace it
1627 if (FrameReg)
1628 FIOp.ChangeToRegister(FrameReg, false);
1629
1630 if (!Offset)
1631 return;
1632
1633 MachineOperand *OffsetOp =
1634 TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
1635 int64_t NewOffset = Offset + OffsetOp->getImm();
1636 if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
1637 SIInstrFlags::FlatScratch)) {
1638 OffsetOp->setImm(NewOffset);
1639 if (FrameReg)
1640 return;
1641 Offset = 0;
1642 }
1643
1644 assert(!TII->getNamedOperand(*MI, AMDGPU::OpName::vaddr) &&
1645 "Unexpected vaddr for flat scratch with a FI operand");
1646
1647 // On GFX10 we have ST mode to use no registers for an address.
1648 // Otherwise we need to materialize 0 into an SGPR.
1649 if (!Offset && ST.hasFlatScratchSTMode()) {
1650 unsigned Opc = MI->getOpcode();
1651 unsigned NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
1652 MI->RemoveOperand(
1653 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
1654 MI->setDesc(TII->get(NewOpc));
1655 return;
1656 }
1657 }
1658
1659 if (!FrameReg) {
1660 FIOp.ChangeToImmediate(Offset);
1661 if (TII->isImmOperandLegal(*MI, FIOperandNum, FIOp))
1662 return;
1663 }
1664
1665 // We need to use a register here. Check if we can use an SGPR or need
1666 // a VGPR.
1667 FIOp.ChangeToRegister(AMDGPU::M0, false);
1668 bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, &FIOp);
1669
1670 if (!Offset && FrameReg && UseSGPR) {
1671 FIOp.setReg(FrameReg);
1672 return;
1673 }
1674
1675 const TargetRegisterClass *RC = UseSGPR ? &AMDGPU::SReg_32_XM0RegClass
1676 : &AMDGPU::VGPR_32RegClass;
1677
1678 Register TmpReg = RS->scavengeRegister(RC, MI, 0, !UseSGPR);
1679 FIOp.setReg(TmpReg);
1680 FIOp.setIsKill(true);
1681
1682 if ((!FrameReg || !Offset) && TmpReg) {
1683 unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1684 auto MIB = BuildMI(*MBB, MI, DL, TII->get(Opc), TmpReg);
1685 if (FrameReg)
1686 MIB.addReg(FrameReg);
1687 else
1688 MIB.addImm(Offset);
1689
1690 return;
1691 }
1692
1693 Register TmpSReg =
1694 UseSGPR ? TmpReg
1695 : RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0,
1696 !UseSGPR);
1697
1698 // TODO: for flat scratch another attempt can be made with a VGPR index
1699 // if no SGPRs can be scavenged.
1700 if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
1701 report_fatal_error("Cannot scavenge register in FI elimination!");
1702
1703 if (!TmpSReg) {
1704 // Use frame register and restore it after.
1705 TmpSReg = FrameReg;
1706 FIOp.setReg(FrameReg);
1707 FIOp.setIsKill(false);
1708 }
1709
1710 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), TmpSReg)
1711 .addReg(FrameReg)
1712 .addImm(Offset);
1713
1714 if (!UseSGPR)
1715 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1716 .addReg(TmpSReg, RegState::Kill);
1717
1718 if (TmpSReg == FrameReg) {
1719 // Undo frame register modification.
1720 BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_SUB_U32),
1721 FrameReg)
1722 .addReg(FrameReg)
1723 .addImm(Offset);
1724 }
1725
1726 return;
1727 }
1728
1729 bool IsMUBUF = TII->isMUBUF(*MI);
1730
1731 if (!IsMUBUF && !MFI->isEntryFunction()) {
1732 // Convert to a swizzled stack address by scaling by the wave size.
1733 //
1734 // In an entry function/kernel the offset is already swizzled.
1735
1736 bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
1737 Register ResultReg =
1738 IsCopy ? MI->getOperand(0).getReg()
1739 : RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
1740
1741 int64_t Offset = FrameInfo.getObjectOffset(Index);
1742 if (Offset == 0) {
1743 // XXX - This never happens because of emergency scavenging slot at 0?
1744 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
1745 .addImm(ST.getWavefrontSizeLog2())
1746 .addReg(FrameReg);
1747 } else {
1748 if (auto MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) {
1749 // Reuse ResultReg in intermediate step.
1750 Register ScaledReg = ResultReg;
1751
1752 BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
1753 ScaledReg)
1754 .addImm(ST.getWavefrontSizeLog2())
1755 .addReg(FrameReg);
1756
1757 const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
1758
1759 // TODO: Fold if use instruction is another add of a constant.
1760 if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
1761 // FIXME: This can fail
1762 MIB.addImm(Offset);
1763 MIB.addReg(ScaledReg, RegState::Kill);
1764 if (!IsVOP2)
1765 MIB.addImm(0); // clamp bit
1766 } else {
1767 assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 &&
1768 "Need to reuse carry out register");
1769
1770 // Use scavenged unused carry out as offset register.
1771 Register ConstOffsetReg;
1772 if (!isWave32)
1773 ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
1774 else
1775 ConstOffsetReg = MIB.getReg(1);
1776
1777 BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
1778 .addImm(Offset);
1779 MIB.addReg(ConstOffsetReg, RegState::Kill);
1780 MIB.addReg(ScaledReg, RegState::Kill);
1781 MIB.addImm(0); // clamp bit
1782 }
1783 } else {
1784 // We have to produce a carry out, and there isn't a free SGPR pair
1785 // for it. We can keep the whole computation on the SALU to avoid
1786 // clobbering an additional register at the cost of an extra mov.
1787
1788 // We may have 1 free scratch SGPR even though a carry out is
1789 // unavailable. Only one additional mov is needed.
1790 Register TmpScaledReg =
1791 RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
1792 Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
1793
1794 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
1795 .addReg(FrameReg)
1796 .addImm(ST.getWavefrontSizeLog2());
1797 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), ScaledReg)
1798 .addReg(ScaledReg, RegState::Kill)
1799 .addImm(Offset);
1800 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
1801 .addReg(ScaledReg, RegState::Kill);
1802
1803 // If there were truly no free SGPRs, we need to undo everything.
1804 if (!TmpScaledReg.isValid()) {
1805 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScaledReg)
1806 .addReg(ScaledReg, RegState::Kill)
1807 .addImm(Offset);
1808 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
1809 .addReg(FrameReg)
1810 .addImm(ST.getWavefrontSizeLog2());
1811 }
1812 }
1813 }
1814
1815 // Don't introduce an extra copy if we're just materializing in a mov.
1816 if (IsCopy)
1817 MI->eraseFromParent();
1818 else
1819 FIOp.ChangeToRegister(ResultReg, false, false, true);
1820 return;
1821 }
1822
1823 if (IsMUBUF) {
1824 // Disable offen so we don't need a 0 vgpr base.
1825 assert(static_cast<int>(FIOperandNum) ==
1826 AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1827 AMDGPU::OpName::vaddr));
1828
1829 auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
1830 assert((SOffset.isImm() && SOffset.getImm() == 0));
1831
1832 if (FrameReg != AMDGPU::NoRegister)
1833 SOffset.ChangeToRegister(FrameReg, false);
1834
1835 int64_t Offset = FrameInfo.getObjectOffset(Index);
1836 int64_t OldImm
1837 = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
1838 int64_t NewOffset = OldImm + Offset;
1839
1840 if (SIInstrInfo::isLegalMUBUFImmOffset(NewOffset) &&
1841 buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
1842 MI->eraseFromParent();
1843 return;
1844 }
1845 }
1846
1847 // If the offset is simply too big, don't convert to a scratch wave offset
1848 // relative index.
1849
1850 FIOp.ChangeToImmediate(Offset);
1851 if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
1852 Register TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
1853 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1854 .addImm(Offset);
1855 FIOp.ChangeToRegister(TmpReg, false, false, true);
1856 }
1857 }
1858 }
1859}
1860
1861StringRef SIRegisterInfo::getRegAsmName(MCRegister Reg) const {
1862 return AMDGPUInstPrinter::getRegisterName(Reg);
1863}
1864
1865static const TargetRegisterClass *
1866getAnyVGPRClassForBitWidth(unsigned BitWidth) {
1867 if (BitWidth <= 64)
1868 return &AMDGPU::VReg_64RegClass;
1869 if (BitWidth <= 96)
1870 return &AMDGPU::VReg_96RegClass;
1871 if (BitWidth <= 128)
1872 return &AMDGPU::VReg_128RegClass;
1873 if (BitWidth <= 160)
1874 return &AMDGPU::VReg_160RegClass;
1875 if (BitWidth <= 192)
1876 return &AMDGPU::VReg_192RegClass;
1877 if (BitWidth <= 256)
1878 return &AMDGPU::VReg_256RegClass;
1879 if (BitWidth <= 512)
1880 return &AMDGPU::VReg_512RegClass;
1881 if (BitWidth <= 1024)
1882 return &AMDGPU::VReg_1024RegClass;
1883
1884 return nullptr;
1885}
1886
1887static const TargetRegisterClass *
1888getAlignedVGPRClassForBitWidth(unsigned BitWidth) {
1889 if (BitWidth <= 64)
1890 return &AMDGPU::VReg_64_Align2RegClass;
1891 if (BitWidth <= 96)
1892 return &AMDGPU::VReg_96_Align2RegClass;
1893 if (BitWidth <= 128)
1894 return &AMDGPU::VReg_128_Align2RegClass;
1895 if (BitWidth <= 160)
1896 return &AMDGPU::VReg_160_Align2RegClass;
1897 if (BitWidth <= 192)
1898 return &AMDGPU::VReg_192_Align2RegClass;
1899 if (BitWidth <= 256)
1900 return &AMDGPU::VReg_256_Align2RegClass;
1901 if (BitWidth <= 512)
1902 return &AMDGPU::VReg_512_Align2RegClass;
1903 if (BitWidth <= 1024)
1904 return &AMDGPU::VReg_1024_Align2RegClass;
1905
1906 return nullptr;
1907}
1908
1909const TargetRegisterClass *
1910SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) const {
1911 if (BitWidth == 1)
1912 return &AMDGPU::VReg_1RegClass;
1913 if (BitWidth <= 16)
1914 return &AMDGPU::VGPR_LO16RegClass;
1915 if (BitWidth <= 32)
1916 return &AMDGPU::VGPR_32RegClass;
1917 return ST.needsAlignedVGPRs() ? getAlignedVGPRClassForBitWidth(BitWidth)
1918 : getAnyVGPRClassForBitWidth(BitWidth);
1919}
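
The getVGPRClassForBitWidth helpers above walk a ladder of 'if (BitWidth <= N)' checks to pick the smallest register class wide enough for the requested width, and return nullptr past 1024 bits. An equivalent sketch of that selection using a sorted width table; plain integers stand in for the TargetRegisterClass pointers:

#include <algorithm>
#include <array>
#include <cassert>

// Returns the smallest supported register width (in bits) that can hold
// BitWidth, or -1 if no class is wide enough (mirrors 'return nullptr').
static int smallestClassWidth(unsigned BitWidth) {
  static constexpr std::array<unsigned, 9> Widths = {32,  64,  96,  128, 160,
                                                     192, 256, 512, 1024};
  auto It = std::lower_bound(Widths.begin(), Widths.end(), BitWidth);
  return It == Widths.end() ? -1 : static_cast<int>(*It);
}

int main() {
  assert(smallestClassWidth(96) == 96);   // exact match
  assert(smallestClassWidth(97) == 128);  // rounds up to the next class
  assert(smallestClassWidth(2048) == -1); // wider than any available class
}
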
1920
1921static const TargetRegisterClass *
1922getAnyAGPRClassForBitWidth(unsigned BitWidth) {
1923 if (BitWidth <= 64)
1924 return &AMDGPU::AReg_64RegClass;
1925 if (BitWidth <= 96)
1926 return &AMDGPU::AReg_96RegClass;
1927 if (BitWidth <= 128)
1928 return &AMDGPU::AReg_128RegClass;
1929 if (BitWidth <= 160)
1930 return &AMDGPU::AReg_160RegClass;
1931 if (BitWidth <= 192)
1932 return &AMDGPU::AReg_192RegClass;
1933 if (BitWidth <= 256)
1934 return &AMDGPU::AReg_256RegClass;
1935 if (BitWidth <= 512)
1936 return &AMDGPU::AReg_512RegClass;
1937 if (BitWidth <= 1024)
1938 return &AMDGPU::AReg_1024RegClass;
1939
1940 return nullptr;
1941}
1942
1943static const TargetRegisterClass *
1944getAlignedAGPRClassForBitWidth(unsigned BitWidth) {
1945 if (BitWidth <= 64)
1946 return &AMDGPU::AReg_64_Align2RegClass;
1947 if (BitWidth <= 96)
1948 return &AMDGPU::AReg_96_Align2RegClass;
1949 if (BitWidth <= 128)
1950 return &AMDGPU::AReg_128_Align2RegClass;
1951 if (BitWidth <= 160)
1952 return &AMDGPU::AReg_160_Align2RegClass;
1953 if (BitWidth <= 192)
1954 return &AMDGPU::AReg_192_Align2RegClass;
1955 if (BitWidth <= 256)
1956 return &AMDGPU::AReg_256_Align2RegClass;
1957 if (BitWidth <= 512)
1958 return &AMDGPU::AReg_512_Align2RegClass;
1959 if (BitWidth <= 1024)
1960 return &AMDGPU::AReg_1024_Align2RegClass;
1961
1962 return nullptr;
1963}
1964
1965const TargetRegisterClass *
1966SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) const {
1967 if (BitWidth <= 16)
1968 return &AMDGPU::AGPR_LO16RegClass;
1969 if (BitWidth <= 32)
1970 return &AMDGPU::AGPR_32RegClass;
1971 return ST.needsAlignedVGPRs() ? getAlignedAGPRClassForBitWidth(BitWidth)
1972 : getAnyAGPRClassForBitWidth(BitWidth);
1973}
1974
1975const TargetRegisterClass *
1976SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
1977 if (BitWidth <= 16)
1978 return &AMDGPU::SGPR_LO16RegClass;
1979 if (BitWidth <= 32)
1980 return &AMDGPU::SReg_32RegClass;
1981 if (BitWidth <= 64)
1982 return &AMDGPU::SReg_64RegClass;
1983 if (BitWidth <= 96)
1984 return &AMDGPU::SGPR_96RegClass;
1985 if (BitWidth <= 128)
1986 return &AMDGPU::SGPR_128RegClass;
1987 if (BitWidth <= 160)
1988 return &AMDGPU::SGPR_160RegClass;
1989 if (BitWidth <= 192)
1990 return &AMDGPU::SGPR_192RegClass;
1991 if (BitWidth <= 256)
1992 return &AMDGPU::SGPR_256RegClass;
1993 if (BitWidth <= 512)
1994 return &AMDGPU::SGPR_512RegClass;
1995 if (BitWidth <= 1024)
1996 return &AMDGPU::SGPR_1024RegClass;
1997
1998 return nullptr;
1999}
2000
2001// FIXME: This is very slow. It might be worth creating a map from physreg to
2002// register class.
2003const TargetRegisterClass *
2004SIRegisterInfo::getPhysRegClass(MCRegister Reg) const {
2005 static const TargetRegisterClass *const BaseClasses[] = {
2006 &AMDGPU::VGPR_LO16RegClass,
2007 &AMDGPU::VGPR_HI16RegClass,
2008 &AMDGPU::SReg_LO16RegClass,
2009 &AMDGPU::AGPR_LO16RegClass,
2010 &AMDGPU::VGPR_32RegClass,
2011 &AMDGPU::SReg_32RegClass,
2012 &AMDGPU::AGPR_32RegClass,
2013 &AMDGPU::AGPR_32RegClass,
2014 &AMDGPU::VReg_64_Align2RegClass,
2015 &AMDGPU::VReg_64RegClass,
2016 &AMDGPU::SReg_64RegClass,
2017 &AMDGPU::AReg_64_Align2RegClass,
2018 &AMDGPU::AReg_64RegClass,
2019 &AMDGPU::VReg_96_Align2RegClass,
2020 &AMDGPU::VReg_96RegClass,
2021 &AMDGPU::SReg_96RegClass,
2022 &AMDGPU::AReg_96_Align2RegClass,
2023 &AMDGPU::AReg_96RegClass,
2024 &AMDGPU::VReg_128_Align2RegClass,
2025 &AMDGPU::VReg_128RegClass,
2026 &AMDGPU::SReg_128RegClass,
2027 &AMDGPU::AReg_128_Align2RegClass,
2028 &AMDGPU::AReg_128RegClass,
2029 &AMDGPU::VReg_160_Align2RegClass,
2030 &AMDGPU::VReg_160RegClass,
2031 &AMDGPU::SReg_160RegClass,
2032 &AMDGPU::AReg_160_Align2RegClass,
2033 &AMDGPU::AReg_160RegClass,
2034 &AMDGPU::VReg_192_Align2RegClass,
2035 &AMDGPU::VReg_192RegClass,
2036 &AMDGPU::SReg_192RegClass,
2037 &AMDGPU::AReg_192_Align2RegClass,
2038 &AMDGPU::AReg_192RegClass,
2039 &AMDGPU::VReg_256_Align2RegClass,
2040 &AMDGPU::VReg_256RegClass,
2041 &AMDGPU::SReg_256RegClass,
2042 &AMDGPU::AReg_256_Align2RegClass,
2043 &AMDGPU::AReg_256RegClass,
2044 &AMDGPU::VReg_512_Align2RegClass,
2045 &AMDGPU::VReg_512RegClass,
2046 &AMDGPU::SReg_512RegClass,
2047 &AMDGPU::AReg_512_Align2RegClass,
2048 &AMDGPU::AReg_512RegClass,
2049 &AMDGPU::SReg_1024RegClass,
2050 &AMDGPU::VReg_1024_Align2RegClass,
2051 &AMDGPU::VReg_1024RegClass,
2052 &AMDGPU::AReg_1024_Align2RegClass,
2053 &AMDGPU::AReg_1024RegClass,
2054 &AMDGPU::SCC_CLASSRegClass,
2055 &AMDGPU::Pseudo_SReg_32RegClass,
2056 &AMDGPU::Pseudo_SReg_128RegClass,
2057 };
2058
2059 for (const TargetRegisterClass *BaseClass : BaseClasses) {
2060 if (BaseClass->contains(Reg)) {
2061 return BaseClass;
2062 }
2063 }
2064 return nullptr;
2065}
2066
2067bool SIRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,
2068 Register Reg) const {
2069 const TargetRegisterClass *RC;
2070 if (Reg.isVirtual())
2071 RC = MRI.getRegClass(Reg);
2072 else
2073 RC = getPhysRegClass(Reg);
2074 return isSGPRClass(RC);
2075}
2076
2077// TODO: It might be helpful to have some target specific flags in
2078// TargetRegisterClass to mark which classes are VGPRs to make this trivial.
2079bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
2080 unsigned Size = getRegSizeInBits(*RC);
2081 if (Size == 16) {
2082 return getCommonSubClass(&AMDGPU::VGPR_LO16RegClass, RC) != nullptr ||
2083 getCommonSubClass(&AMDGPU::VGPR_HI16RegClass, RC) != nullptr;
2084 }
2085 const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size);
2086 if (!VRC) {
2087 assert(Size < 32 && "Invalid register class size");
2088 return false;
2089 }
2090 return getCommonSubClass(VRC, RC) != nullptr;
2091}
2092
2093bool SIRegisterInfo::hasAGPRs(const TargetRegisterClass *RC) const {
2094 unsigned Size = getRegSizeInBits(*RC);
2095 if (Size < 16)
2096 return false;
2097 const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size);
2098 if (!ARC) {
2099 assert(getVGPRClassForBitWidth(Size) && "Invalid register class size");
2100 return false;
2101 }
2102 return getCommonSubClass(ARC, RC) != nullptr;
2103}
2104
2105const TargetRegisterClass *
2106SIRegisterInfo::getEquivalentVGPRClass(const TargetRegisterClass *SRC) const {
2107 unsigned Size = getRegSizeInBits(*SRC);
2108 const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size);
2109 assert(VRC && "Invalid register class size");
2110 return VRC;
2111}
2112
2113const TargetRegisterClass *
2114SIRegisterInfo::getEquivalentAGPRClass(const TargetRegisterClass *SRC) const {
2115 unsigned Size = getRegSizeInBits(*SRC);
2116 const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size);
2117 assert(ARC && "Invalid register class size");
2118 return ARC;
2119}
2120
2121const TargetRegisterClass *
2122SIRegisterInfo::getEquivalentSGPRClass(const TargetRegisterClass *VRC) const {
2123 unsigned Size = getRegSizeInBits(*VRC);
2124 if (Size == 32)
2125 return &AMDGPU::SGPR_32RegClass;
2126 const TargetRegisterClass *SRC = getSGPRClassForBitWidth(Size);
2127 assert(SRC && "Invalid register class size");
2128 return SRC;
2129}
2130
2131const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
2132 const TargetRegisterClass *RC, unsigned SubIdx) const {
2133 if (SubIdx == AMDGPU::NoSubRegister)
2134 return RC;
2135
2136 // We can assume that each lane corresponds to one 32-bit register.
2137 unsigned Size = getNumChannelsFromSubReg(SubIdx) * 32;
2138 if (isSGPRClass(RC)) {
2139 if (Size == 32)
2140 RC = &AMDGPU::SGPR_32RegClass;
2141 else
2142 RC = getSGPRClassForBitWidth(Size);
2143 } else if (hasAGPRs(RC)) {
2144 RC = getAGPRClassForBitWidth(Size);
2145 } else {
2146 RC = getVGPRClassForBitWidth(Size);
2147 }
2148 assert(RC && "Invalid sub-register class size");
2149 return RC;
2150}
2151
2152const TargetRegisterClass *
2153SIRegisterInfo::getCompatibleSubRegClass(const TargetRegisterClass *SuperRC,
2154 const TargetRegisterClass *SubRC,
2155 unsigned SubIdx) const {
2156 // Ensure this subregister index is aligned in the super register.
2157 const TargetRegisterClass *MatchRC =
2158 getMatchingSuperRegClass(SuperRC, SubRC, SubIdx);
2159 return MatchRC && MatchRC->hasSubClassEq(SuperRC) ? MatchRC : nullptr;
2160}
2161
2162bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
2163 if (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
2164 OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST)
2165 return !ST.hasMFMAInlineLiteralBug();
2166
2167 return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
2168 OpType <= AMDGPU::OPERAND_SRC_LAST;
2169}
2170
2171bool SIRegisterInfo::shouldRewriteCopySrc(
2172 const TargetRegisterClass *DefRC,
2173 unsigned DefSubReg,
2174 const TargetRegisterClass *SrcRC,
2175 unsigned SrcSubReg) const {
2176 // We want to prefer the smallest register class possible, so we don't want to
2177 // stop and rewrite on anything that looks like a subregister
2178 // extract. Operations mostly don't care about the super register class, so we
2179 // only want to stop on the most basic of copies between the same register
2180 // class.
2181 //
2182 // e.g. if we have something like
2183 // %0 = ...
2184 // %1 = ...
2185 // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
2186 // %3 = COPY %2, sub0
2187 //
2188 // We want to look through the COPY to find:
2189 // => %3 = COPY %0
2190
2191 // Plain copy.
2192 return getCommonSubClass(DefRC, SrcRC) != nullptr;
2193}
2194
2195bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
2196 // TODO: 64-bit operands have extending behavior from 32-bit literal.
2197 return OpType >= AMDGPU::OPERAND_REG_IMM_FIRST &&
2198 OpType <= AMDGPU::OPERAND_REG_IMM_LAST;
2199}
2200
2201/// Returns the lowest register that is not used at any point in the function.
2202/// If all registers are used, then this function will return
2203/// AMDGPU::NoRegister. If \p ReserveHighestVGPR = true, then return the
2204/// highest unused register.
2205MCRegister SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
2206 const TargetRegisterClass *RC,
2207 const MachineFunction &MF,
2208 bool ReserveHighestVGPR) const {
2209 if (ReserveHighestVGPR) {
2210 for (MCRegister Reg : reverse(*RC))
2211 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
2212 return Reg;
2213 } else {
2214 for (MCRegister Reg : *RC)
2215 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
2216 return Reg;
2217 }
2218 return MCRegister();
2219}
2220
2221ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
2222 unsigned EltSize) const {
2223 const unsigned RegBitWidth = AMDGPU::getRegBitWidth(*RC->MC);
2224 assert(RegBitWidth >= 32 && RegBitWidth <= 1024);
2225
2226 const unsigned RegDWORDs = RegBitWidth / 32;
2227 const unsigned EltDWORDs = EltSize / 4;
2228 assert(RegSplitParts.size() + 1 >= EltDWORDs);
2229
2230 const std::vector<int16_t> &Parts = RegSplitParts[EltDWORDs - 1];
2231 const unsigned NumParts = RegDWORDs / EltDWORDs;
2232
2233 return makeArrayRef(Parts.data(), NumParts);
2234}
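A worked instance of the arithmetic above (the 128-bit class and the element size are assumed values, chosen only to make the numbers concrete):

// Assumed inputs: a 128-bit register class and EltSize = 8 bytes.
// RegBitWidth = 128  -> RegDWORDs = 4
// EltSize     = 8    -> EltDWORDs = 2
// NumParts    = 4/2  -> 2 entries, taken from RegSplitParts[EltDWORDs - 1].
ArrayRef<int16_t> Parts = TRI->getRegSplitParts(&AMDGPU::VReg_128RegClass, /*EltSize=*/8);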
2235
2236const TargetRegisterClass*
2237SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
2238 Register Reg) const {
2239 return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegClass(Reg);
2240}
2241
2242bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
2243 Register Reg) const {
2244 const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
2245 // Registers without classes are unaddressable, SGPR-like registers.
2246 return RC && hasVGPRs(RC);
2247}
2248
2249bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
2250 Register Reg) const {
2251 const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
2252
2253 // Registers without classes are unaddressable, SGPR-like registers.
2254 return RC && hasAGPRs(RC);
2255}
2256
2257bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
2258 const TargetRegisterClass *SrcRC,
2259 unsigned SubReg,
2260 const TargetRegisterClass *DstRC,
2261 unsigned DstSubReg,
2262 const TargetRegisterClass *NewRC,
2263 LiveIntervals &LIS) const {
2264 unsigned SrcSize = getRegSizeInBits(*SrcRC);
2265 unsigned DstSize = getRegSizeInBits(*DstRC);
2266 unsigned NewSize = getRegSizeInBits(*NewRC);
2267
2268 // Do not increase the size of registers beyond a dword; we would need to
2269 // allocate adjacent registers and constrain regalloc more than needed.
2270
2271 // Always allow dword coalescing.
2272 if (SrcSize <= 32 || DstSize <= 32)
2273 return true;
2274
2275 return NewSize <= DstSize || NewSize <= SrcSize;
2276}
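The effect of the checks above on a few representative sizes (illustrative values only, not taken from a real coalescing run):

// SrcSize = 32, DstSize = 64,  NewSize = 96  -> true  (dword operand, always allowed)
// SrcSize = 64, DstSize = 64,  NewSize = 128 -> false (result would outgrow both inputs)
// SrcSize = 64, DstSize = 128, NewSize = 128 -> true  (no growth beyond the larger input)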
2277
2278unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
2279 MachineFunction &MF) const {
2280 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
2281
2282 unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
2283 MF.getFunction());
2284 switch (RC->getID()) {
2285 default:
2286 return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
2287 case AMDGPU::VGPR_32RegClassID:
2288 case AMDGPU::VGPR_LO16RegClassID:
2289 case AMDGPU::VGPR_HI16RegClassID:
2290 return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
2291 case AMDGPU::SGPR_32RegClassID:
2292 case AMDGPU::SGPR_LO16RegClassID:
2293 return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
2294 }
2295}
2296
2297unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
2298 unsigned Idx) const {
2299 if (Idx == AMDGPU::RegisterPressureSets::VGPR_32 ||
2300 Idx == AMDGPU::RegisterPressureSets::AGPR_32)
2301 return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
2302 const_cast<MachineFunction &>(MF));
2303
2304 if (Idx == AMDGPU::RegisterPressureSets::SReg_32)
2305 return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
2306 const_cast<MachineFunction &>(MF));
2307
2308 llvm_unreachable("Unexpected register pressure set!");
2309}
2310
2311const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
2312 static const int Empty[] = { -1 };
2313
2314 if (RegPressureIgnoredUnits[RegUnit])
2315 return Empty;
2316
2317 return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
2318}
2319
2320MCRegister SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
2321 // Not a callee saved register.
2322 return AMDGPU::SGPR30_SGPR31;
2323}
2324
2325const TargetRegisterClass *
2326SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
2327 const RegisterBank &RB,
2328 const MachineRegisterInfo &MRI) const {
2329 switch (RB.getID()) {
2330 case AMDGPU::VGPRRegBankID:
2331 return getVGPRClassForBitWidth(std::max(32u, Size));
2332 case AMDGPU::VCCRegBankID:
2333 assert(Size == 1);
2334 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
2335 : &AMDGPU::SReg_64_XEXECRegClass;
2336 case AMDGPU::SGPRRegBankID:
2337 return getSGPRClassForBitWidth(std::max(32u, Size));
2338 case AMDGPU::AGPRRegBankID:
2339 return getAGPRClassForBitWidth(std::max(32u, Size));
2340 default:
2341 llvm_unreachable("unknown register bank");
2342 }
2343}
2344
2345const TargetRegisterClass *
2346SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
2347 const MachineRegisterInfo &MRI) const {
2348 const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(MO.getReg());
2349 if (const RegisterBank *RB = RCOrRB.dyn_cast<const RegisterBank*>())
2350 return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB, MRI);
2351
2352 const TargetRegisterClass *RC = RCOrRB.get<const TargetRegisterClass*>();
2353 return getAllocatableClass(RC);
2354}
2355
2356MCRegister SIRegisterInfo::getVCC() const {
2357 return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
2358}
2359
2360const TargetRegisterClass *SIRegisterInfo::getVGPR64Class() const {
2361 // VGPR tuples have an alignment requirement on gfx90a variants.
2362 return ST.needsAlignedVGPRs() ? &AMDGPU::VReg_64_Align2RegClass
2363 : &AMDGPU::VReg_64RegClass;
2364}
2365
2366const TargetRegisterClass *
2367SIRegisterInfo::getRegClass(unsigned RCID) const {
2368 switch ((int)RCID) {
2369 case AMDGPU::SReg_1RegClassID:
2370 return getBoolRC();
2371 case AMDGPU::SReg_1_XEXECRegClassID:
2372 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
2373 : &AMDGPU::SReg_64_XEXECRegClass;
2374 case -1:
2375 return nullptr;
2376 default:
2377 return AMDGPUGenRegisterInfo::getRegClass(RCID);
2378 }
2379}
2380
2381// Find reaching register definition
2382MachineInstr *SIRegisterInfo::findReachingDef(Register Reg, unsigned SubReg,
2383 MachineInstr &Use,
2384 MachineRegisterInfo &MRI,
2385 LiveIntervals *LIS) const {
2386 auto &MDT = LIS->getAnalysis<MachineDominatorTree>();
2387 SlotIndex UseIdx = LIS->getInstructionIndex(Use);
2388 SlotIndex DefIdx;
2389
2390 if (Reg.isVirtual()) {
2391 if (!LIS->hasInterval(Reg))
2392 return nullptr;
2393 LiveInterval &LI = LIS->getInterval(Reg);
2394 LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
2395 : MRI.getMaxLaneMaskForVReg(Reg);
2396 VNInfo *V = nullptr;
2397 if (LI.hasSubRanges()) {
2398 for (auto &S : LI.subranges()) {
2399 if ((S.LaneMask & SubLanes) == SubLanes) {
2400 V = S.getVNInfoAt(UseIdx);
2401 break;
2402 }
2403 }
2404 } else {
2405 V = LI.getVNInfoAt(UseIdx);
2406 }
2407 if (!V)
2408 return nullptr;
2409 DefIdx = V->def;
2410 } else {
2411 // Find last def.
2412 for (MCRegUnitIterator Units(Reg.asMCReg(), this); Units.isValid();
2413 ++Units) {
2414 LiveRange &LR = LIS->getRegUnit(*Units);
2415 if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
2416 if (!DefIdx.isValid() ||
2417 MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
2418 LIS->getInstructionFromIndex(V->def)))
2419 DefIdx = V->def;
2420 } else {
2421 return nullptr;
2422 }
2423 }
2424 }
2425
2426 MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);
2427
2428 if (!Def || !MDT.dominates(Def, &Use))
2429 return nullptr;
2430
2431 assert(Def->modifiesRegister(Reg, this));
2432
2433 return Def;
2434}
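A hedged call-site sketch for the routine above (UseMI, Reg, MRI and LIS are placeholder names, not identifiers from this file):

// Hypothetical: UseMI reads Reg; a null result means no single dominating
// definition could be identified.
if (MachineInstr *DefMI = TRI->findReachingDef(Reg, /*SubReg=*/0, UseMI, MRI, LIS)) {
  // DefMI dominates UseMI and defines the queried lanes.
}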
2435
2436MCPhysReg SIRegisterInfo::get32BitRegister(MCPhysReg Reg) const {
2437 assert(getRegSizeInBits(*getPhysRegClass(Reg)) <= 32);
2438
2439 for (const TargetRegisterClass &RC : { AMDGPU::VGPR_32RegClass,
2440 AMDGPU::SReg_32RegClass,
2441 AMDGPU::AGPR_32RegClass } ) {
2442 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC))
2443 return Super;
2444 }
2445 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
2446 &AMDGPU::VGPR_32RegClass)) {
2447 return Super;
2448 }
2449
2450 return AMDGPU::NoRegister;
2451}
2452
2453bool SIRegisterInfo::isProperlyAlignedRC(const TargetRegisterClass &RC) const {
2454 if (!ST.needsAlignedVGPRs())
2455 return true;
2456
2457 if (hasVGPRs(&RC))
2458 return RC.hasSuperClassEq(getVGPRClassForBitWidth(getRegSizeInBits(RC)));
2459 if (hasAGPRs(&RC))
2460 return RC.hasSuperClassEq(getAGPRClassForBitWidth(getRegSizeInBits(RC)));
2461
2462 return true;
2463}
2464
2465bool SIRegisterInfo::isConstantPhysReg(MCRegister PhysReg) const {
2466 switch (PhysReg) {
2467 case AMDGPU::SGPR_NULL:
2468 case AMDGPU::SRC_SHARED_BASE:
2469 case AMDGPU::SRC_PRIVATE_BASE:
2470 case AMDGPU::SRC_SHARED_LIMIT:
2471 case AMDGPU::SRC_PRIVATE_LIMIT:
2472 return true;
2473 default:
2474 return false;
2475 }
2476}
2477
2478ArrayRef<MCPhysReg>
2479SIRegisterInfo::getAllSGPR128(const MachineFunction &MF) const {
2480 return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
2481 ST.getMaxNumSGPRs(MF) / 4);
2482}
2483
2484ArrayRef<MCPhysReg>
2485SIRegisterInfo::getAllSGPR64(const MachineFunction &MF) const {
2486 return makeArrayRef(AMDGPU::SGPR_64RegClass.begin(),
2487 ST.getMaxNumSGPRs(MF) / 2);
2488}
2489
2490ArrayRef<MCPhysReg>
2491SIRegisterInfo::getAllSGPR32(const MachineFunction &MF) const {
2492 return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(), ST.getMaxNumSGPRs(MF));
2493}

/usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/bits/stl_algobase.h

1// Core algorithmic facilities -*- C++ -*-
2
3// Copyright (C) 2001-2020 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25/*
26 *
27 * Copyright (c) 1994
28 * Hewlett-Packard Company
29 *
30 * Permission to use, copy, modify, distribute and sell this software
31 * and its documentation for any purpose is hereby granted without fee,
32 * provided that the above copyright notice appear in all copies and
33 * that both that copyright notice and this permission notice appear
34 * in supporting documentation. Hewlett-Packard Company makes no
35 * representations about the suitability of this software for any
36 * purpose. It is provided "as is" without express or implied warranty.
37 *
38 *
39 * Copyright (c) 1996-1998
40 * Silicon Graphics Computer Systems, Inc.
41 *
42 * Permission to use, copy, modify, distribute and sell this software
43 * and its documentation for any purpose is hereby granted without fee,
44 * provided that the above copyright notice appear in all copies and
45 * that both that copyright notice and this permission notice appear
46 * in supporting documentation. Silicon Graphics makes no
47 * representations about the suitability of this software for any
48 * purpose. It is provided "as is" without express or implied warranty.
49 */
50
51/** @file bits/stl_algobase.h
52 * This is an internal header file, included by other library headers.
53 * Do not attempt to use it directly. @headername{algorithm}
54 */
55
56#ifndef _STL_ALGOBASE_H1
57#define _STL_ALGOBASE_H1 1
58
59#include <bits/c++config.h>
60#include <bits/functexcept.h>
61#include <bits/cpp_type_traits.h>
62#include <ext/type_traits.h>
63#include <ext/numeric_traits.h>
64#include <bits/stl_pair.h>
65#include <bits/stl_iterator_base_types.h>
66#include <bits/stl_iterator_base_funcs.h>
67#include <bits/stl_iterator.h>
68#include <bits/concept_check.h>
69#include <debug/debug.h>
70#include <bits/move.h> // For std::swap
71#include <bits/predefined_ops.h>
72#if __cplusplus201402L >= 201103L
73# include <type_traits>
74#endif
75#if __cplusplus201402L > 201703L
76# include <compare>
77#endif
78
79namespace std _GLIBCXX_VISIBILITY(default)__attribute__ ((__visibility__ ("default")))
80{
81_GLIBCXX_BEGIN_NAMESPACE_VERSION
82
83 /*
84 * A constexpr wrapper for __builtin_memcmp.
85 * @param __num The number of elements of type _Tp (not bytes).
86 */
87 template<typename _Tp, typename _Up>
88 _GLIBCXX14_CONSTEXPRconstexpr
89 inline int
90 __memcmp(const _Tp* __first1, const _Up* __first2, size_t __num)
91 {
92#if __cplusplus201402L >= 201103L
93 static_assert(sizeof(_Tp) == sizeof(_Up), "can be compared with memcmp");
94#endif
95#ifdef __cpp_lib_is_constant_evaluated
96 if (std::is_constant_evaluated())
97 {
98 for(; __num > 0; ++__first1, ++__first2, --__num)
99 if (*__first1 != *__first2)
100 return *__first1 < *__first2 ? -1 : 1;
101 return 0;
102 }
103 else
104#endif
105 return __builtin_memcmp(__first1, __first2, sizeof(_Tp) * __num);
106 }
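The comment above stresses that __num counts elements of _Tp rather than bytes; a small sketch of that distinction (std::__memcmp is a libstdc++-internal helper, called here only to illustrate the parameter, not as a public API):

#include <algorithm>

int main() {
  int a[3] = {1, 2, 3};
  int b[3] = {1, 2, 4};
  // Three elements, i.e. 3 * sizeof(int) bytes, are compared at run time.
  return std::__memcmp(a, b, 3) == 0 ? 1 : 0; // nonzero result: the arrays differ
}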
107
108#if __cplusplus201402L < 201103L
109 // See http://gcc.gnu.org/ml/libstdc++/2004-08/msg00167.html: in a
110 // nutshell, we are partially implementing the resolution of DR 187,
111 // when it's safe, i.e., the value_types are equal.
112 template<bool _BoolType>
113 struct __iter_swap
114 {
115 template<typename _ForwardIterator1, typename _ForwardIterator2>
116 static void
117 iter_swap(_ForwardIterator1 __a, _ForwardIterator2 __b)
118 {
119 typedef typename iterator_traits<_ForwardIterator1>::value_type
120 _ValueType1;
121 _ValueType1 __tmp = *__a;
122 *__a = *__b;
123 *__b = __tmp;
124 }
125 };
126
127 template<>
128 struct __iter_swap<true>
129 {
130 template<typename _ForwardIterator1, typename _ForwardIterator2>
131 static void
132 iter_swap(_ForwardIterator1 __a, _ForwardIterator2 __b)
133 {
134 swap(*__a, *__b);
135 }
136 };
137#endif // C++03
138
139 /**
140 * @brief Swaps the contents of two iterators.
141 * @ingroup mutating_algorithms
142 * @param __a An iterator.
143 * @param __b Another iterator.
144 * @return Nothing.
145 *
146 * This function swaps the values pointed to by two iterators, not the
147 * iterators themselves.
148 */
149 template<typename _ForwardIterator1, typename _ForwardIterator2>
150 _GLIBCXX20_CONSTEXPR
151 inline void
152 iter_swap(_ForwardIterator1 __a, _ForwardIterator2 __b)
153 {
154 // concept requirements
155 __glibcxx_function_requires(_Mutable_ForwardIteratorConcept<
156 _ForwardIterator1>)
157 __glibcxx_function_requires(_Mutable_ForwardIteratorConcept<
158 _ForwardIterator2>)
159
160#if __cplusplus201402L < 201103L
161 typedef typename iterator_traits<_ForwardIterator1>::value_type
162 _ValueType1;
163 typedef typename iterator_traits<_ForwardIterator2>::value_type
164 _ValueType2;
165
166 __glibcxx_function_requires(_ConvertibleConcept<_ValueType1,
167 _ValueType2>)
168 __glibcxx_function_requires(_ConvertibleConcept<_ValueType2,
169 _ValueType1>)
170
171 typedef typename iterator_traits<_ForwardIterator1>::reference
172 _ReferenceType1;
173 typedef typename iterator_traits<_ForwardIterator2>::reference
174 _ReferenceType2;
175 std::__iter_swap<__are_same<_ValueType1, _ValueType2>::__value
176 && __are_same<_ValueType1&, _ReferenceType1>::__value
177 && __are_same<_ValueType2&, _ReferenceType2>::__value>::
178 iter_swap(__a, __b);
179#else
180 // _GLIBCXX_RESOLVE_LIB_DEFECTS
181 // 187. iter_swap underspecified
182 swap(*__a, *__b);
183#endif
184 }
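A self-contained usage example of the behaviour documented above (the values, not the iterators, are exchanged):

#include <algorithm>
#include <vector>

int main() {
  std::vector<int> v{1, 2};
  std::iter_swap(v.begin(), v.begin() + 1); // v is now {2, 1}
  return v[0] == 2 ? 0 : 1;
}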
185
186 /**
187 * @brief Swap the elements of two sequences.
188 * @ingroup mutating_algorithms
189 * @param __first1 A forward iterator.
190 * @param __last1 A forward iterator.
191 * @param __first2 A forward iterator.
192 * @return An iterator equal to @p first2+(last1-first1).
193 *
194 * Swaps each element in the range @p [first1,last1) with the
195 * corresponding element in the range @p [first2,(last1-first1)).
196 * The ranges must not overlap.
197 */
198 template<typename _ForwardIterator1, typename _ForwardIterator2>
199 _GLIBCXX20_CONSTEXPR
200 _ForwardIterator2
201 swap_ranges(_ForwardIterator1 __first1, _ForwardIterator1 __last1,
202 _ForwardIterator2 __first2)
203 {
204 // concept requirements
205 __glibcxx_function_requires(_Mutable_ForwardIteratorConcept<
206 _ForwardIterator1>)
207 __glibcxx_function_requires(_Mutable_ForwardIteratorConcept<
208 _ForwardIterator2>)
209 __glibcxx_requires_valid_range(__first1, __last1);
210
211 for (; __first1 != __last1; ++__first1, (void)++__first2)
212 std::iter_swap(__first1, __first2);
213 return __first2;
214 }
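A short example of the documented behaviour; both ranges end up exchanged and the return value points one past the last element written in the second range:

#include <algorithm>
#include <vector>

int main() {
  std::vector<int> a{1, 2, 3};
  std::vector<int> b{4, 5, 6};
  auto it = std::swap_ranges(a.begin(), a.end(), b.begin()); // a = {4,5,6}, b = {1,2,3}
  return (it == b.end() && a[0] == 4 && b[0] == 1) ? 0 : 1;
}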
215
216 /**
217 * @brief This does what you think it does.
218 * @ingroup sorting_algorithms
219 * @param __a A thing of arbitrary type.
220 * @param __b Another thing of arbitrary type.
221 * @return The lesser of the parameters.
222 *
223 * This is the simple classic generic implementation. It will work on
224 * temporary expressions, since they are only evaluated once, unlike a
225 * preprocessor macro.
226 */
227 template<typename _Tp>
228 _GLIBCXX14_CONSTEXPRconstexpr
229 inline const _Tp&
230 min(const _Tp& __a, const _Tp& __b)
231 {
232 // concept requirements
233 __glibcxx_function_requires(_LessThanComparableConcept<_Tp>)
234 //return __b < __a ? __b : __a;
235 if (__b < __a)
20. Assuming '__b' is >= '__a'
21. Taking false branch
236 return __b;
237 return __a;
22. Returning the value 64 (reference to 'Data.PerVGPR')
238 }
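A self-contained restatement of the path recorded above (the local names and the second operand are assumptions; the notes only state that '__b' is at least '__a' and that the call returns 64 through a reference to 'Data.PerVGPR'):

#include <algorithm>

int main() {
  unsigned PerVGPR = 64;  // stands in for 'Data.PerVGPR' in the notes above
  unsigned Other = 128;   // assumed: any second operand >= 64 takes the false branch
  const unsigned &M = std::min(PerVGPR, Other); // returns '__a', a reference to PerVGPR
  return M == 64 ? 0 : 1;
}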
239
240 /**
241 * @brief This does what you think it does.
242 * @ingroup sorting_algorithms
243 * @param __a A thing of arbitrary type.
244 * @param __b Another thing of arbitrary type.
245 * @return The greater of the parameters.
246 *
247 * This is the simple classic generic implementation. It will work on
248 * temporary expressions, since they are only evaluated once, unlike a
249 * preprocessor macro.
250 */
251 template<typename _Tp>
252 _GLIBCXX14_CONSTEXPRconstexpr
253 inline const _Tp&
254 max(const _Tp& __a, const _Tp& __b)
255 {
256 // concept requirements
257 __glibcxx_function_requires(_LessThanComparableConcept<_Tp>)
258 //return __a < __b ? __b : __a;
259 if (__a < __b)
260 return __b;
261 return __a;
262 }
263
264 /**
265 * @brief This does what you think it does.
266 * @ingroup sorting_algorithms
267 * @param __a A thing of arbitrary type.
268 * @param __b Another thing of arbitrary type.
269 * @param __comp A @link comparison_functors comparison functor@endlink.
270 * @return The lesser of the parameters.
271 *
272 * This will work on temporary expressions, since they are only evaluated
273 * once, unlike a preprocessor macro.
274 */
275 template<typename _Tp, typename _Compare>
276 _GLIBCXX14_CONSTEXPRconstexpr
277 inline const _Tp&
278 min(const _Tp& __a, const _Tp& __b, _Compare __comp)
279 {
280 //return __comp(__b, __a) ? __b : __a;
281 if (__comp(__b, __a))
282 return __b;
283 return __a;
284 }
285
286 /**
287 * @brief This does what you think it does.
288 * @ingroup sorting_algorithms
289 * @param __a A thing of arbitrary type.
290 * @param __b Another thing of arbitrary type.
291 * @param __comp A @link comparison_functors comparison functor@endlink.
292 * @return The greater of the parameters.
293 *
294 * This will work on temporary expressions, since they are only evaluated
295 * once, unlike a preprocessor macro.
296 */
297 template<typename _Tp, typename _Compare>
298 _GLIBCXX14_CONSTEXPRconstexpr
299 inline const _Tp&
300 max(const _Tp& __a, const _Tp& __b, _Compare __comp)
301 {
302 //return __comp(__a, __b) ? __b : __a;
303 if (__comp(__a, __b))
304 return __b;
305 return __a;
306 }
307
308 // Fallback implementation of the function in bits/stl_iterator.h used to
309 // remove the __normal_iterator wrapper. See copy, fill, ...
310 template<typename _Iterator>
311 _GLIBCXX20_CONSTEXPR
312 inline _Iterator
313 __niter_base(_Iterator __it)
314 _GLIBCXX_NOEXCEPT_IF(std::is_nothrow_copy_constructible<_Iterator>::value)
315 { return __it; }
316
317 // Reverse the __niter_base transformation to get a
318 // __normal_iterator back again (this assumes that __normal_iterator
319 // is only used to wrap random access iterators, like pointers).
320 template<typename _From, typename _To>
321 _GLIBCXX20_CONSTEXPR
322 inline _From
323 __niter_wrap(_From __from, _To __res)
324 { return __from + (__res - std::__niter_base(__from)); }
325
326 // No need to wrap, iterator already has the right type.
327 template<typename _Iterator>
328 _GLIBCXX20_CONSTEXPR
329 inline _Iterator
330 __niter_wrap(const _Iterator&, _Iterator __res)
331 { return __res; }
332
333 // All of these auxiliary structs serve two purposes. (1) Replace
334 // calls to copy with memmove whenever possible. (Memmove, not memcpy,
335 // because the input and output ranges are permitted to overlap.)
336 // (2) If we're using random access iterators, then write the loop as
337 // a for loop with an explicit count.
338
339 template<bool _IsMove, bool _IsSimple, typename _Category>
340 struct __copy_move
341 {
342 template<typename _II, typename _OI>
343 _GLIBCXX20_CONSTEXPR
344 static _OI
345 __copy_m(_II __first, _II __last, _OI __result)
346 {
347 for (; __first != __last; ++__result, (void)++__first)
348 *__result = *__first;
349 return __result;
350 }
351 };
352
353#if __cplusplus201402L >= 201103L
354 template<typename _Category>
355 struct __copy_move<true, false, _Category>
356 {
357 template<typename _II, typename _OI>
358 _GLIBCXX20_CONSTEXPR
359 static _OI
360 __copy_m(_II __first, _II __last, _OI __result)
361 {
362 for (; __first != __last; ++__result, (void)++__first)
363 *__result = std::move(*__first);
364 return __result;
365 }
366 };
367#endif
368
369 template<>
370 struct __copy_move<false, false, random_access_iterator_tag>
371 {
372 template<typename _II, typename _OI>
373 _GLIBCXX20_CONSTEXPR
374 static _OI
375 __copy_m(_II __first, _II __last, _OI __result)
376 {
377 typedef typename iterator_traits<_II>::difference_type _Distance;
378 for(_Distance __n = __last - __first; __n > 0; --__n)
379 {
380 *__result = *__first;
381 ++__first;
382 ++__result;
383 }
384 return __result;
385 }
386 };
387
388#if __cplusplus201402L >= 201103L
389 template<>
390 struct __copy_move<true, false, random_access_iterator_tag>
391 {
392 template<typename _II, typename _OI>
393 _GLIBCXX20_CONSTEXPR
394 static _OI
395 __copy_m(_II __first, _II __last, _OI __result)
396 {
397 typedef typename iterator_traits<_II>::difference_type _Distance;
398 for(_Distance __n = __last - __first; __n > 0; --__n)
399 {
400 *__result = std::move(*__first);
401 ++__first;
402 ++__result;
403 }
404 return __result;
405 }
406 };
407#endif
408
409 template<bool _IsMove>
410 struct __copy_move<_IsMove, true, random_access_iterator_tag>
411 {
412 template<typename _Tp>
413 _GLIBCXX20_CONSTEXPR
414 static _Tp*
415 __copy_m(const _Tp* __first, const _Tp* __last, _Tp* __result)
416 {
417#if __cplusplus201402L >= 201103L
418 using __assignable = conditional<_IsMove,
419 is_move_assignable<_Tp>,
420 is_copy_assignable<_Tp>>;
421 // trivial types can have deleted assignment
422 static_assert( __assignable::type::value, "type is not assignable" );
423#endif
424 const ptrdiff_t _Num = __last - __first;
425 if (_Num)
426 __builtin_memmove(__result, __first, sizeof(_Tp) * _Num);
427 return __result + _Num;
428 }
429 };
430
431 // Helpers for streambuf iterators (either istream or ostream).
432 // NB: avoid including <iosfwd>, relatively large.
433 template<typename _CharT>
434 struct char_traits;
435
436 template<typename _CharT, typename _Traits>
437 class istreambuf_iterator;
438
439 template<typename _CharT, typename _Traits>
440 class ostreambuf_iterator;
441
442 template<bool _IsMove, typename _CharT>
443 typename __gnu_cxx::__enable_if<__is_char<_CharT>::__value,
444 ostreambuf_iterator<_CharT, char_traits<_CharT> > >::__type
445 __copy_move_a2(_CharT*, _CharT*,
446 ostreambuf_iterator<_CharT, char_traits<_CharT> >);
447
448 template<bool _IsMove, typename _CharT>
449 typename __gnu_cxx::__enable_if<__is_char<_CharT>::__value,
450 ostreambuf_iterator<_CharT, char_traits<_CharT> > >::__type
451 __copy_move_a2(const _CharT*, const _CharT*,
452 ostreambuf_iterator<_CharT, char_traits<_CharT> >);
453
454 template<bool _IsMove, typename _CharT>
455 typename __gnu_cxx::__enable_if<__is_char<_CharT>::__value,
456 _CharT*>::__type
457 __copy_move_a2(istreambuf_iterator<_CharT, char_traits<_CharT> >,
458 istreambuf_iterator<_CharT, char_traits<_CharT> >, _CharT*);
459
460 template<bool _IsMove, typename _II, typename _OI>
461 _GLIBCXX20_CONSTEXPR
462 inline _OI
463 __copy_move_a2(_II __first, _II __last, _OI __result)
464 {
465 typedef typename iterator_traits<_II>::iterator_category _Category;
466#ifdef __cpp_lib_is_constant_evaluated
467 if (std::is_constant_evaluated())
468 return std::__copy_move<_IsMove, false, _Category>::
469 __copy_m(__first, __last, __result);
470#endif
471 return std::__copy_move<_IsMove, __memcpyable<_OI, _II>::__value,
472 _Category>::__copy_m(__first, __last, __result);
473 }
474
475_GLIBCXX_BEGIN_NAMESPACE_CONTAINER
476
477 template<typename _Tp, typename _Ref, typename _Ptr>
478 struct _Deque_iterator;
479
480_GLIBCXX_END_NAMESPACE_CONTAINER
481
482 template<bool _IsMove,
483 typename _Tp, typename _Ref, typename _Ptr, typename _OI>
484 _OI
485 __copy_move_a1(_GLIBCXX_STD_Cstd::_Deque_iterator<_Tp, _Ref, _Ptr>,
486 _GLIBCXX_STD_Cstd::_Deque_iterator<_Tp, _Ref, _Ptr>,
487 _OI);
488
489 template<bool _IsMove,
490 typename _ITp, typename _IRef, typename _IPtr, typename _OTp>
491 _GLIBCXX_STD_Cstd::_Deque_iterator<_OTp, _OTp&, _OTp*>
492 __copy_move_a1(_GLIBCXX_STD_Cstd::_Deque_iterator<_ITp, _IRef, _IPtr>,
493 _GLIBCXX_STD_Cstd::_Deque_iterator<_ITp, _IRef, _IPtr>,
494 _GLIBCXX_STD_Cstd::_Deque_iterator<_OTp, _OTp&, _OTp*>);
495
496 template<bool _IsMove, typename _II, typename _Tp>
497 typename __gnu_cxx::__enable_if<
498 __is_random_access_iter<_II>::__value,
499 _GLIBCXX_STD_Cstd::_Deque_iterator<_Tp, _Tp&, _Tp*> >::__type
500 __copy_move_a1(_II, _II, _GLIBCXX_STD_Cstd::_Deque_iterator<_Tp, _Tp&, _Tp*>);
501
502 template<bool _IsMove, typename _II, typename _OI>
503 _GLIBCXX20_CONSTEXPR
504 inline _OI
505 __copy_move_a1(_II __first, _II __last, _OI __result)
506 { return std::__copy_move_a2<_IsMove>(__first, __last, __result); }
507
508 template<bool _IsMove, typename _II, typename _OI>
509 _GLIBCXX20_CONSTEXPR
510 inline _OI
511 __copy_move_a(_II __first, _II __last, _OI __result)
512 {
513 return std::__niter_wrap(__result,
514 std::__copy_move_a1<_IsMove>(std::__niter_base(__first),
515 std::__niter_base(__last),
516 std::__niter_base(__result)));
517 }
518
519 template<bool _IsMove,
520 typename _Ite, typename _Seq, typename _Cat, typename _OI>
521 _OI
522 __copy_move_a(const ::__gnu_debug::_Safe_iterator<_Ite, _Seq, _Cat>&,
523 const ::__gnu_debug::_Safe_iterator<_Ite, _Seq, _Cat>&,
524 _OI);
525
526 template<bool _IsMove,
527 typename _II, typename _Ite, typename _Seq, typename _Cat>
528 __gnu_debug::_Safe_iterator<_Ite, _Seq, _Cat>
529 __copy_move_a(_II, _II,
530 const ::__gnu_debug::_Safe_iterator<_Ite, _Seq, _Cat>&);
531
532 template<bool _IsMove,
533 typename _IIte, typename _ISeq, typename _ICat,
534 typename _OIte, typename _OSeq, typename _OCat>
535 ::__gnu_debug::_Safe_iterator<_OIte, _OSeq, _OCat>
536 __copy_move_a(const ::__gnu_debug::_Safe_iterator<_IIte, _ISeq, _ICat>&,
537 const ::__gnu_debug::_Safe_iterator<_IIte, _ISeq, _ICat>&,
538 const ::__gnu_debug::_Safe_iterator<_OIte, _OSeq, _OCat>&);
539
540 /**
541 * @brief Copies the range [first,last) into result.
542 * @ingroup mutating_algorithms
543 * @param __first An input iterator.
544 * @param __last An input iterator.
545 * @param __result An output iterator.
546 * @return result + (last - first)
547 *
548 * This inline function will boil down to a call to @c memmove whenever
549 * possible. Failing that, if random access iterators are passed, then the
550 * loop count will be known (and therefore a candidate for compiler
551 * optimizations such as unrolling). Result may not be contained within
552 * [first,last); the copy_backward function should be used instead.
553 *
554 * Note that the end of the output range is permitted to be contained
555 * within [first,last).
556 */
557 template<typename _II, typename _OI>
558 _GLIBCXX20_CONSTEXPR
559 inline _OI
560 copy(_II __first, _II __last, _OI __result)
561 {
562 // concept requirements
563 __glibcxx_function_requires(_InputIteratorConcept<_II>)
564 __glibcxx_function_requires(_OutputIteratorConcept<_OI,
565 typename iterator_traits<_II>::value_type>)
566 __glibcxx_requires_can_increment_range(__first, __last, __result);
567
568 return std::__copy_move_a<__is_move_iterator<_II>::__value>
569 (std::__miter_base(__first), std::__miter_base(__last), __result);
570 }
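A minimal example of the documented behaviour; for contiguous ranges of a trivially copyable type the call takes the memmove path described above:

#include <algorithm>
#include <vector>

int main() {
  std::vector<int> src{1, 2, 3};
  std::vector<int> dst(3);
  std::copy(src.begin(), src.end(), dst.begin()); // contiguous ints: memmove path
  return dst[2] == 3 ? 0 : 1;
}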
571
572#if __cplusplus201402L >= 201103L
573 /**
574 * @brief Moves the range [first,last) into result.
575 * @ingroup mutating_algorithms
576 * @param __first An input iterator.
577 * @param __last An input iterator.
578 * @param __result An output iterator.
579 * @return result + (last - first)
580 *
581 * This inline function will boil down to a call to @c memmove whenever
582 * possible. Failing that, if random access iterators are passed, then the
583 * loop count will be known (and therefore a candidate for compiler
584 * optimizations such as unrolling). Result may not be contained within
585 * [first,last); the move_backward function should be used instead.
586 *
587 * Note that the end of the output range is permitted to be contained
588 * within [first,last).
589 */
590 template<typename _II, typename _OI>
591 _GLIBCXX20_CONSTEXPR
592 inline _OI
593 move(_II __first, _II __last, _OI __result)
594 {
595 // concept requirements
596 __glibcxx_function_requires(_InputIteratorConcept<_II>)
597 __glibcxx_function_requires(_OutputIteratorConcept<_OI,
598 typename iterator_traits<_II>::value_type>)
599 __glibcxx_requires_can_increment_range(__first, __last, __result);
600
601 return std::__copy_move_a<true>(std::__miter_base(__first),
602 std::__miter_base(__last), __result);
603 }
604
605#define _GLIBCXX_MOVE3(_Tp, _Up, _Vp)std::move(_Tp, _Up, _Vp) std::move(_Tp, _Up, _Vp)
606#else
607#define _GLIBCXX_MOVE3(_Tp, _Up, _Vp)std::move(_Tp, _Up, _Vp) std::copy(_Tp, _Up, _Vp)
608#endif
609
610 template<bool _IsMove, bool _IsSimple, typename _Category>
611 struct __copy_move_backward
612 {
613 template<typename _BI1, typename _BI2>
614 _GLIBCXX20_CONSTEXPR
615 static _BI2
616 __copy_move_b(_BI1 __first, _BI1 __last, _BI2 __result)
617 {
618 while (__first != __last)
619 *--__result = *--__last;
620 return __result;
621 }
622 };
623
624#if __cplusplus201402L >= 201103L
625 template<typename _Category>
626 struct __copy_move_backward<true, false, _Category>
627 {
628 template<typename _BI1, typename _BI2>
629 _GLIBCXX20_CONSTEXPR
630 static _BI2
631 __copy_move_b(_BI1 __first, _BI1 __last, _BI2 __result)
632 {
633 while (__first != __last)
634 *--__result = std::move(*--__last);
635 return __result;
636 }
637 };
638#endif
639
640 template<>
641 struct __copy_move_backward<false, false, random_access_iterator_tag>
642 {
643 template<typename _BI1, typename _BI2>
644 _GLIBCXX20_CONSTEXPR
645 static _BI2
646 __copy_move_b(_BI1 __first, _BI1 __last, _BI2 __result)
647 {
648 typename iterator_traits<_BI1>::difference_type
649 __n = __last - __first;
650 for (; __n > 0; --__n)
651 *--__result = *--__last;
652 return __result;
653 }
654 };
655
656#if __cplusplus201402L >= 201103L
657 template<>
658 struct __copy_move_backward<true, false, random_access_iterator_tag>
659 {
660 template<typename _BI1, typename _BI2>
661 _GLIBCXX20_CONSTEXPR
662 static _BI2
663 __copy_move_b(_BI1 __first, _BI1 __last, _BI2 __result)
664 {
665 typename iterator_traits<_BI1>::difference_type
666 __n = __last - __first;
667 for (; __n > 0; --__n)
668 *--__result = std::move(*--__last);
669 return __result;
670 }
671 };
672#endif
673
674 template<bool _IsMove>
675 struct __copy_move_backward<_IsMove, true, random_access_iterator_tag>
676 {
677 template<typename _Tp>
678 _GLIBCXX20_CONSTEXPR
679 static _Tp*
680 __copy_move_b(const _Tp* __first, const _Tp* __last, _Tp* __result)
681 {
682#if __cplusplus201402L >= 201103L
683 using __assignable = conditional<_IsMove,
684 is_move_assignable<_Tp>,
685 is_copy_assignable<_Tp>>;
686 // trivial types can have deleted assignment
687 static_assert( __assignable::type::value, "type is not assignable" );
688#endif
689 const ptrdiff_t _Num = __last - __first;
690 if (_Num)
691 __builtin_memmove(__result - _Num, __first, sizeof(_Tp) * _Num);
692 return __result - _Num;
693 }
694 };
695
696 template<bool _IsMove, typename _BI1, typename _BI2>
697 _GLIBCXX20_CONSTEXPR
698 inline _BI2
699 __copy_move_backward_a2(_BI1 __first, _BI1 __last, _BI2 __result)
700 {
701 typedef typename iterator_traits<_BI1>::iterator_category _Category;
702#ifdef __cpp_lib_is_constant_evaluated
703 if (std::is_constant_evaluated())
704 return std::__copy_move_backward<_IsMove, false, _Category>::
705 __copy_move_b(__first, __last, __result);
706#endif
707 return std::__copy_move_backward<_IsMove,
708 __memcpyable<_BI2, _BI1>::__value,
709 _Category>::__copy_move_b(__first,
710 __last,
711 __result);
712 }
713
714 template<bool _IsMove, typename _BI1, typename _BI2>
715 _GLIBCXX20_CONSTEXPR
716 inline _BI2
717 __copy_move_backward_a1(_BI1 __first, _BI1 __last, _BI2 __result)
718 { return std::__copy_move_backward_a2<_IsMove>(__first, __last, __result); }
719
720 template<bool _IsMove,
721 typename _Tp, typename _Ref, typename _Ptr, typename _OI>
722 _OI
723 __copy_move_backward_a1(_GLIBCXX_STD_Cstd::_Deque_iterator<_Tp, _Ref, _Ptr>,
724 _GLIBCXX_STD_Cstd::_Deque_iterator<_Tp, _Ref, _Ptr>,
725 _OI);
726
727 template<bool _IsMove,
728 typename _ITp, typename _IRef, typename _IPtr, typename _OTp>
729 _GLIBCXX_STD_Cstd::_Deque_iterator<_OTp, _OTp&, _OTp*>
730 __copy_move_backward_a1(
731 _GLIBCXX_STD_Cstd::_Deque_iterator<_ITp, _IRef, _IPtr>,
732 _GLIBCXX_STD_Cstd::_Deque_iterator<_ITp, _IRef, _IPtr>,
733 _GLIBCXX_STD_Cstd::_Deque_iterator<_OTp, _OTp&, _OTp*>);
734
735 template<bool _IsMove, typename _II, typename _Tp>
736 typename __gnu_cxx::__enable_if<
737 __is_random_access_iter<_II>::__value,
738 _GLIBCXX_STD_Cstd::_Deque_iterator<_Tp, _Tp&, _Tp*> >::__type
739 __copy_move_backward_a1(_II, _II,
740 _GLIBCXX_STD_Cstd::_Deque_iterator<_Tp, _Tp&, _Tp*>);
741
742 template<bool _IsMove, typename _II, typename _OI>
743 _GLIBCXX20_CONSTEXPR
744 inline _OI
745 __copy_move_backward_a(_II __first, _II __last, _OI __result)
746 {
747 return std::__niter_wrap(__result,
748 std::__copy_move_backward_a1<_IsMove>
749 (std::__niter_base(__first), std::__niter_base(__last),
750 std::__niter_base(__result)));
751 }
752
753 template<bool _IsMove,
754 typename _Ite, typename _Seq, typename _Cat, typename _OI>
755 _OI
756 __copy_move_backward_a(
757 const ::__gnu_debug::_Safe_iterator<_Ite, _Seq, _Cat>&,
758 const ::__gnu_debug::_Safe_iterator<_Ite, _Seq, _Cat>&,
759 _OI);
760
761 template<bool _IsMove,
762 typename _II, typename _Ite, typename _Seq, typename _Cat>
763 __gnu_debug::_Safe_iterator<_Ite, _Seq, _Cat>
764 __copy_move_backward_a(_II, _II,
765 const ::__gnu_debug::_Safe_iterator<_Ite, _Seq, _Cat>&);
766
767 template<bool _IsMove,
768 typename _IIte, typename _ISeq, typename _ICat,
769 typename _OIte, typename _OSeq, typename _OCat>
770 ::__gnu_debug::_Safe_iterator<_OIte, _OSeq, _OCat>
771 __copy_move_backward_a(
772 const ::__gnu_debug::_Safe_iterator<_IIte, _ISeq, _ICat>&,
773 const ::__gnu_debug::_Safe_iterator<_IIte, _ISeq, _ICat>&,
774 const ::__gnu_debug::_Safe_iterator<_OIte, _OSeq, _OCat>&);
775
776 /**
777 * @brief Copies the range [first,last) into result.
778 * @ingroup mutating_algorithms
779 * @param __first A bidirectional iterator.
780 * @param __last A bidirectional iterator.
781 * @param __result A bidirectional iterator.
782 * @return result - (last - first)
783 *
784 * The function has the same effect as copy, but starts at the end of the
785 * range and works its way to the start, returning the start of the result.
786 * This inline function will boil down to a call to @c memmove whenever
787 * possible. Failing that, if random access iterators are passed, then the
788 * loop count will be known (and therefore a candidate for compiler
789 * optimizations such as unrolling).
790 *
791 * Result may not be in the range (first,last]. Use copy instead. Note
792 * that the start of the output range may overlap [first,last).
793 */
794 template<typename _BI1, typename _BI2>
795 _GLIBCXX20_CONSTEXPR
796 inline _BI2
797 copy_backward(_BI1 __first, _BI1 __last, _BI2 __result)
798 {
799 // concept requirements
800 __glibcxx_function_requires(_BidirectionalIteratorConcept<_BI1>)
801 __glibcxx_function_requires(_Mutable_BidirectionalIteratorConcept<_BI2>)
802 __glibcxx_function_requires(_ConvertibleConcept<
803 typename iterator_traits<_BI1>::value_type,
804 typename iterator_traits<_BI2>::value_type>)
805 __glibcxx_requires_can_decrement_range(__first, __last, __result);
806
807 return std::__copy_move_backward_a<__is_move_iterator<_BI1>::__value>
808 (std::__miter_base(__first), std::__miter_base(__last), __result);
809 }
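A short example of the overlap case the documentation above calls out, shifting elements to the right within one container:

#include <algorithm>
#include <vector>

int main() {
  std::vector<int> v{1, 2, 3, 0, 0};
  // Copy the first three elements so that they end at v.end(); the input and
  // output ranges overlap, which is the case copy_backward exists for.
  std::copy_backward(v.begin(), v.begin() + 3, v.end()); // v = {1, 2, 1, 2, 3}
  return v[4] == 3 ? 0 : 1;
}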
810
811#if __cplusplus201402L >= 201103L
812 /**
813 * @brief Moves the range [first,last) into result.
814 * @ingroup mutating_algorithms
815 * @param __first A bidirectional iterator.
816 * @param __last A bidirectional iterator.
817 * @param __result A bidirectional iterator.
818 * @return result - (last - first)
819 *
820 * The function has the same effect as move, but starts at the end of the
821 * range and works its way to the start, returning the start of the result.
822 * This inline function will boil down to a call to @c memmove whenever
823 * possible. Failing that, if random access iterators are passed, then the
824 * loop count will be known (and therefore a candidate for compiler
825 * optimizations such as unrolling).
826 *
827 * Result may not be in the range (first,last]. Use move instead. Note
828 * that the start of the output range may overlap [first,last).
829 */
830 template<typename _BI1, typename _BI2>
831 _GLIBCXX20_CONSTEXPR
832 inline _BI2
833 move_backward(_BI1 __first, _BI1 __last, _BI2 __result)
834 {
835 // concept requirements
836 __glibcxx_function_requires(_BidirectionalIteratorConcept<_BI1>)
837 __glibcxx_function_requires(_Mutable_BidirectionalIteratorConcept<_BI2>)
838 __glibcxx_function_requires(_ConvertibleConcept<
839 typename iterator_traits<_BI1>::value_type,
840 typename iterator_traits<_BI2>::value_type>)
841 __glibcxx_requires_can_decrement_range(__first, __last, __result);
842
843 return std::__copy_move_backward_a<true>(std::__miter_base(__first),
844 std::__miter_base(__last),
845 __result);
846 }
847
848#define _GLIBCXX_MOVE_BACKWARD3(_Tp, _Up, _Vp)std::move_backward(_Tp, _Up, _Vp) std::move_backward(_Tp, _Up, _Vp)
849#else
850#define _GLIBCXX_MOVE_BACKWARD3(_Tp, _Up, _Vp)std::move_backward(_Tp, _Up, _Vp) std::copy_backward(_Tp, _Up, _Vp)
851#endif
852
853 template<typename _ForwardIterator, typename _Tp>
854 _GLIBCXX20_CONSTEXPR
855 inline typename
856 __gnu_cxx::__enable_if<!__is_scalar<_Tp>::__value, void>::__type
857 __fill_a1(_ForwardIterator __first, _ForwardIterator __last,
858 const _Tp& __value)
859 {
860 for (; __first != __last; ++__first)
861 *__first = __value;
862 }
863
864 template<typename _ForwardIterator, typename _Tp>
865 _GLIBCXX20_CONSTEXPR
866 inline typename
867 __gnu_cxx::__enable_if<__is_scalar<_Tp>::__value, void>::__type
868 __fill_a1(_ForwardIterator __first, _ForwardIterator __last,
869 const _Tp& __value)
870 {
871 const _Tp __tmp = __value;
872 for (; __first != __last; ++__first)
873 *__first = __tmp;
874 }
875
876 // Specialization: for char types we can use memset.
877 template<typename _Tp>
878 _GLIBCXX20_CONSTEXPR
879 inline typename
880 __gnu_cxx::__enable_if<__is_byte<_Tp>::__value, void>::__type
881 __fill_a1(_Tp* __first, _Tp* __last, const _Tp& __c)
882 {
883 const _Tp __tmp = __c;
884#if __cpp_lib_is_constant_evaluated
885 if (std::is_constant_evaluated())
886 {
887 for (; __first != __last; ++__first)
888 *__first = __tmp;
889 return;
890 }
891#endif
892 if (const size_t __len = __last - __first)
893 __builtin_memset(__first, static_cast<unsigned char>(__tmp), __len);
894 }
895
896 template<typename _Ite, typename _Cont, typename _Tp>
897 _GLIBCXX20_CONSTEXPR
898 inline void
899 __fill_a1(::__gnu_cxx::__normal_iterator<_Ite, _Cont> __first,
900 ::__gnu_cxx::__normal_iterator<_Ite, _Cont> __last,
901 const _Tp& __value)
902 { std::__fill_a1(__first.base(), __last.base(), __value); }
903
904 template<typename _Tp, typename _VTp>
905 void
906 __fill_a1(const _GLIBCXX_STD_Cstd::_Deque_iterator<_Tp, _Tp&, _Tp*>&,
907 const _GLIBCXX_STD_Cstd::_Deque_iterator<_Tp, _Tp&, _Tp*>&,
908 const _VTp&);
909
910 template<typename _FIte, typename _Tp>
911 _GLIBCXX20_CONSTEXPR
912 inline void
913 __fill_a(_FIte __first, _FIte __last, const _Tp& __value)
914 { std::__fill_a1(__first, __last, __value); }
915
916 template<typename _Ite, typename _Seq, typename _Cat, typename _Tp>
917 void
918 __fill_a(const ::__gnu_debug::_Safe_iterator<_Ite, _Seq, _Cat>&,
919 const ::__gnu_debug::_Safe_iterator<_Ite, _Seq, _Cat>&,
920 const _Tp&);
921
922 /**
923 * @brief Fills the range [first,last) with copies of value.
924 * @ingroup mutating_algorithms
925 * @param __first A forward iterator.
926 * @param __last A forward iterator.
927 * @param __value A reference-to-const of arbitrary type.
928 * @return Nothing.
929 *
930 * This function fills a range with copies of the same value. For char
931 * types filling contiguous areas of memory, this becomes an inline call
932 * to @c memset or @c wmemset.
933 */
934 template<typename _ForwardIterator, typename _Tp>
935 _GLIBCXX20_CONSTEXPR
936 inline void
937 fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value)
938 {
939 // concept requirements
940 __glibcxx_function_requires(_Mutable_ForwardIteratorConcept<
941 _ForwardIterator>)
942 __glibcxx_requires_valid_range(__first, __last);
943
944 std::__fill_a(__first, __last, __value);
945 }
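A minimal example of the documented behaviour; for a contiguous range of char the call becomes the memset path mentioned above:

#include <algorithm>
#include <string>

int main() {
  std::string s(5, 'a');
  std::fill(s.begin(), s.end(), 'x');
  return s == "xxxxx" ? 0 : 1;
}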
946
947 // Used by fill_n, generate_n, etc. to convert _Size to an integral type:
948 inline _GLIBCXX_CONSTEXPRconstexpr int
949 __size_to_integer(int __n) { return __n; }
950 inline _GLIBCXX_CONSTEXPRconstexpr unsigned
951 __size_to_integer(unsigned __n) { return __n; }
952 inline _GLIBCXX_CONSTEXPRconstexpr long
953 __size_to_integer(long __n) { return __n; }
954 inline _GLIBCXX_CONSTEXPRconstexpr unsigned long
955 __size_to_integer(unsigned long __n) { return __n; }
956 inline _GLIBCXX_CONSTEXPRconstexpr long long
957 __size_to_integer(long long __n) { return __n; }
958 inline _GLIBCXX_CONSTEXPRconstexpr unsigned long long
959 __size_to_integer(unsigned long long __n) { return __n; }
960
961#if defined(__GLIBCXX_TYPE_INT_N_0)
962 inline _GLIBCXX_CONSTEXPRconstexpr __GLIBCXX_TYPE_INT_N_0
963 __size_to_integer(__GLIBCXX_TYPE_INT_N_0 __n) { return __n; }
964 inline _GLIBCXX_CONSTEXPRconstexpr unsigned __GLIBCXX_TYPE_INT_N_0
965 __size_to_integer(unsigned __GLIBCXX_TYPE_INT_N_0 __n) { return __n; }
966#endif
967#if defined(__GLIBCXX_TYPE_INT_N_1)
968 inline _GLIBCXX_CONSTEXPRconstexpr __GLIBCXX_TYPE_INT_N_1
969 __size_to_integer(__GLIBCXX_TYPE_INT_N_1 __n) { return __n; }
970 inline _GLIBCXX_CONSTEXPRconstexpr unsigned __GLIBCXX_TYPE_INT_N_1
971 __size_to_integer(unsigned __GLIBCXX_TYPE_INT_N_1 __n) { return __n; }
972#endif
973#if defined(__GLIBCXX_TYPE_INT_N_2)
974 inline _GLIBCXX_CONSTEXPRconstexpr __GLIBCXX_TYPE_INT_N_2
975 __size_to_integer(__GLIBCXX_TYPE_INT_N_2 __n) { return __n; }
976 inline _GLIBCXX_CONSTEXPRconstexpr unsigned __GLIBCXX_TYPE_INT_N_2
977 __size_to_integer(unsigned __GLIBCXX_TYPE_INT_N_2 __n) { return __n; }
978#endif
979#if defined(__GLIBCXX_TYPE_INT_N_3)
980 inline _GLIBCXX_CONSTEXPRconstexpr unsigned __GLIBCXX_TYPE_INT_N_3
981 __size_to_integer(__GLIBCXX_TYPE_INT_N_3 __n) { return __n; }
982 inline _GLIBCXX_CONSTEXPRconstexpr __GLIBCXX_TYPE_INT_N_3
983 __size_to_integer(unsigned __GLIBCXX_TYPE_INT_N_3 __n) { return __n; }
984#endif
985
986 inline _GLIBCXX_CONSTEXPRconstexpr long long
987 __size_to_integer(float __n) { return __n; }
988 inline _GLIBCXX_CONSTEXPRconstexpr long long
989 __size_to_integer(double __n) { return __n; }
990 inline _GLIBCXX_CONSTEXPRconstexpr long long
991 __size_to_integer(long double __n) { return __n; }
992#if !defined(__STRICT_ANSI__1) && defined(_GLIBCXX_USE_FLOAT1281) && !defined(__CUDACC__)
993 inline _GLIBCXX_CONSTEXPRconstexpr long long
994 __size_to_integer(__float128 __n) { return __n; }
995#endif
996
997 template<typename _OutputIterator, typename _Size, typename _Tp>
998 _GLIBCXX20_CONSTEXPR
999 inline typename
1000 __gnu_cxx::__enable_if<!__is_scalar<_Tp>::__value, _OutputIterator>::__type
1001 __fill_n_a1(_OutputIterator __first, _Size __n, const _Tp& __value)
1002 {
1003 for (; __n > 0; --__n, (void) ++__first)
1004 *__first = __value;
1005 return __first;
1006 }
1007
1008 template<typename _OutputIterator, typename _Size, typename _Tp>
1009 _GLIBCXX20_CONSTEXPR
1010 inline typename
1011 __gnu_cxx::__enable_if<__is_scalar<_Tp>::__value, _OutputIterator>::__type
1012 __fill_n_a1(_OutputIterator __first, _Size __n, const _Tp& __value)
1013 {
1014 const _Tp __tmp = __value;
1015 for (; __n > 0; --__n, (void) ++__first)
1016 *__first = __tmp;
1017 return __first;
1018 }
1019
1020 template<typename _Ite, typename _Seq, typename _Cat, typename _Size,
1021 typename _Tp>
1022 ::__gnu_debug::_Safe_iterator<_Ite, _Seq, _Cat>
1023 __fill_n_a(const ::__gnu_debug::_Safe_iterator<_Ite, _Seq, _Cat>& __first,
1024 _Size __n, const _Tp& __value,
1025 std::input_iterator_tag);
1026
1027 template<typename _OutputIterator, typename _Size, typename _Tp>
1028 _GLIBCXX20_CONSTEXPR
1029 inline _OutputIterator
1030 __fill_n_a(_OutputIterator __first, _Size __n, const _Tp& __value,
1031 std::output_iterator_tag)
1032 {
1033#if __cplusplus201402L >= 201103L
1034 static_assert(is_integral<_Size>{}, "fill_n must pass integral size");
1035#endif
1036 return __fill_n_a1(__first, __n, __value);
1037 }
1038
1039 template<typename _OutputIterator, typename _Size, typename _Tp>
1040 _GLIBCXX20_CONSTEXPR
1041 inline _OutputIterator
1042 __fill_n_a(_OutputIterator __first, _Size __n, const _Tp& __value,
1043 std::input_iterator_tag)
1044 {
1045#if __cplusplus201402L >= 201103L
1046 static_assert(is_integral<_Size>{}, "fill_n must pass integral size");
1047#endif
1048 return __fill_n_a1(__first, __n, __value);
1049 }
1050
1051 template<typename _OutputIterator, typename _Size, typename _Tp>
1052 _GLIBCXX20_CONSTEXPR
1053 inline _OutputIterator
1054 __fill_n_a(_OutputIterator __first, _Size __n, const _Tp& __value,
1055 std::random_access_iterator_tag)
1056 {
1057#if __cplusplus201402L >= 201103L
1058 static_assert(is_integral<_Size>{}, "fill_n must pass integral size");
1059#endif
1060 if (__n <= 0)
1061 return __first;
1062
1063 __glibcxx_requires_can_increment(__first, __n);
1064
1065 std::__fill_a(__first, __first + __n, __value);
1066 return __first + __n;
1067 }
1068
1069 /**
1070 * @brief Fills the range [first,first+n) with copies of value.
1071 * @ingroup mutating_algorithms
1072 * @param __first An output iterator.
1073 * @param __n The count of copies to perform.
1074 * @param __value A reference-to-const of arbitrary type.
1075 * @return The iterator at first+n.
1076 *
1077 * This function fills a range with copies of the same value. For char
1078 * types filling contiguous areas of memory, this becomes an inline call
1079 * to @c memset or @c wmemset.
1080 *
1081 * If @p __n is negative, the function does nothing.
1082 */
1083 // _GLIBCXX_RESOLVE_LIB_DEFECTS
1084 // DR 865. More algorithms that throw away information
1085 // DR 426. search_n(), fill_n(), and generate_n() with negative n
1086 template<typename _OI, typename _Size, typename _Tp>
1087 _GLIBCXX20_CONSTEXPR
1088 inline _OI
1089 fill_n(_OI __first, _Size __n, const _Tp& __value)
1090 {
1091 // concept requirements
1092 __glibcxx_function_requires(_OutputIteratorConcept<_OI, _Tp>)
1093
1094 return std::__fill_n_a(__first, std::__size_to_integer(__n), __value,
1095 std::__iterator_category(__first));
1096 }
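A short example of the documented behaviour, including the negative-count case that the resolution of DR 426 makes a no-op:

#include <algorithm>
#include <vector>

int main() {
  std::vector<int> v(5, 0);
  auto it = std::fill_n(v.begin(), 3, 7); // v = {7, 7, 7, 0, 0}; it == v.begin() + 3
  std::fill_n(v.begin(), -1, 9);          // negative count: nothing is written
  return (it == v.begin() + 3 && v[3] == 0) ? 0 : 1;
}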
1097
1098 template<bool _BoolType>
1099 struct __equal
1100 {
1101 template<typename _II1, typename _II2>
1102 _GLIBCXX20_CONSTEXPR
1103 static bool
1104 equal(_II1 __first1, _II1 __last1, _II2 __first2)
1105 {
1106 for (; __first1 != __last1; ++__first1, (void) ++__first2)
1107 if (!(*__first1 == *__first2))
1108 return false;
1109 return true;
1110 }
1111 };
1112
1113 template<>
1114 struct __equal<true>
1115 {
1116 template<typename _Tp>
1117 _GLIBCXX20_CONSTEXPR
1118 static bool
1119 equal(const _Tp* __first1, const _Tp* __last1, const _Tp* __first2)
1120 {
1121 if (const size_t __len = (__last1 - __first1))
1122 return !std::__memcmp(__first1, __first2, __len);
1123 return true;
1124 }
1125 };
1126
1127 template<typename _Tp, typename _Ref, typename _Ptr, typename _II>
1128 typename __gnu_cxx::__enable_if<
1129 __is_random_access_iter<_II>::__value, bool>::__type
1130 __equal_aux1(_GLIBCXX_STD_Cstd::_Deque_iterator<_Tp, _Ref, _Ptr>,
1131 _GLIBCXX_STD_Cstd::_Deque_iterator<_Tp, _Ref, _Ptr>,
1132 _II);
1133
1134 template<typename _Tp1, typename _Ref1, typename _Ptr1,
1135 typename _Tp2, typename _Ref2, typename _Ptr2>
1136 bool
1137 __equal_aux1(_GLIBCXX_STD_Cstd::_Deque_iterator<_Tp1, _Ref1, _Ptr1>,
1138 _GLIBCXX_STD_Cstd::_Deque_iterator<_Tp1, _Ref1, _Ptr1>,
1139 _GLIBCXX_STD_Cstd::_Deque_iterator<_Tp2, _Ref2, _Ptr2>);
1140
1141 template<typename _II, typename _Tp, typename _Ref, typename _Ptr>
1142 typename __gnu_cxx::__enable_if<
1143 __is_random_access_iter<_II>::__value, bool>::__type
1144 __equal_aux1(_II, _II,
1145 _GLIBCXX_STD_Cstd::_Deque_iterator<_Tp, _Ref, _Ptr>);
1146
1147 template<typename _II1, typename _II2>
1148 _GLIBCXX20_CONSTEXPR
1149 inline bool
1150 __equal_aux1(_II1 __first1, _II1 __last1, _II2 __first2)
1151 {
1152 typedef typename iterator_traits<_II1>::value_type _ValueType1;
1153 const bool __simple = ((__is_integer<_ValueType1>::__value
1154 || __is_pointer<_ValueType1>::__value)
1155 && __memcmpable<_II1, _II2>::__value);
1156 return std::__equal<__simple>::equal(__first1, __last1, __first2);
1157 }
1158
1159 template<typename _II1, typename _II2>
1160 _GLIBCXX20_CONSTEXPR
1161 inline bool
1162 __equal_aux(_II1 __first1, _II1 __last1, _II2 __first2)
1163 {
1164 return std::__equal_aux1(std::__niter_base(__first1),
1165 std::__niter_base(__last1),
1166 std::__niter_base(__first2));
1167 }
1168
1169 template<typename _II1, typename _Seq1, typename _Cat1, typename _II2>
1170 bool
1171 __equal_aux(const ::__gnu_debug::_Safe_iterator<_II1, _Seq1, _Cat1>&,
1172 const ::__gnu_debug::_Safe_iterator<_II1, _Seq1, _Cat1>&,
1173 _II2);
1174
1175 template<typename _II1, typename _II2, typename _Seq2, typename _Cat2>
1176 bool
1177 __equal_aux(_II1, _II1,
1178 const ::__gnu_debug::_Safe_iterator<_II2, _Seq2, _Cat2>&);
1179
1180 template<typename _II1, typename _Seq1, typename _Cat1,
1181 typename _II2, typename _Seq2, typename _Cat2>
1182 bool
1183 __equal_aux(const ::__gnu_debug::_Safe_iterator<_II1, _Seq1, _Cat1>&,
1184 const ::__gnu_debug::_Safe_iterator<_II1, _Seq1, _Cat1>&,
1185 const ::__gnu_debug::_Safe_iterator<_II2, _Seq2, _Cat2>&);
1186
1187 template<typename, typename>
1188 struct __lc_rai
1189 {
1190 template<typename _II1, typename _II2>
1191 _GLIBCXX20_CONSTEXPR
1192 static _II1
1193 __newlast1(_II1, _II1 __last1, _II2, _II2)
1194 { return __last1; }
1195
1196 template<typename _II>
1197 _GLIBCXX20_CONSTEXPR
1198 static bool
1199 __cnd2(_II __first, _II __last)
1200 { return __first != __last; }
1201 };
1202
1203 template<>
1204 struct __lc_rai<random_access_iterator_tag, random_access_iterator_tag>
1205 {
1206 template<typename _RAI1, typename _RAI2>
1207 _GLIBCXX20_CONSTEXPR
1208 static _RAI1
1209 __newlast1(_RAI1 __first1, _RAI1 __last1,
1210 _RAI2 __first2, _RAI2 __last2)
1211 {
1212 const typename iterator_traits<_RAI1>::difference_type
1213 __diff1 = __last1 - __first1;
1214 const typename iterator_traits<_RAI2>::difference_type
1215 __diff2 = __last2 - __first2;
1216 return __diff2 < __diff1 ? __first1 + __diff2 : __last1;
1217 }
1218
1219 template<typename _RAI>
1220 static _GLIBCXX20_CONSTEXPR bool
1221 __cnd2(_RAI, _RAI)
1222 { return true; }
1223 };
1224
1225 template<typename _II1, typename _II2, typename _Compare>
1226 _GLIBCXX20_CONSTEXPR
1227 bool
1228 __lexicographical_compare_impl(_II1 __first1, _II1 __last1,
1229 _II2 __first2, _II2 __last2,
1230 _Compare __comp)
1231 {
1232 typedef typename iterator_traits<_II1>::iterator_category _Category1;
1233 typedef typename iterator_traits<_II2>::iterator_category _Category2;
1234 typedef std::__lc_rai<_Category1, _Category2> __rai_type;
1235
1236 __last1 = __rai_type::__newlast1(__first1, __last1, __first2, __last2);
1237 for (; __first1 != __last1 && __rai_type::__cnd2(__first2, __last2);
1238 ++__first1, (void)++__first2)
1239 {
1240 if (__comp(__first1, __first2))
1241 return true;
1242 if (__comp(__first2, __first1))
1243 return false;
1244 }
1245 return __first1 == __last1 && __first2 != __last2;
1246 }
1247
1248 template<bool _BoolType>
1249 struct __lexicographical_compare
1250 {
1251 template<typename _II1, typename _II2>
1252 _GLIBCXX20_CONSTEXPR
1253 static bool
1254 __lc(_II1 __first1, _II1 __last1, _II2 __first2, _II2 __last2)
1255 {
1256 using __gnu_cxx::__ops::__iter_less_iter;
1257 return std::__lexicographical_compare_impl(__first1, __last1,
1258 __first2, __last2,
1259 __iter_less_iter());
1260 }
1261 };
1262
1263 template<>
1264 struct __lexicographical_compare<true>
1265 {
1266 template<typename _Tp, typename _Up>
1267 _GLIBCXX20_CONSTEXPR
1268 static bool
1269 __lc(const _Tp* __first1, const _Tp* __last1,
1270 const _Up* __first2, const _Up* __last2)
1271 {
1272 const size_t __len1 = __last1 - __first1;
1273 const size_t __len2 = __last2 - __first2;
1274 if (const size_t __len = std::min(__len1, __len2))
1275 if (int __result = std::__memcmp(__first1, __first2, __len))
1276 return __result < 0;
1277 return __len1 < __len2;
1278 }
1279 };
1280
1281 template<typename _II1, typename _II2>
1282 _GLIBCXX20_CONSTEXPR
1283 inline bool
1284 __lexicographical_compare_aux(_II1 __first1, _II1 __last1,
1285 _II2 __first2, _II2 __last2)
1286 {
1287 typedef typename iterator_traits<_II1>::value_type _ValueType1;
1288 typedef typename iterator_traits<_II2>::value_type _ValueType2;
1289 const bool __simple =
1290 (__is_memcmp_ordered_with<_ValueType1, _ValueType2>::__value
1291 && __is_pointer<_II1>::__value
1292 && __is_pointer<_II2>::__value
1293#if __cplusplus201402L > 201703L && __cpp_lib_concepts
1294 // For C++20 iterator_traits<volatile T*>::value_type is non-volatile
1295 // so __is_byte<T> could be true, but we can't use memcmp with
1296 // volatile data.
1297 && !is_volatile_v<remove_reference_t<iter_reference_t<_II1>>>
1298 && !is_volatile_v<remove_reference_t<iter_reference_t<_II2>>>
1299#endif
1300 );
1301
1302 return std::__lexicographical_compare<__simple>::__lc(__first1, __last1,
1303 __first2, __last2);
1304 }
1305
1306 template<typename _ForwardIterator, typename _Tp, typename _Compare>
1307 _GLIBCXX20_CONSTEXPR
1308 _ForwardIterator
1309 __lower_bound(_ForwardIterator __first, _ForwardIterator __last,
1310 const _Tp& __val, _Compare __comp)
1311 {
1312 typedef typename iterator_traits<_ForwardIterator>::difference_type
1313 _DistanceType;
1314
1315 _DistanceType __len = std::distance(__first, __last);
1316
1317 while (__len > 0)
1318 {
1319 _DistanceType __half = __len >> 1;
1320 _ForwardIterator __middle = __first;
1321 std::advance(__middle, __half);
1322 if (__comp(__middle, __val))
1323 {
1324 __first = __middle;
1325 ++__first;
1326 __len = __len - __half - 1;
1327 }
1328 else
1329 __len = __half;
1330 }
1331 return __first;
1332 }
1333
1334 /**
1335 * @brief Finds the first position in which @a val could be inserted
1336 * without changing the ordering.
1337 * @param __first An iterator.
1338 * @param __last Another iterator.
1339 * @param __val The search term.
1340 * @return An iterator pointing to the first element <em>not less
1341 * than</em> @a val, or end() if every element is less than
1342 * @a val.
1343 * @ingroup binary_search_algorithms
1344 */
1345 template<typename _ForwardIterator, typename _Tp>
1346 _GLIBCXX20_CONSTEXPR
1347 inline _ForwardIterator
1348 lower_bound(_ForwardIterator __first, _ForwardIterator __last,
1349 const _Tp& __val)
1350 {
1351 // concept requirements
1352 __glibcxx_function_requires(_ForwardIteratorConcept<_ForwardIterator>)
1353 __glibcxx_function_requires(_LessThanOpConcept<
1354 typename iterator_traits<_ForwardIterator>::value_type, _Tp>)
1355 __glibcxx_requires_partitioned_lower(__first, __last, __val);
1356
1357 return std::__lower_bound(__first, __last, __val,
1358 __gnu_cxx::__ops::__iter_less_val());
1359 }
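// A minimal usage sketch of std::lower_bound (not taken from this header):
// a membership test on a sorted vector. The function name and data below are
// hypothetical.
#include <algorithm>
#include <vector>

bool sorted_contains(const std::vector<int>& sorted, int value)
{
  // First position whose element is not less than `value`.
  auto it = std::lower_bound(sorted.begin(), sorted.end(), value);
  return it != sorted.end() && *it == value;
}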
1360
1361 /// This is a helper function for the sort routines and for random.tcc.
1362 // Precondition: __n > 0.
1363 inline _GLIBCXX_CONSTEXPR int
1364 __lg(int __n)
1365 { return (int)sizeof(int) * __CHAR_BIT__ - 1 - __builtin_clz(__n); }
1366
1367 inline _GLIBCXX_CONSTEXPR unsigned
1368 __lg(unsigned __n)
1369 { return (int)sizeof(int) * __CHAR_BIT__ - 1 - __builtin_clz(__n); }
1370
1371 inline _GLIBCXX_CONSTEXPR long
1372 __lg(long __n)
1373 { return (int)sizeof(long) * __CHAR_BIT__ - 1 - __builtin_clzl(__n); }
1374
1375 inline _GLIBCXX_CONSTEXPR unsigned long
1376 __lg(unsigned long __n)
1377 { return (int)sizeof(long) * __CHAR_BIT__ - 1 - __builtin_clzl(__n); }
1378
1379 inline _GLIBCXX_CONSTEXPR long long
1380 __lg(long long __n)
1381 { return (int)sizeof(long long) * __CHAR_BIT__ - 1 - __builtin_clzll(__n); }
1382
1383 inline _GLIBCXX_CONSTEXPR unsigned long long
1384 __lg(unsigned long long __n)
1385 { return (int)sizeof(long long) * __CHAR_BIT__ - 1 - __builtin_clzll(__n); }
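// A quick sketch of what the __lg overloads above compute: for __n > 0 the
// result is floor(log2(__n)), i.e. the bit index of the highest set bit.
// The stand-alone helper below mirrors the int overload; its name is
// hypothetical and the example values assume a 32-bit int.
#include <climits>

inline int floor_log2(int n)   // precondition: n > 0
{
  return (int)sizeof(int) * CHAR_BIT - 1 - __builtin_clz(n);
}
// e.g. floor_log2(1) == 0, floor_log2(8) == 3, floor_log2(10) == 3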
1386
1387_GLIBCXX_BEGIN_NAMESPACE_ALGO
1388
1389 /**
1390 * @brief Tests a range for element-wise equality.
1391 * @ingroup non_mutating_algorithms
1392 * @param __first1 An input iterator.
1393 * @param __last1 An input iterator.
1394 * @param __first2 An input iterator.
1395 * @return A boolean true or false.
1396 *
1397 * This compares the elements of two ranges using @c == and returns true or
1398 * false depending on whether all of the corresponding elements of the
1399 * ranges are equal.
1400 */
1401 template<typename _II1, typename _II2>
1402 _GLIBCXX20_CONSTEXPR
1403 inline bool
1404 equal(_II1 __first1, _II1 __last1, _II2 __first2)
1405 {
1406 // concept requirements
1407 __glibcxx_function_requires(_InputIteratorConcept<_II1>)
1408 __glibcxx_function_requires(_InputIteratorConcept<_II2>)
1409 __glibcxx_function_requires(_EqualOpConcept<
1410 typename iterator_traits<_II1>::value_type,
1411 typename iterator_traits<_II2>::value_type>)
1412 __glibcxx_requires_can_increment_range(__first1, __last1, __first2);
1413
1414 return std::__equal_aux(__first1, __last1, __first2);
1415 }
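// A minimal usage sketch of the three-argument std::equal above; the arrays
// and their contents are hypothetical. The second range must be at least as
// long as the first, since only __last1 bounds the comparison.
#include <algorithm>

bool same_prefix()
{
  const int a[] = {1, 2, 3};
  const int b[] = {1, 2, 3, 4};
  return std::equal(a, a + 3, b);   // true: only the first three elements are compared
}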
1416
1417 /**
1418 * @brief Tests a range for element-wise equality.
1419 * @ingroup non_mutating_algorithms
1420 * @param __first1 An input iterator.
1421 * @param __last1 An input iterator.
1422 * @param __first2 An input iterator.
1423 * @param __binary_pred A binary predicate @link functors
1424 * functor@endlink.
1425 * @return A boolean true or false.
1426 *
1427 * This compares the elements of two ranges using the binary_pred
1428 * parameter, and returns true or
1429 * false depending on whether all of the corresponding elements of the
1430 * ranges are equal.
1431 */
1432 template<typename _IIter1, typename _IIter2, typename _BinaryPredicate>
1433 _GLIBCXX20_CONSTEXPR
1434 inline bool
1435 equal(_IIter1 __first1, _IIter1 __last1,
1436 _IIter2 __first2, _BinaryPredicate __binary_pred)
1437 {
1438 // concept requirements
1439 __glibcxx_function_requires(_InputIteratorConcept<_IIter1>)
1440 __glibcxx_function_requires(_InputIteratorConcept<_IIter2>)
1441 __glibcxx_requires_valid_range(__first1, __last1);
1442
1443 for (; __first1 != __last1; ++__first1, (void)++__first2)
1444 if (!bool(__binary_pred(*__first1, *__first2)))
1445 return false;
1446 return true;
1447 }
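// A minimal sketch of std::equal with a caller-supplied predicate: a
// case-insensitive comparison of two strings. The helper name and data are
// hypothetical.
#include <algorithm>
#include <cctype>
#include <string>

bool iequal(const std::string& x, const std::string& y)
{
  return x.size() == y.size()
         && std::equal(x.begin(), x.end(), y.begin(),
                       [](char a, char b)
                       { return std::tolower((unsigned char)a)
                                == std::tolower((unsigned char)b); });
}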
1448
1449#if __cplusplus >= 201103L
1450 // 4-iterator version of std::equal<It1, It2> for use in C++11.
1451 template<typename _II1, typename _II2>
1452 _GLIBCXX20_CONSTEXPR
1453 inline bool
1454 __equal4(_II1 __first1, _II1 __last1, _II2 __first2, _II2 __last2)
1455 {
1456 using _RATag = random_access_iterator_tag;
1457 using _Cat1 = typename iterator_traits<_II1>::iterator_category;
1458 using _Cat2 = typename iterator_traits<_II2>::iterator_category;
1459 using _RAIters = __and_<is_same<_Cat1, _RATag>, is_same<_Cat2, _RATag>>;
1460 if (_RAIters())
1461 {
1462 auto __d1 = std::distance(__first1, __last1);
1463 auto __d2 = std::distance(__first2, __last2);
1464 if (__d1 != __d2)
1465 return false;
1466 return _GLIBCXX_STD_A::equal(__first1, __last1, __first2);
1467 }
1468
1469 for (; __first1 != __last1 && __first2 != __last2;
1470 ++__first1, (void)++__first2)
1471 if (!(*__first1 == *__first2))
1472 return false;
1473 return __first1 == __last1 && __first2 == __last2;
1474 }
1475
1476 // 4-iterator version of std::equal<It1, It2, BinaryPred> for use in C++11.
1477 template<typename _II1, typename _II2, typename _BinaryPredicate>
1478 _GLIBCXX20_CONSTEXPR
1479 inline bool
1480 __equal4(_II1 __first1, _II1 __last1, _II2 __first2, _II2 __last2,
1481 _BinaryPredicate __binary_pred)
1482 {
1483 using _RATag = random_access_iterator_tag;
1484 using _Cat1 = typename iterator_traits<_II1>::iterator_category;
1485 using _Cat2 = typename iterator_traits<_II2>::iterator_category;
1486 using _RAIters = __and_<is_same<_Cat1, _RATag>, is_same<_Cat2, _RATag>>;
1487 if (_RAIters())
1488 {
1489 auto __d1 = std::distance(__first1, __last1);
1490 auto __d2 = std::distance(__first2, __last2);
1491 if (__d1 != __d2)
1492 return false;
1493 return _GLIBCXX_STD_A::equal(__first1, __last1, __first2,
1494 __binary_pred);
1495 }
1496
1497 for (; __first1 != __last1 && __first2 != __last2;
1498 ++__first1, (void)++__first2)
1499 if (!bool(__binary_pred(*__first1, *__first2)))
1500 return false;
1501 return __first1 == __last1 && __first2 == __last2;
1502 }
1503#endif // C++11
1504
1505#if __cplusplus > 201103L
1506
1507#define __cpp_lib_robust_nonmodifying_seq_ops 201304
1508
1509 /**
1510 * @brief Tests a range for element-wise equality.
1511 * @ingroup non_mutating_algorithms
1512 * @param __first1 An input iterator.
1513 * @param __last1 An input iterator.
1514 * @param __first2 An input iterator.
1515 * @param __last2 An input iterator.
1516 * @return A boolean true or false.
1517 *
1518 * This compares the elements of two ranges using @c == and returns true or
1519 * false depending on whether all of the corresponding elements of the
1520 * ranges are equal.
1521 */
1522 template<typename _II1, typename _II2>
1523 _GLIBCXX20_CONSTEXPR
1524 inline bool
1525 equal(_II1 __first1, _II1 __last1, _II2 __first2, _II2 __last2)
1526 {
1527 // concept requirements
1528 __glibcxx_function_requires(_InputIteratorConcept<_II1>)
1529 __glibcxx_function_requires(_InputIteratorConcept<_II2>)
1530 __glibcxx_function_requires(_EqualOpConcept<
1531 typename iterator_traits<_II1>::value_type,
1532 typename iterator_traits<_II2>::value_type>)
1533 __glibcxx_requires_valid_range(__first1, __last1);
1534 __glibcxx_requires_valid_range(__first2, __last2);
1535
1536 return _GLIBCXX_STD_A::__equal4(__first1, __last1, __first2, __last2);
1537 }
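// A minimal sketch of the four-iterator std::equal above (C++14): unlike the
// three-argument form it also compares lengths, so ranges of different sizes
// simply compare unequal. The wrapper below is hypothetical.
#include <algorithm>
#include <vector>

bool same_sequence(const std::vector<int>& x, const std::vector<int>& y)
{
  return std::equal(x.begin(), x.end(), y.begin(), y.end());
}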
1538
1539 /**
1540 * @brief Tests a range for element-wise equality.
1541 * @ingroup non_mutating_algorithms
1542 * @param __first1 An input iterator.
1543 * @param __last1 An input iterator.
1544 * @param __first2 An input iterator.
1545 * @param __last2 An input iterator.
1546 * @param __binary_pred A binary predicate @link functors
1547 * functor@endlink.
1548 * @return A boolean true or false.
1549 *
1550 * This compares the elements of two ranges using the binary_pred
1551 * parameter, and returns true or
1552 * false depending on whether all of the corresponding elements of the
1553 * ranges are equal.
1554 */
1555 template<typename _IIter1, typename _IIter2, typename _BinaryPredicate>
1556 _GLIBCXX20_CONSTEXPR
1557 inline bool
1558 equal(_IIter1 __first1, _IIter1 __last1,
1559 _IIter2 __first2, _IIter2 __last2, _BinaryPredicate __binary_pred)
1560 {
1561 // concept requirements
1562 __glibcxx_function_requires(_InputIteratorConcept<_IIter1>)
1563 __glibcxx_function_requires(_InputIteratorConcept<_IIter2>)
1564 __glibcxx_requires_valid_range(__first1, __last1);
1565 __glibcxx_requires_valid_range(__first2, __last2);
1566
1567 return _GLIBCXX_STD_A::__equal4(__first1, __last1, __first2, __last2,
1568 __binary_pred);
1569 }
1570#endif // C++14
1571
1572 /**
1573 * @brief Performs @b dictionary comparison on ranges.
1574 * @ingroup sorting_algorithms
1575 * @param __first1 An input iterator.
1576 * @param __last1 An input iterator.
1577 * @param __first2 An input iterator.
1578 * @param __last2 An input iterator.
1579 * @return A boolean true or false.
1580 *
1581 * <em>Returns true if the sequence of elements defined by the range
1582 * [first1,last1) is lexicographically less than the sequence of elements
1583 * defined by the range [first2,last2). Returns false otherwise.</em>
1584 * (Quoted from [25.3.8]/1.) If the iterators are all character pointers,
1585 * then this is an inline call to @c memcmp.
1586 */
1587 template<typename _II1, typename _II2>
1588 _GLIBCXX20_CONSTEXPR
1589 inline bool
1590 lexicographical_compare(_II1 __first1, _II1 __last1,
1591 _II2 __first2, _II2 __last2)
1592 {
1593#ifdef _GLIBCXX_CONCEPT_CHECKS
1594 // concept requirements
1595 typedef typename iterator_traits<_II1>::value_type _ValueType1;
1596 typedef typename iterator_traits<_II2>::value_type _ValueType2;
1597#endif
1598 __glibcxx_function_requires(_InputIteratorConcept<_II1>)
1599 __glibcxx_function_requires(_InputIteratorConcept<_II2>)
1600 __glibcxx_function_requires(_LessThanOpConcept<_ValueType1, _ValueType2>)
1601 __glibcxx_function_requires(_LessThanOpConcept<_ValueType2, _ValueType1>)
1602 __glibcxx_requires_valid_range(__first1, __last1);
1603 __glibcxx_requires_valid_range(__first2, __last2);
1604
1605 return std::__lexicographical_compare_aux(std::__niter_base(__first1),
1606 std::__niter_base(__last1),
1607 std::__niter_base(__first2),
1608 std::__niter_base(__last2));
1609 }
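// A minimal sketch of std::lexicographical_compare above: dictionary ordering
// of two character ranges using operator<. The wrapper and example values are
// hypothetical; for plain character pointers the call is expected to reduce to
// memcmp, as the comment above notes.
#include <algorithm>
#include <string>

bool dict_less(const std::string& a, const std::string& b)
{
  return std::lexicographical_compare(a.begin(), a.end(),
                                      b.begin(), b.end());
}
// e.g. dict_less("apple", "apricot") is true, dict_less("b", "apricot") is false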
1610
1611 /**
1612 * @brief Performs @b dictionary comparison on ranges.
1613 * @ingroup sorting_algorithms
1614 * @param __first1 An input iterator.
1615 * @param __last1 An input iterator.
1616 * @param __first2 An input iterator.
1617 * @param __last2 An input iterator.
1618 * @param __comp A @link comparison_functors comparison functor@endlink.
1619 * @return A boolean true or false.
1620 *
1621 * The same as the four-parameter @c lexicographical_compare, but uses the
1622 * comp parameter instead of @c <.
1623 */
1624 template<typename _II1, typename _II2, typename _Compare>
1625 _GLIBCXX20_CONSTEXPR
1626 inline bool
1627 lexicographical_compare(_II1 __first1, _II1 __last1,
1628 _II2 __first2, _II2 __last2, _Compare __comp)
1629 {
1630 // concept requirements
1631 __glibcxx_function_requires(_InputIteratorConcept<_II1>)
1632 __glibcxx_function_requires(_InputIteratorConcept<_II2>)
1633 __glibcxx_requires_valid_range(__first1, __last1);
1634 __glibcxx_requires_valid_range(__first2, __last2);
1635
1636 return std::__lexicographical_compare_impl
1637 (__first1, __last1, __first2, __last2,
1638 __gnu_cxx::__ops::__iter_comp_iter(__comp));
1639 }
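// A minimal sketch of the comparator form of std::lexicographical_compare,
// using std::greater to obtain a descending dictionary order. The wrapper is
// hypothetical.
#include <algorithm>
#include <functional>
#include <vector>

bool dict_greater(const std::vector<int>& a, const std::vector<int>& b)
{
  return std::lexicographical_compare(a.begin(), a.end(),
                                      b.begin(), b.end(),
                                      std::greater<int>());
}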
1640
1641#if __cpp_lib_three_way_comparison
1642 // Iter points to a contiguous range of unsigned narrow character type
1643 // or std::byte, suitable for comparison by memcmp.
1644 template<typename _Iter>
1645 concept __is_byte_iter = contiguous_iterator<_Iter>
1646 && __is_memcmp_ordered<iter_value_t<_Iter>>::__value;
1647
1648 // Return a struct with two members, initialized to the smaller of x and y
1649 // (or x if they compare equal) and the result of the comparison x <=> y.
1650 template<typename _Tp>
1651 constexpr auto
1652 __min_cmp(_Tp __x, _Tp __y)
1653 {
1654 struct _Res {
1655 _Tp _M_min;
1656 decltype(__x <=> __y) _M_cmp;
1657 };
1658 auto __c = __x <=> __y;
1659 if (__c > 0)
1660 return _Res{__y, __c};
1661 return _Res{__x, __c};
1662 }
1663
1664 /**
1665 * @brief Performs dictionary comparison on ranges.
1666 * @ingroup sorting_algorithms
1667 * @param __first1 An input iterator.
1668 * @param __last1 An input iterator.
1669 * @param __first2 An input iterator.
1670 * @param __last2 An input iterator.
1671 * @param __comp A @link comparison_functors comparison functor@endlink.
1672 * @return The comparison category that `__comp(*__first1, *__first2)`
1673 * returns.
1674 */
1675 template<typename _InputIter1, typename _InputIter2, typename _Comp>
1676 constexpr auto
1677 lexicographical_compare_three_way(_InputIter1 __first1,
1678 _InputIter1 __last1,
1679 _InputIter2 __first2,
1680 _InputIter2 __last2,
1681 _Comp __comp)
1682 -> decltype(__comp(*__first1, *__first2))
1683 {
1684 // concept requirements
1685 __glibcxx_function_requires(_InputIteratorConcept<_InputIter1>)
1686 __glibcxx_function_requires(_InputIteratorConcept<_InputIter2>)
1687 __glibcxx_requires_valid_range(__first1, __last1);
1688 __glibcxx_requires_valid_range(__first2, __last2);
1689
1690#if __cpp_lib_is_constant_evaluated
1691 using _Cat = decltype(__comp(*__first1, *__first2));
1692 static_assert(same_as<common_comparison_category_t<_Cat>, _Cat>);
1693
1694 if (!std::is_constant_evaluated())
1695 if constexpr (same_as<_Comp, __detail::_Synth3way>
1696 || same_as<_Comp, compare_three_way>)
1697 if constexpr (__is_byte_iter<_InputIter1>)
1698 if constexpr (__is_byte_iter<_InputIter2>)
1699 {
1700 const auto [__len, __lencmp]
1701 = std::__min_cmp(__last1 - __first1, __last2 - __first2);
1702 if (__len)
1703 {
1704 const auto __c
1705 = __builtin_memcmp(&*__first1, &*__first2, __len) <=> 0;
1706 if (__c != 0)
1707 return __c;
1708 }
1709 return __lencmp;
1710 }
1711#endif // is_constant_evaluated
1712 while (__first1 != __last1)
1713 {
1714 if (__first2 == __last2)
1715 return strong_ordering::greater;
1716 if (auto __cmp = __comp(*__first1, *__first2); __cmp != 0)
1717 return __cmp;
1718 ++__first1;
1719 ++__first2;
1720 }
1721 return (__first2 == __last2) <=> true; // See PR 94006
1722 }
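// A minimal sketch of std::lexicographical_compare_three_way above (C++20):
// it returns a comparison category instead of a bool, so a single call
// distinguishes less, equal and greater. The wrapper below is hypothetical.
#include <algorithm>
#include <compare>
#include <vector>

std::strong_ordering order(const std::vector<int>& a, const std::vector<int>& b)
{
  return std::lexicographical_compare_three_way(a.begin(), a.end(),
                                                b.begin(), b.end());
}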
1723
1724 template<typename _InputIter1, typename _InputIter2>
1725 constexpr auto
1726 lexicographical_compare_three_way(_InputIter1 __first1,
1727 _InputIter1 __last1,
1728 _InputIter2 __first2,
1729 _InputIter2 __last2)
1730 {
1731 return std::lexicographical_compare_three_way(__first1, __last1,
1732 __first2, __last2,
1733 compare_three_way{});
1734 }
1735#endif // three_way_comparison
1736
1737 template<typename _InputIterator1, typename _InputIterator2,
1738 typename _BinaryPredicate>
1739 _GLIBCXX20_CONSTEXPR
1740 pair<_InputIterator1, _InputIterator2>
1741 __mismatch(_InputIterator1 __first1, _InputIterator1 __last1,
1742 _InputIterator2 __first2, _BinaryPredicate __binary_pred)
1743 {
1744 while (__first1 != __last1 && __binary_pred(__first1, __first2))
1745 {
1746 ++__first1;
1747 ++__first2;
1748 }
1749 return pair<_InputIterator1, _InputIterator2>(__first1, __first2);
1750 }
1751
1752 /**
1753 * @brief Finds the places in ranges which don't match.
1754 * @ingroup non_mutating_algorithms
1755 * @param __first1 An input iterator.
1756 * @param __last1 An input iterator.
1757 * @param __first2 An input iterator.
1758 * @return A pair of iterators pointing to the first mismatch.
1759 *
1760 * This compares the elements of two ranges using @c == and returns a pair
1761 * of iterators. The first iterator points into the first range, the
1762 * second iterator points into the second range, and the elements pointed
1763 * to by the iterators are not equal.
1764 */
1765 template<typename _InputIterator1, typename _InputIterator2>
1766 _GLIBCXX20_CONSTEXPR
1767 inline pair<_InputIterator1, _InputIterator2>
1768 mismatch(_InputIterator1 __first1, _InputIterator1 __last1,
1769 _InputIterator2 __first2)
1770 {
1771 // concept requirements
1772 __glibcxx_function_requires(_InputIteratorConcept<_InputIterator1>)
1773 __glibcxx_function_requires(_InputIteratorConcept<_InputIterator2>)
1774 __glibcxx_function_requires(_EqualOpConcept<
1775 typename iterator_traits<_InputIterator1>::value_type,
1776 typename iterator_traits<_InputIterator2>::value_type>)
1777 __glibcxx_requires_valid_range(__first1, __last1);
1778
1779 return _GLIBCXX_STD_A::__mismatch(__first1, __last1, __first2,
1780 __gnu_cxx::__ops::__iter_equal_to_iter());
1781 }
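// A minimal sketch of the three-argument std::mismatch above: it returns the
// first position at which two ranges differ. The arrays are hypothetical, and
// the second range is assumed to be at least as long as the first.
#include <algorithm>
#include <cstddef>

std::size_t first_difference()
{
  const int a[] = {1, 2, 9, 4};
  const int b[] = {1, 2, 3, 4};
  auto p = std::mismatch(a, a + 4, b);
  return p.first - a;   // 2: the elements at index 2 differ (9 vs. 3)
}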
1782
1783 /**
1784 * @brief Finds the places in ranges which don't match.
1785 * @ingroup non_mutating_algorithms
1786 * @param __first1 An input iterator.
1787 * @param __last1 An input iterator.
1788 * @param __first2 An input iterator.
1789 * @param __binary_pred A binary predicate @link functors
1790 * functor@endlink.
1791 * @return A pair of iterators pointing to the first mismatch.
1792 *
1793 * This compares the elements of two ranges using the binary_pred
1794 * parameter, and returns a pair
1795 * of iterators. The first iterator points into the first range, the
1796 * second iterator points into the second range, and the elements pointed
1797 * to by the iterators are not equal.
1798 */
1799 template<typename _InputIterator1, typename _InputIterator2,
1800 typename _BinaryPredicate>
1801 _GLIBCXX20_CONSTEXPR
1802 inline pair<_InputIterator1, _InputIterator2>
1803 mismatch(_InputIterator1 __first1, _InputIterator1 __last1,
1804 _InputIterator2 __first2, _BinaryPredicate __binary_pred)
1805 {
1806 // concept requirements
1807 __glibcxx_function_requires(_InputIteratorConcept<_InputIterator1>)
1808 __glibcxx_function_requires(_InputIteratorConcept<_InputIterator2>)
1809 __glibcxx_requires_valid_range(__first1, __last1);
1810
1811 return _GLIBCXX_STD_A::__mismatch(__first1, __last1, __first2,
1812 __gnu_cxx::__ops::__iter_comp_iter(__binary_pred));
1813 }
1814
1815#if __cplusplus > 201103L
1816
1817 template<typename _InputIterator1, typename _InputIterator2,
1818 typename _BinaryPredicate>
1819 _GLIBCXX20_CONSTEXPR
1820 pair<_InputIterator1, _InputIterator2>
1821 __mismatch(_InputIterator1 __first1, _InputIterator1 __last1,
1822 _InputIterator2 __first2, _InputIterator2 __last2,
1823 _BinaryPredicate __binary_pred)
1824 {
1825 while (__first1 != __last1 && __first2 != __last2
1826 && __binary_pred(__first1, __first2))
1827 {
1828 ++__first1;
1829 ++__first2;
1830 }
1831 return pair<_InputIterator1, _InputIterator2>(__first1, __first2);
1832 }
1833
1834 /**
1835 * @brief Finds the places in ranges which don't match.
1836 * @ingroup non_mutating_algorithms
1837 * @param __first1 An input iterator.
1838 * @param __last1 An input iterator.
1839 * @param __first2 An input iterator.
1840 * @param __last2 An input iterator.
1841 * @return A pair of iterators pointing to the first mismatch.
1842 *
1843 * This compares the elements of two ranges using @c == and returns a pair
1844 * of iterators. The first iterator points into the first range, the
1845 * second iterator points into the second range, and the elements pointed
1846 * to by the iterators are not equal.
1847 */
1848 template<typename _InputIterator1, typename _InputIterator2>
1849 _GLIBCXX20_CONSTEXPR
1850 inline pair<_InputIterator1, _InputIterator2>
1851 mismatch(_InputIterator1 __first1, _InputIterator1 __last1,
1852 _InputIterator2 __first2, _InputIterator2 __last2)
1853 {
1854 // concept requirements
1855 __glibcxx_function_requires(_InputIteratorConcept<_InputIterator1>)
1856 __glibcxx_function_requires(_InputIteratorConcept<_InputIterator2>)
1857 __glibcxx_function_requires(_EqualOpConcept<
1858 typename iterator_traits<_InputIterator1>::value_type,
1859 typename iterator_traits<_InputIterator2>::value_type>)
1860 __glibcxx_requires_valid_range(__first1, __last1);
1861 __glibcxx_requires_valid_range(__first2, __last2);
1862
1863 return _GLIBCXX_STD_A::__mismatch(__first1, __last1, __first2, __last2,
1864 __gnu_cxx::__ops::__iter_equal_to_iter());
1865 }
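// A minimal sketch of the four-iterator std::mismatch above (C++14): both
// range ends are checked, so it is safe when the ranges may differ in length.
// The prefix-test helper below is hypothetical.
#include <algorithm>
#include <string>

bool is_prefix_of(const std::string& pre, const std::string& s)
{
  auto p = std::mismatch(pre.begin(), pre.end(), s.begin(), s.end());
  return p.first == pre.end();   // reached the end of `pre` without a mismatch
}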
1866
1867 /**
1868 * @brief Finds the places in ranges which don't match.
1869 * @ingroup non_mutating_algorithms
1870 * @param __first1 An input iterator.
1871 * @param __last1 An input iterator.
1872 * @param __first2 An input iterator.
1873 * @param __last2 An input iterator.
1874 * @param __binary_pred A binary predicate @link functors
1875 * functor@endlink.
1876 * @return A pair of iterators pointing to the first mismatch.
1877 *
1878 * This compares the elements of two ranges using the binary_pred
1879 * parameter, and returns a pair
1880 * of iterators. The first iterator points into the first range, the
1881 * second iterator points into the second range, and the elements pointed
1882 * to by the iterators are not equal.
1883 */
1884 template<typename _InputIterator1, typename _InputIterator2,
1885 typename _BinaryPredicate>
1886 _GLIBCXX20_CONSTEXPR
1887 inline pair<_InputIterator1, _InputIterator2>
1888 mismatch(_InputIterator1 __first1, _InputIterator1 __last1,
1889 _InputIterator2 __first2, _InputIterator2 __last2,
1890 _BinaryPredicate __binary_pred)
1891 {
1892 // concept requirements
1893 __glibcxx_function_requires(_InputIteratorConcept<_InputIterator1>)
1894 __glibcxx_function_requires(_InputIteratorConcept<_InputIterator2>)
1895 __glibcxx_requires_valid_range(__first1, __last1);
1896 __glibcxx_requires_valid_range(__first2, __last2);
1897
1898 return _GLIBCXX_STD_A::__mismatch(__first1, __last1, __first2, __last2,
1899 __gnu_cxx::__ops::__iter_comp_iter(__binary_pred));
1900 }
1901#endif
1902
1903_GLIBCXX_END_NAMESPACE_ALGO
1904
1905 /// This is an overload used by find algos for the Input Iterator case.
1906 template<typename _InputIterator, typename _Predicate>
1907 _GLIBCXX20_CONSTEXPR
1908 inline _InputIterator
1909 __find_if(_InputIterator __first, _InputIterator __last,
1910 _Predicate __pred, input_iterator_tag)
1911 {
1912 while (__first != __last && !__pred(__first))
1913 ++__first;
1914 return __first;
1915 }
1916
1917 /// This is an overload used by find algos for the RAI case.
1918 template<typename _RandomAccessIterator, typename _Predicate>
1919 _GLIBCXX20_CONSTEXPR
1920 _RandomAccessIterator
1921 __find_if(_RandomAccessIterator __first, _RandomAccessIterator __last,
1922 _Predicate __pred, random_access_iterator_tag)
1923 {
1924 typename iterator_traits<_RandomAccessIterator>::difference_type
1925 __trip_count = (__last - __first) >> 2;
1926
1927 for (; __trip_count > 0; --__trip_count)
1928 {
1929 if (__pred(__first))
1930 return __first;
1931 ++__first;
1932
1933 if (__pred(__first))
1934 return __first;
1935 ++__first;
1936
1937 if (__pred(__first))
1938 return __first;
1939 ++__first;
1940
1941 if (__pred(__first))
1942 return __first;
1943 ++__first;
1944 }
1945
1946 switch (__last - __first)
1947 {
1948 case 3:
1949 if (__pred(__first))
1950 return __first;
1951 ++__first;
1952 // FALLTHRU
1953 case 2:
1954 if (__pred(__first))
1955 return __first;
1956 ++__first;
1957 // FALLTHRU
1958 case 1:
1959 if (__pred(__first))
1960 return __first;
1961 ++__first;
1962 // FALLTHRU
1963 case 0:
1964 default:
1965 return __last;
1966 }
1967 }
1968
1969 template<typename _Iterator, typename _Predicate>
1970 _GLIBCXX20_CONSTEXPR
1971 inline _Iterator
1972 __find_if(_Iterator __first, _Iterator __last, _Predicate __pred)
1973 {
1974 return __find_if(__first, __last, __pred,
1975 std::__iterator_category(__first));
1976 }
1977
1978 template<typename _InputIterator, typename _Predicate>
1979 _GLIBCXX20_CONSTEXPR
1980 typename iterator_traits<_InputIterator>::difference_type
1981 __count_if(_InputIterator __first, _InputIterator __last, _Predicate __pred)
1982 {
1983 typename iterator_traits<_InputIterator>::difference_type __n = 0;
1984 for (; __first != __last; ++__first)
1985 if (__pred(__first))
1986 ++__n;
1987 return __n;
1988 }
1989
1990#if __cplusplus >= 201103L
1991 template<typename _ForwardIterator1, typename _ForwardIterator2,
1992 typename _BinaryPredicate>
1993 _GLIBCXX20_CONSTEXPR
1994 bool
1995 __is_permutation(_ForwardIterator1 __first1, _ForwardIterator1 __last1,
1996 _ForwardIterator2 __first2, _BinaryPredicate __pred)
1997 {
1998 // Efficiently compare identical prefixes: O(N) if sequences
1999 // have the same elements in the same order.
2000 for (; __first1 != __last1; ++__first1, (void)++__first2)
2001 if (!__pred(__first1, __first2))
2002 break;
2003
2004 if (__first1 == __last1)
2005 return true;
2006
2007 // Establish __last2 assuming equal ranges by iterating over the
2008 // rest of the list.
2009 _ForwardIterator2 __last2 = __first2;
2010 std::advance(__last2, std::distance(__first1, __last1));
2011 for (_ForwardIterator1 __scan = __first1; __scan != __last1; ++__scan)
2012 {
2013 if (__scan != std::__find_if(__first1, __scan,
2014 __gnu_cxx::__ops::__iter_comp_iter(__pred, __scan)))
2015 continue; // We've seen this one before.
2016
2017 auto __matches
2018 = std::__count_if(__first2, __last2,
2019 __gnu_cxx::__ops::__iter_comp_iter(__pred, __scan));
2020 if (0 == __matches ||
2021 std::__count_if(__scan, __last1,
2022 __gnu_cxx::__ops::__iter_comp_iter(__pred, __scan))
2023 != __matches)
2024 return false;
2025 }
2026 return true;
2027 }
2028
2029 /**
2030 * @brief Checks whether a permutation of the second sequence is equal
2031 * to the first sequence.
2032 * @ingroup non_mutating_algorithms
2033 * @param __first1 Start of first range.
2034 * @param __last1 End of first range.
2035 * @param __first2 Start of second range.
2036 * @return true if there exists a permutation of the elements in the range
2037 * [__first2, __first2 + (__last1 - __first1)), beginning with
2038 * ForwardIterator2 begin, such that equal(__first1, __last1, begin)
2039 * returns true; otherwise, returns false.
2040 */
2041 template<typename _ForwardIterator1, typename _ForwardIterator2>
2042 _GLIBCXX20_CONSTEXPR
2043 inline bool
2044 is_permutation(_ForwardIterator1 __first1, _ForwardIterator1 __last1,
2045 _ForwardIterator2 __first2)
2046 {
2047 // concept requirements
2048 __glibcxx_function_requires(_ForwardIteratorConcept<_ForwardIterator1>)
2049 __glibcxx_function_requires(_ForwardIteratorConcept<_ForwardIterator2>)
2050 __glibcxx_function_requires(_EqualOpConcept<
2051 typename iterator_traits<_ForwardIterator1>::value_type,
2052 typename iterator_traits<_ForwardIterator2>::value_type>)
2053 __glibcxx_requires_valid_range(__first1, __last1);
2054
2055 return std::__is_permutation(__first1, __last1, __first2,
2056 __gnu_cxx::__ops::__iter_equal_to_iter());
2057 }
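// A minimal sketch of std::is_permutation above (C++11): true when the second
// range can be reordered to match the first. The wrapper is hypothetical; the
// explicit size check guards the three-argument form, which assumes the second
// range has the same length as the first.
#include <algorithm>
#include <vector>

bool same_multiset(const std::vector<int>& a, const std::vector<int>& b)
{
  return a.size() == b.size()
         && std::is_permutation(a.begin(), a.end(), b.begin());
}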
2058#endif // C++11
2059
2060_GLIBCXX_END_NAMESPACE_VERSION
2061} // namespace std
2062
2063// NB: This file is included within many other C++ includes, as a way
2064// of getting the base algorithms. So, make sure that parallel bits
2065// come in too if requested.
2066#ifdef _GLIBCXX_PARALLEL
2067# include <parallel/algobase.h>
2068#endif
2069
2070#endif