Bug Summary

File: llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Warning: line 5968, column 38
1st function call argument is an uninitialized value
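
This diagnostic comes from the analyzer's uninitialized-value checks (typically core.CallAndMessage): a value that may never have been assigned on some path reaches a call as its first argument. The flagged call at line 5968 lies outside the excerpt reproduced below, so the following minimal C++ sketch is purely illustrative of the pattern the checker reports; it is hypothetical and not the code under analysis.

#include <cstdio>

static void consume(int Value) { std::printf("%d\n", Value); }

static void example(bool Cond) {
  int Val;        // not initialized at declaration
  if (Cond)
    Val = 42;     // assigned only when Cond is true
  consume(Val);   // analyzer: 1st function call argument is an uninitialized value
}

In code of this shape the usual fix is to initialize the variable at its declaration or to assign it on every path before the call.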

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AMDGPUAsmParser.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/AMDGPU/AsmParser -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/AMDGPU/AsmParser -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/AMDGPU/AsmParser -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include -D NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/AMDGPU/AsmParser -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-09-04-040900-46481-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
10#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11#include "MCTargetDesc/AMDGPUTargetStreamer.h"
12#include "SIDefines.h"
13#include "SIInstrInfo.h"
14#include "SIRegisterInfo.h"
15#include "TargetInfo/AMDGPUTargetInfo.h"
16#include "Utils/AMDGPUAsmUtils.h"
17#include "Utils/AMDGPUBaseInfo.h"
18#include "Utils/AMDKernelCodeTUtils.h"
19#include "llvm/ADT/APFloat.h"
20#include "llvm/ADT/SmallBitVector.h"
21#include "llvm/ADT/StringSet.h"
22#include "llvm/ADT/Twine.h"
23#include "llvm/MC/MCAsmInfo.h"
24#include "llvm/MC/MCContext.h"
25#include "llvm/MC/MCExpr.h"
26#include "llvm/MC/MCInst.h"
27#include "llvm/MC/MCParser/MCAsmParser.h"
28#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
29#include "llvm/MC/MCParser/MCTargetAsmParser.h"
30#include "llvm/MC/MCSymbol.h"
31#include "llvm/Support/AMDGPUMetadata.h"
32#include "llvm/Support/AMDHSAKernelDescriptor.h"
33#include "llvm/Support/Casting.h"
34#include "llvm/Support/MachineValueType.h"
35#include "llvm/Support/TargetParser.h"
36#include "llvm/Support/TargetRegistry.h"
37
38using namespace llvm;
39using namespace llvm::AMDGPU;
40using namespace llvm::amdhsa;
41
42namespace {
43
44class AMDGPUAsmParser;
45
46enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
47
48//===----------------------------------------------------------------------===//
49// Operand
50//===----------------------------------------------------------------------===//
51
52class AMDGPUOperand : public MCParsedAsmOperand {
53 enum KindTy {
54 Token,
55 Immediate,
56 Register,
57 Expression
58 } Kind;
59
60 SMLoc StartLoc, EndLoc;
61 const AMDGPUAsmParser *AsmParser;
62
63public:
64 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
65 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
66
67 using Ptr = std::unique_ptr<AMDGPUOperand>;
68
69 struct Modifiers {
70 bool Abs = false;
71 bool Neg = false;
72 bool Sext = false;
73
74 bool hasFPModifiers() const { return Abs || Neg; }
75 bool hasIntModifiers() const { return Sext; }
76 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
77
78 int64_t getFPModifiersOperand() const {
79 int64_t Operand = 0;
80 Operand |= Abs ? SISrcMods::ABS : 0u;
81 Operand |= Neg ? SISrcMods::NEG : 0u;
82 return Operand;
83 }
84
85 int64_t getIntModifiersOperand() const {
86 int64_t Operand = 0;
87 Operand |= Sext ? SISrcMods::SEXT : 0u;
88 return Operand;
89 }
90
91 int64_t getModifiersOperand() const {
92 assert(!(hasFPModifiers() && hasIntModifiers())
93 && "fp and int modifiers should not be used simultaneously");
94 if (hasFPModifiers()) {
95 return getFPModifiersOperand();
96 } else if (hasIntModifiers()) {
97 return getIntModifiersOperand();
98 } else {
99 return 0;
100 }
101 }
102
103 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
104 };
105
106 enum ImmTy {
107 ImmTyNone,
108 ImmTyGDS,
109 ImmTyLDS,
110 ImmTyOffen,
111 ImmTyIdxen,
112 ImmTyAddr64,
113 ImmTyOffset,
114 ImmTyInstOffset,
115 ImmTyOffset0,
116 ImmTyOffset1,
117 ImmTyCPol,
118 ImmTySWZ,
119 ImmTyTFE,
120 ImmTyD16,
121 ImmTyClampSI,
122 ImmTyOModSI,
123 ImmTyDPP8,
124 ImmTyDppCtrl,
125 ImmTyDppRowMask,
126 ImmTyDppBankMask,
127 ImmTyDppBoundCtrl,
128 ImmTyDppFi,
129 ImmTySdwaDstSel,
130 ImmTySdwaSrc0Sel,
131 ImmTySdwaSrc1Sel,
132 ImmTySdwaDstUnused,
133 ImmTyDMask,
134 ImmTyDim,
135 ImmTyUNorm,
136 ImmTyDA,
137 ImmTyR128A16,
138 ImmTyA16,
139 ImmTyLWE,
140 ImmTyExpTgt,
141 ImmTyExpCompr,
142 ImmTyExpVM,
143 ImmTyFORMAT,
144 ImmTyHwreg,
145 ImmTyOff,
146 ImmTySendMsg,
147 ImmTyInterpSlot,
148 ImmTyInterpAttr,
149 ImmTyAttrChan,
150 ImmTyOpSel,
151 ImmTyOpSelHi,
152 ImmTyNegLo,
153 ImmTyNegHi,
154 ImmTySwizzle,
155 ImmTyGprIdxMode,
156 ImmTyHigh,
157 ImmTyBLGP,
158 ImmTyCBSZ,
159 ImmTyABID,
160 ImmTyEndpgm,
161 };
162
163 enum ImmKindTy {
164 ImmKindTyNone,
165 ImmKindTyLiteral,
166 ImmKindTyConst,
167 };
168
169private:
170 struct TokOp {
171 const char *Data;
172 unsigned Length;
173 };
174
175 struct ImmOp {
176 int64_t Val;
177 ImmTy Type;
178 bool IsFPImm;
179 mutable ImmKindTy Kind;
180 Modifiers Mods;
181 };
182
183 struct RegOp {
184 unsigned RegNo;
185 Modifiers Mods;
186 };
187
188 union {
189 TokOp Tok;
190 ImmOp Imm;
191 RegOp Reg;
192 const MCExpr *Expr;
193 };
194
195public:
196 bool isToken() const override {
197 if (Kind == Token)
198 return true;
199
200 // When parsing operands, we can't always tell if something was meant to be
201 // a token, like 'gds', or an expression that references a global variable.
202 // In this case, we assume the string is an expression, and if we need to
203 // interpret it as a token, then we treat the symbol name as the token.
204 return isSymbolRefExpr();
205 }
206
207 bool isSymbolRefExpr() const {
208 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
209 }
210
211 bool isImm() const override {
212 return Kind == Immediate;
213 }
214
215 void setImmKindNone() const {
216 assert(isImm());
217 Imm.Kind = ImmKindTyNone;
218 }
219
220 void setImmKindLiteral() const {
221 assert(isImm());
222 Imm.Kind = ImmKindTyLiteral;
223 }
224
225 void setImmKindConst() const {
226 assert(isImm());
227 Imm.Kind = ImmKindTyConst;
228 }
229
230 bool IsImmKindLiteral() const {
231 return isImm() && Imm.Kind == ImmKindTyLiteral;
232 }
233
234 bool isImmKindConst() const {
235 return isImm() && Imm.Kind == ImmKindTyConst;
236 }
237
238 bool isInlinableImm(MVT type) const;
239 bool isLiteralImm(MVT type) const;
240
241 bool isRegKind() const {
242 return Kind == Register;
243 }
244
245 bool isReg() const override {
246 return isRegKind() && !hasModifiers();
247 }
248
249 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
250 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
251 }
252
253 bool isRegOrImmWithInt16InputMods() const {
254 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
255 }
256
257 bool isRegOrImmWithInt32InputMods() const {
258 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
259 }
260
261 bool isRegOrImmWithInt64InputMods() const {
262 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
263 }
264
265 bool isRegOrImmWithFP16InputMods() const {
266 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
267 }
268
269 bool isRegOrImmWithFP32InputMods() const {
270 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
271 }
272
273 bool isRegOrImmWithFP64InputMods() const {
274 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
275 }
276
277 bool isVReg() const {
278 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
279 isRegClass(AMDGPU::VReg_64RegClassID) ||
280 isRegClass(AMDGPU::VReg_96RegClassID) ||
281 isRegClass(AMDGPU::VReg_128RegClassID) ||
282 isRegClass(AMDGPU::VReg_160RegClassID) ||
283 isRegClass(AMDGPU::VReg_192RegClassID) ||
284 isRegClass(AMDGPU::VReg_256RegClassID) ||
285 isRegClass(AMDGPU::VReg_512RegClassID) ||
286 isRegClass(AMDGPU::VReg_1024RegClassID);
287 }
288
289 bool isVReg32() const {
290 return isRegClass(AMDGPU::VGPR_32RegClassID);
291 }
292
293 bool isVReg32OrOff() const {
294 return isOff() || isVReg32();
295 }
296
297 bool isNull() const {
298 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
299 }
300
301 bool isVRegWithInputMods() const;
302
303 bool isSDWAOperand(MVT type) const;
304 bool isSDWAFP16Operand() const;
305 bool isSDWAFP32Operand() const;
306 bool isSDWAInt16Operand() const;
307 bool isSDWAInt32Operand() const;
308
309 bool isImmTy(ImmTy ImmT) const {
310 return isImm() && Imm.Type == ImmT;
311 }
312
313 bool isImmModifier() const {
314 return isImm() && Imm.Type != ImmTyNone;
315 }
316
317 bool isClampSI() const { return isImmTy(ImmTyClampSI); }
318 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
319 bool isDMask() const { return isImmTy(ImmTyDMask); }
320 bool isDim() const { return isImmTy(ImmTyDim); }
321 bool isUNorm() const { return isImmTy(ImmTyUNorm); }
322 bool isDA() const { return isImmTy(ImmTyDA); }
323 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
324 bool isGFX10A16() const { return isImmTy(ImmTyA16); }
325 bool isLWE() const { return isImmTy(ImmTyLWE); }
326 bool isOff() const { return isImmTy(ImmTyOff); }
327 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
328 bool isExpVM() const { return isImmTy(ImmTyExpVM); }
329 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
330 bool isOffen() const { return isImmTy(ImmTyOffen); }
331 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
332 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
333 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
334 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
335 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
336
337 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
338 bool isGDS() const { return isImmTy(ImmTyGDS); }
339 bool isLDS() const { return isImmTy(ImmTyLDS); }
340 bool isCPol() const { return isImmTy(ImmTyCPol); }
341 bool isSWZ() const { return isImmTy(ImmTySWZ); }
342 bool isTFE() const { return isImmTy(ImmTyTFE); }
343 bool isD16() const { return isImmTy(ImmTyD16); }
344 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
345 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
346 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
347 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
348 bool isFI() const { return isImmTy(ImmTyDppFi); }
349 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
350 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
351 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
352 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
353 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
354 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
355 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
356 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
357 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
358 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
359 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
360 bool isHigh() const { return isImmTy(ImmTyHigh); }
361
362 bool isMod() const {
363 return isClampSI() || isOModSI();
364 }
365
366 bool isRegOrImm() const {
367 return isReg() || isImm();
368 }
369
370 bool isRegClass(unsigned RCID) const;
371
372 bool isInlineValue() const;
373
374 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
375 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
376 }
377
378 bool isSCSrcB16() const {
379 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
380 }
381
382 bool isSCSrcV2B16() const {
383 return isSCSrcB16();
384 }
385
386 bool isSCSrcB32() const {
387 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
388 }
389
390 bool isSCSrcB64() const {
391 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
392 }
393
394 bool isBoolReg() const;
395
396 bool isSCSrcF16() const {
397 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
398 }
399
400 bool isSCSrcV2F16() const {
401 return isSCSrcF16();
402 }
403
404 bool isSCSrcF32() const {
405 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
406 }
407
408 bool isSCSrcF64() const {
409 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
410 }
411
412 bool isSSrcB32() const {
413 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
414 }
415
416 bool isSSrcB16() const {
417 return isSCSrcB16() || isLiteralImm(MVT::i16);
418 }
419
420 bool isSSrcV2B16() const {
421 llvm_unreachable("cannot happen");
422 return isSSrcB16();
423 }
424
425 bool isSSrcB64() const {
426 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
427 // See isVSrc64().
428 return isSCSrcB64() || isLiteralImm(MVT::i64);
429 }
430
431 bool isSSrcF32() const {
432 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
433 }
434
435 bool isSSrcF64() const {
436 return isSCSrcB64() || isLiteralImm(MVT::f64);
437 }
438
439 bool isSSrcF16() const {
440 return isSCSrcB16() || isLiteralImm(MVT::f16);
441 }
442
443 bool isSSrcV2F16() const {
444 llvm_unreachable("cannot happen");
445 return isSSrcF16();
446 }
447
448 bool isSSrcV2FP32() const {
449 llvm_unreachable("cannot happen");
450 return isSSrcF32();
451 }
452
453 bool isSCSrcV2FP32() const {
454 llvm_unreachable("cannot happen");
455 return isSCSrcF32();
456 }
457
458 bool isSSrcV2INT32() const {
459 llvm_unreachable("cannot happen");
460 return isSSrcB32();
461 }
462
463 bool isSCSrcV2INT32() const {
464 llvm_unreachable("cannot happen");
465 return isSCSrcB32();
466 }
467
468 bool isSSrcOrLdsB32() const {
469 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
470 isLiteralImm(MVT::i32) || isExpr();
471 }
472
473 bool isVCSrcB32() const {
474 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
475 }
476
477 bool isVCSrcB64() const {
478 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
479 }
480
481 bool isVCSrcB16() const {
482 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
483 }
484
485 bool isVCSrcV2B16() const {
486 return isVCSrcB16();
487 }
488
489 bool isVCSrcF32() const {
490 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
491 }
492
493 bool isVCSrcF64() const {
494 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
495 }
496
497 bool isVCSrcF16() const {
498 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
499 }
500
501 bool isVCSrcV2F16() const {
502 return isVCSrcF16();
503 }
504
505 bool isVSrcB32() const {
506 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
507 }
508
509 bool isVSrcB64() const {
510 return isVCSrcF64() || isLiteralImm(MVT::i64);
511 }
512
513 bool isVSrcB16() const {
514 return isVCSrcB16() || isLiteralImm(MVT::i16);
515 }
516
517 bool isVSrcV2B16() const {
518 return isVSrcB16() || isLiteralImm(MVT::v2i16);
519 }
520
521 bool isVCSrcV2FP32() const {
522 return isVCSrcF64();
523 }
524
525 bool isVSrcV2FP32() const {
526 return isVSrcF64() || isLiteralImm(MVT::v2f32);
527 }
528
529 bool isVCSrcV2INT32() const {
530 return isVCSrcB64();
531 }
532
533 bool isVSrcV2INT32() const {
534 return isVSrcB64() || isLiteralImm(MVT::v2i32);
535 }
536
537 bool isVSrcF32() const {
538 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
539 }
540
541 bool isVSrcF64() const {
542 return isVCSrcF64() || isLiteralImm(MVT::f64);
543 }
544
545 bool isVSrcF16() const {
546 return isVCSrcF16() || isLiteralImm(MVT::f16);
547 }
548
549 bool isVSrcV2F16() const {
550 return isVSrcF16() || isLiteralImm(MVT::v2f16);
551 }
552
553 bool isVISrcB32() const {
554 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
555 }
556
557 bool isVISrcB16() const {
558 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
559 }
560
561 bool isVISrcV2B16() const {
562 return isVISrcB16();
563 }
564
565 bool isVISrcF32() const {
566 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
567 }
568
569 bool isVISrcF16() const {
570 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
571 }
572
573 bool isVISrcV2F16() const {
574 return isVISrcF16() || isVISrcB32();
575 }
576
577 bool isVISrc_64B64() const {
578 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
579 }
580
581 bool isVISrc_64F64() const {
582 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
583 }
584
585 bool isVISrc_64V2FP32() const {
586 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
587 }
588
589 bool isVISrc_64V2INT32() const {
590 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
591 }
592
593 bool isVISrc_256B64() const {
594 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
595 }
596
597 bool isVISrc_256F64() const {
598 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
599 }
600
601 bool isVISrc_128B16() const {
602 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
603 }
604
605 bool isVISrc_128V2B16() const {
606 return isVISrc_128B16();
607 }
608
609 bool isVISrc_128B32() const {
610 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
611 }
612
613 bool isVISrc_128F32() const {
614 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
615 }
616
617 bool isVISrc_256V2FP32() const {
618 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
619 }
620
621 bool isVISrc_256V2INT32() const {
622 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
623 }
624
625 bool isVISrc_512B32() const {
626 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
627 }
628
629 bool isVISrc_512B16() const {
630 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
631 }
632
633 bool isVISrc_512V2B16() const {
634 return isVISrc_512B16();
635 }
636
637 bool isVISrc_512F32() const {
638 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
639 }
640
641 bool isVISrc_512F16() const {
642 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
643 }
644
645 bool isVISrc_512V2F16() const {
646 return isVISrc_512F16() || isVISrc_512B32();
647 }
648
649 bool isVISrc_1024B32() const {
650 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
651 }
652
653 bool isVISrc_1024B16() const {
654 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
655 }
656
657 bool isVISrc_1024V2B16() const {
658 return isVISrc_1024B16();
659 }
660
661 bool isVISrc_1024F32() const {
662 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
663 }
664
665 bool isVISrc_1024F16() const {
666 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
667 }
668
669 bool isVISrc_1024V2F16() const {
670 return isVISrc_1024F16() || isVISrc_1024B32();
671 }
672
673 bool isAISrcB32() const {
674 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
675 }
676
677 bool isAISrcB16() const {
678 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
679 }
680
681 bool isAISrcV2B16() const {
682 return isAISrcB16();
683 }
684
685 bool isAISrcF32() const {
686 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
687 }
688
689 bool isAISrcF16() const {
690 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
691 }
692
693 bool isAISrcV2F16() const {
694 return isAISrcF16() || isAISrcB32();
695 }
696
697 bool isAISrc_64B64() const {
698 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
699 }
700
701 bool isAISrc_64F64() const {
702 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
703 }
704
705 bool isAISrc_128B32() const {
706 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
707 }
708
709 bool isAISrc_128B16() const {
710 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
711 }
712
713 bool isAISrc_128V2B16() const {
714 return isAISrc_128B16();
715 }
716
717 bool isAISrc_128F32() const {
718 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
719 }
720
721 bool isAISrc_128F16() const {
722 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
723 }
724
725 bool isAISrc_128V2F16() const {
726 return isAISrc_128F16() || isAISrc_128B32();
727 }
728
729 bool isVISrc_128F16() const {
730 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
731 }
732
733 bool isVISrc_128V2F16() const {
734 return isVISrc_128F16() || isVISrc_128B32();
735 }
736
737 bool isAISrc_256B64() const {
738 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
739 }
740
741 bool isAISrc_256F64() const {
742 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
743 }
744
745 bool isAISrc_512B32() const {
746 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
747 }
748
749 bool isAISrc_512B16() const {
750 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
751 }
752
753 bool isAISrc_512V2B16() const {
754 return isAISrc_512B16();
755 }
756
757 bool isAISrc_512F32() const {
758 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
759 }
760
761 bool isAISrc_512F16() const {
762 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
763 }
764
765 bool isAISrc_512V2F16() const {
766 return isAISrc_512F16() || isAISrc_512B32();
767 }
768
769 bool isAISrc_1024B32() const {
770 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
771 }
772
773 bool isAISrc_1024B16() const {
774 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
775 }
776
777 bool isAISrc_1024V2B16() const {
778 return isAISrc_1024B16();
779 }
780
781 bool isAISrc_1024F32() const {
782 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
783 }
784
785 bool isAISrc_1024F16() const {
786 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
787 }
788
789 bool isAISrc_1024V2F16() const {
790 return isAISrc_1024F16() || isAISrc_1024B32();
791 }
792
793 bool isKImmFP32() const {
794 return isLiteralImm(MVT::f32);
795 }
796
797 bool isKImmFP16() const {
798 return isLiteralImm(MVT::f16);
799 }
800
801 bool isMem() const override {
802 return false;
803 }
804
805 bool isExpr() const {
806 return Kind == Expression;
807 }
808
809 bool isSoppBrTarget() const {
810 return isExpr() || isImm();
811 }
812
813 bool isSWaitCnt() const;
814 bool isHwreg() const;
815 bool isSendMsg() const;
816 bool isSwizzle() const;
817 bool isSMRDOffset8() const;
818 bool isSMEMOffset() const;
819 bool isSMRDLiteralOffset() const;
820 bool isDPP8() const;
821 bool isDPPCtrl() const;
822 bool isBLGP() const;
823 bool isCBSZ() const;
824 bool isABID() const;
825 bool isGPRIdxMode() const;
826 bool isS16Imm() const;
827 bool isU16Imm() const;
828 bool isEndpgm() const;
829
830 StringRef getExpressionAsToken() const {
831 assert(isExpr());
832 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
833 return S->getSymbol().getName();
834 }
835
836 StringRef getToken() const {
837 assert(isToken());
838
839 if (Kind == Expression)
840 return getExpressionAsToken();
841
842 return StringRef(Tok.Data, Tok.Length);
843 }
844
845 int64_t getImm() const {
846 assert(isImm());
847 return Imm.Val;
848 }
849
850 void setImm(int64_t Val) {
851 assert(isImm());
852 Imm.Val = Val;
853 }
854
855 ImmTy getImmTy() const {
856 assert(isImm());
857 return Imm.Type;
858 }
859
860 unsigned getReg() const override {
861 assert(isRegKind());
862 return Reg.RegNo;
863 }
864
865 SMLoc getStartLoc() const override {
866 return StartLoc;
867 }
868
869 SMLoc getEndLoc() const override {
870 return EndLoc;
871 }
872
873 SMRange getLocRange() const {
874 return SMRange(StartLoc, EndLoc);
875 }
876
877 Modifiers getModifiers() const {
878 assert(isRegKind() || isImmTy(ImmTyNone));
879 return isRegKind() ? Reg.Mods : Imm.Mods;
880 }
881
882 void setModifiers(Modifiers Mods) {
883 assert(isRegKind() || isImmTy(ImmTyNone));
884 if (isRegKind())
885 Reg.Mods = Mods;
886 else
887 Imm.Mods = Mods;
888 }
889
890 bool hasModifiers() const {
891 return getModifiers().hasModifiers();
892 }
893
894 bool hasFPModifiers() const {
895 return getModifiers().hasFPModifiers();
896 }
897
898 bool hasIntModifiers() const {
899 return getModifiers().hasIntModifiers();
900 }
901
902 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
903
904 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
905
906 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
907
908 template <unsigned Bitwidth>
909 void addKImmFPOperands(MCInst &Inst, unsigned N) const;
910
911 void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
912 addKImmFPOperands<16>(Inst, N);
913 }
914
915 void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
916 addKImmFPOperands<32>(Inst, N);
917 }
918
919 void addRegOperands(MCInst &Inst, unsigned N) const;
920
921 void addBoolRegOperands(MCInst &Inst, unsigned N) const {
922 addRegOperands(Inst, N);
923 }
924
925 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
926 if (isRegKind())
927 addRegOperands(Inst, N);
928 else if (isExpr())
929 Inst.addOperand(MCOperand::createExpr(Expr));
930 else
931 addImmOperands(Inst, N);
932 }
933
934 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
935 Modifiers Mods = getModifiers();
936 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
937 if (isRegKind()) {
938 addRegOperands(Inst, N);
939 } else {
940 addImmOperands(Inst, N, false);
941 }
942 }
943
944 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
945 assert(!hasIntModifiers());
946 addRegOrImmWithInputModsOperands(Inst, N);
947 }
948
949 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
950 assert(!hasFPModifiers());
951 addRegOrImmWithInputModsOperands(Inst, N);
952 }
953
954 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
955 Modifiers Mods = getModifiers();
956 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
957 assert(isRegKind());
958 addRegOperands(Inst, N);
959 }
960
961 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
962 assert(!hasIntModifiers());
963 addRegWithInputModsOperands(Inst, N);
964 }
965
966 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
967 assert(!hasFPModifiers());
968 addRegWithInputModsOperands(Inst, N);
969 }
970
971 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
972 if (isImm())
973 addImmOperands(Inst, N);
974 else {
975 assert(isExpr());
976 Inst.addOperand(MCOperand::createExpr(Expr));
977 }
978 }
979
980 static void printImmTy(raw_ostream& OS, ImmTy Type) {
981 switch (Type) {
982 case ImmTyNone: OS << "None"; break;
983 case ImmTyGDS: OS << "GDS"; break;
984 case ImmTyLDS: OS << "LDS"; break;
985 case ImmTyOffen: OS << "Offen"; break;
986 case ImmTyIdxen: OS << "Idxen"; break;
987 case ImmTyAddr64: OS << "Addr64"; break;
988 case ImmTyOffset: OS << "Offset"; break;
989 case ImmTyInstOffset: OS << "InstOffset"; break;
990 case ImmTyOffset0: OS << "Offset0"; break;
991 case ImmTyOffset1: OS << "Offset1"; break;
992 case ImmTyCPol: OS << "CPol"; break;
993 case ImmTySWZ: OS << "SWZ"; break;
994 case ImmTyTFE: OS << "TFE"; break;
995 case ImmTyD16: OS << "D16"; break;
996 case ImmTyFORMAT: OS << "FORMAT"; break;
997 case ImmTyClampSI: OS << "ClampSI"; break;
998 case ImmTyOModSI: OS << "OModSI"; break;
999 case ImmTyDPP8: OS << "DPP8"; break;
1000 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1001 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1002 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1003 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1004 case ImmTyDppFi: OS << "FI"; break;
1005 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1006 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1007 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1008 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1009 case ImmTyDMask: OS << "DMask"; break;
1010 case ImmTyDim: OS << "Dim"; break;
1011 case ImmTyUNorm: OS << "UNorm"; break;
1012 case ImmTyDA: OS << "DA"; break;
1013 case ImmTyR128A16: OS << "R128A16"; break;
1014 case ImmTyA16: OS << "A16"; break;
1015 case ImmTyLWE: OS << "LWE"; break;
1016 case ImmTyOff: OS << "Off"; break;
1017 case ImmTyExpTgt: OS << "ExpTgt"; break;
1018 case ImmTyExpCompr: OS << "ExpCompr"; break;
1019 case ImmTyExpVM: OS << "ExpVM"; break;
1020 case ImmTyHwreg: OS << "Hwreg"; break;
1021 case ImmTySendMsg: OS << "SendMsg"; break;
1022 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1023 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1024 case ImmTyAttrChan: OS << "AttrChan"; break;
1025 case ImmTyOpSel: OS << "OpSel"; break;
1026 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1027 case ImmTyNegLo: OS << "NegLo"; break;
1028 case ImmTyNegHi: OS << "NegHi"; break;
1029 case ImmTySwizzle: OS << "Swizzle"; break;
1030 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1031 case ImmTyHigh: OS << "High"; break;
1032 case ImmTyBLGP: OS << "BLGP"; break;
1033 case ImmTyCBSZ: OS << "CBSZ"; break;
1034 case ImmTyABID: OS << "ABID"; break;
1035 case ImmTyEndpgm: OS << "Endpgm"; break;
1036 }
1037 }
1038
1039 void print(raw_ostream &OS) const override {
1040 switch (Kind) {
1041 case Register:
1042 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1043 break;
1044 case Immediate:
1045 OS << '<' << getImm();
1046 if (getImmTy() != ImmTyNone) {
1047 OS << " type: "; printImmTy(OS, getImmTy());
1048 }
1049 OS << " mods: " << Imm.Mods << '>';
1050 break;
1051 case Token:
1052 OS << '\'' << getToken() << '\'';
1053 break;
1054 case Expression:
1055 OS << "<expr " << *Expr << '>';
1056 break;
1057 }
1058 }
1059
1060 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1061 int64_t Val, SMLoc Loc,
1062 ImmTy Type = ImmTyNone,
1063 bool IsFPImm = false) {
1064 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1065 Op->Imm.Val = Val;
1066 Op->Imm.IsFPImm = IsFPImm;
1067 Op->Imm.Kind = ImmKindTyNone;
1068 Op->Imm.Type = Type;
1069 Op->Imm.Mods = Modifiers();
1070 Op->StartLoc = Loc;
1071 Op->EndLoc = Loc;
1072 return Op;
1073 }
1074
1075 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1076 StringRef Str, SMLoc Loc,
1077 bool HasExplicitEncodingSize = true) {
1078 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1079 Res->Tok.Data = Str.data();
1080 Res->Tok.Length = Str.size();
1081 Res->StartLoc = Loc;
1082 Res->EndLoc = Loc;
1083 return Res;
1084 }
1085
1086 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1087 unsigned RegNo, SMLoc S,
1088 SMLoc E) {
1089 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1090 Op->Reg.RegNo = RegNo;
1091 Op->Reg.Mods = Modifiers();
1092 Op->StartLoc = S;
1093 Op->EndLoc = E;
1094 return Op;
1095 }
1096
1097 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1098 const class MCExpr *Expr, SMLoc S) {
1099 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1100 Op->Expr = Expr;
1101 Op->StartLoc = S;
1102 Op->EndLoc = S;
1103 return Op;
1104 }
1105};
1106
1107raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1108 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1109 return OS;
1110}
1111
1112//===----------------------------------------------------------------------===//
1113// AsmParser
1114//===----------------------------------------------------------------------===//
1115
1116// Holds info related to the current kernel, e.g. count of SGPRs used.
1117// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1118// .amdgpu_hsa_kernel or at EOF.
1119class KernelScopeInfo {
1120 int SgprIndexUnusedMin = -1;
1121 int VgprIndexUnusedMin = -1;
1122 MCContext *Ctx = nullptr;
1123
1124 void usesSgprAt(int i) {
1125 if (i >= SgprIndexUnusedMin) {
1126 SgprIndexUnusedMin = ++i;
1127 if (Ctx) {
1128 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1129 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1130 }
1131 }
1132 }
1133
1134 void usesVgprAt(int i) {
1135 if (i >= VgprIndexUnusedMin) {
1136 VgprIndexUnusedMin = ++i;
1137 if (Ctx) {
1138 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1139 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
1140 }
1141 }
1142 }
1143
1144public:
1145 KernelScopeInfo() = default;
1146
1147 void initialize(MCContext &Context) {
1148 Ctx = &Context;
1149 usesSgprAt(SgprIndexUnusedMin = -1);
1150 usesVgprAt(VgprIndexUnusedMin = -1);
1151 }
1152
1153 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1154 switch (RegKind) {
1155 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1156 case IS_AGPR: // fall through
1157 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1158 default: break;
1159 }
1160 }
1161};
1162
1163class AMDGPUAsmParser : public MCTargetAsmParser {
1164 MCAsmParser &Parser;
1165
1166 // Number of extra operands parsed after the first optional operand.
1167 // This may be necessary to skip hardcoded mandatory operands.
1168 static const unsigned MAX_OPR_LOOKAHEAD = 8;
1169
1170 unsigned ForcedEncodingSize = 0;
1171 bool ForcedDPP = false;
1172 bool ForcedSDWA = false;
1173 KernelScopeInfo KernelScope;
1174 unsigned CPolSeen;
1175
1176 /// @name Auto-generated Match Functions
1177 /// {
1178
1179#define GET_ASSEMBLER_HEADER
1180#include "AMDGPUGenAsmMatcher.inc"
1181
1182 /// }
1183
1184private:
1185 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1186 bool OutOfRangeError(SMRange Range);
1187 /// Calculate VGPR/SGPR blocks required for given target, reserved
1188 /// registers, and user-specified NextFreeXGPR values.
1189 ///
1190 /// \param Features [in] Target features, used for bug corrections.
1191 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1192 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1193 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1194 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1195 /// descriptor field, if valid.
1196 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1197 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1198 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1199 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1200 /// \param VGPRBlocks [out] Result VGPR block count.
1201 /// \param SGPRBlocks [out] Result SGPR block count.
1202 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1203 bool FlatScrUsed, bool XNACKUsed,
1204 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1205 SMRange VGPRRange, unsigned NextFreeSGPR,
1206 SMRange SGPRRange, unsigned &VGPRBlocks,
1207 unsigned &SGPRBlocks);
1208 bool ParseDirectiveAMDGCNTarget();
1209 bool ParseDirectiveAMDHSAKernel();
1210 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1211 bool ParseDirectiveHSACodeObjectVersion();
1212 bool ParseDirectiveHSACodeObjectISA();
1213 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1214 bool ParseDirectiveAMDKernelCodeT();
1215 // TODO: Possibly make subtargetHasRegister const.
1216 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1217 bool ParseDirectiveAMDGPUHsaKernel();
1218
1219 bool ParseDirectiveISAVersion();
1220 bool ParseDirectiveHSAMetadata();
1221 bool ParseDirectivePALMetadataBegin();
1222 bool ParseDirectivePALMetadata();
1223 bool ParseDirectiveAMDGPULDS();
1224
1225 /// Common code to parse out a block of text (typically YAML) between start and
1226 /// end directives.
1227 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1228 const char *AssemblerDirectiveEnd,
1229 std::string &CollectString);
1230
1231 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1232 RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1233 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1234 unsigned &RegNum, unsigned &RegWidth,
1235 bool RestoreOnFailure = false);
1236 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1237 unsigned &RegNum, unsigned &RegWidth,
1238 SmallVectorImpl<AsmToken> &Tokens);
1239 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1240 unsigned &RegWidth,
1241 SmallVectorImpl<AsmToken> &Tokens);
1242 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1243 unsigned &RegWidth,
1244 SmallVectorImpl<AsmToken> &Tokens);
1245 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1246 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1247 bool ParseRegRange(unsigned& Num, unsigned& Width);
1248 unsigned getRegularReg(RegisterKind RegKind,
1249 unsigned RegNum,
1250 unsigned RegWidth,
1251 SMLoc Loc);
1252
1253 bool isRegister();
1254 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1255 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1256 void initializeGprCountSymbol(RegisterKind RegKind);
1257 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1258 unsigned RegWidth);
1259 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1260 bool IsAtomic, bool IsLds = false);
1261 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1262 bool IsGdsHardcoded);
1263
1264public:
1265 enum AMDGPUMatchResultTy {
1266 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1267 };
1268 enum OperandMode {
1269 OperandMode_Default,
1270 OperandMode_NSA,
1271 };
1272
1273 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1274
1275 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1276 const MCInstrInfo &MII,
1277 const MCTargetOptions &Options)
1278 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1279 MCAsmParserExtension::Initialize(Parser);
1280
1281 if (getFeatureBits().none()) {
1282 // Set default features.
1283 copySTI().ToggleFeature("southern-islands");
1284 }
1285
1286 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1287
1288 {
1289 // TODO: make those pre-defined variables read-only.
1290 // Currently there is no suitable machinery in the core llvm-mc for this.
1291 // MCSymbol::isRedefinable is intended for another purpose, and
1292 // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1293 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1294 MCContext &Ctx = getContext();
1295 if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1296 MCSymbol *Sym =
1297 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1298 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1299 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1300 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1301 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1302 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1303 } else {
1304 MCSymbol *Sym =
1305 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1306 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1307 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1308 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1309 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1310 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1311 }
1312 if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1313 initializeGprCountSymbol(IS_VGPR);
1314 initializeGprCountSymbol(IS_SGPR);
1315 } else
1316 KernelScope.initialize(getContext());
1317 }
1318 }
1319
1320 bool hasMIMG_R128() const {
1321 return AMDGPU::hasMIMG_R128(getSTI());
1322 }
1323
1324 bool hasPackedD16() const {
1325 return AMDGPU::hasPackedD16(getSTI());
1326 }
1327
1328 bool hasGFX10A16() const {
1329 return AMDGPU::hasGFX10A16(getSTI());
1330 }
1331
1332 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1333
1334 bool isSI() const {
1335 return AMDGPU::isSI(getSTI());
1336 }
1337
1338 bool isCI() const {
1339 return AMDGPU::isCI(getSTI());
1340 }
1341
1342 bool isVI() const {
1343 return AMDGPU::isVI(getSTI());
1344 }
1345
1346 bool isGFX9() const {
1347 return AMDGPU::isGFX9(getSTI());
1348 }
1349
1350 bool isGFX90A() const {
1351 return AMDGPU::isGFX90A(getSTI());
1352 }
1353
1354 bool isGFX9Plus() const {
1355 return AMDGPU::isGFX9Plus(getSTI());
1356 }
1357
1358 bool isGFX10() const {
1359 return AMDGPU::isGFX10(getSTI());
1360 }
1361
1362 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1363
1364 bool isGFX10_BEncoding() const {
1365 return AMDGPU::isGFX10_BEncoding(getSTI());
1366 }
1367
1368 bool hasInv2PiInlineImm() const {
1369 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1370 }
1371
1372 bool hasFlatOffsets() const {
1373 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1374 }
1375
1376 bool hasArchitectedFlatScratch() const {
1377 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1378 }
1379
1380 bool hasSGPR102_SGPR103() const {
1381 return !isVI() && !isGFX9();
1382 }
1383
1384 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1385
1386 bool hasIntClamp() const {
1387 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1388 }
1389
1390 AMDGPUTargetStreamer &getTargetStreamer() {
1391 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1392 return static_cast<AMDGPUTargetStreamer &>(TS);
1393 }
1394
1395 const MCRegisterInfo *getMRI() const {
1396 // We need this const_cast because for some reason getContext() is not const
1397 // in MCAsmParser.
1398 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1399 }
1400
1401 const MCInstrInfo *getMII() const {
1402 return &MII;
1403 }
1404
1405 const FeatureBitset &getFeatureBits() const {
1406 return getSTI().getFeatureBits();
1407 }
1408
1409 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1410 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1411 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1412
1413 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1414 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1415 bool isForcedDPP() const { return ForcedDPP; }
1416 bool isForcedSDWA() const { return ForcedSDWA; }
1417 ArrayRef<unsigned> getMatchedVariants() const;
1418 StringRef getMatchedVariantName() const;
1419
1420 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1421 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1422 bool RestoreOnFailure);
1423 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1424 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1425 SMLoc &EndLoc) override;
1426 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1427 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1428 unsigned Kind) override;
1429 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1430 OperandVector &Operands, MCStreamer &Out,
1431 uint64_t &ErrorInfo,
1432 bool MatchingInlineAsm) override;
1433 bool ParseDirective(AsmToken DirectiveID) override;
1434 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1435 OperandMode Mode = OperandMode_Default);
1436 StringRef parseMnemonicSuffix(StringRef Name);
1437 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1438 SMLoc NameLoc, OperandVector &Operands) override;
1439 //bool ProcessInstruction(MCInst &Inst);
1440
1441 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1442
1443 OperandMatchResultTy
1444 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1445 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1446 bool (*ConvertResult)(int64_t &) = nullptr);
1447
1448 OperandMatchResultTy
1449 parseOperandArrayWithPrefix(const char *Prefix,
1450 OperandVector &Operands,
1451 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1452 bool (*ConvertResult)(int64_t&) = nullptr);
1453
1454 OperandMatchResultTy
1455 parseNamedBit(StringRef Name, OperandVector &Operands,
1456 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1457 OperandMatchResultTy parseCPol(OperandVector &Operands);
1458 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1459 StringRef &Value,
1460 SMLoc &StringLoc);
1461
1462 bool isModifier();
1463 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1464 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1465 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1466 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1467 bool parseSP3NegModifier();
1468 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1469 OperandMatchResultTy parseReg(OperandVector &Operands);
1470 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1471 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1472 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1473 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1474 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1475 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1476 OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1477 OperandMatchResultTy parseUfmt(int64_t &Format);
1478 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1479 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1480 OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1481 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1482 OperandMatchResultTy parseNumericFormat(int64_t &Format);
1483 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1484 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1485
1486 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1487 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1488 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1489 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1490
1491 bool parseCnt(int64_t &IntVal);
1492 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1493 OperandMatchResultTy parseHwreg(OperandVector &Operands);
1494
1495private:
1496 struct OperandInfoTy {
1497 SMLoc Loc;
1498 int64_t Id;
1499 bool IsSymbolic = false;
1500 bool IsDefined = false;
1501
1502 OperandInfoTy(int64_t Id_) : Id(Id_) {}
1503 };
1504
1505 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1506 bool validateSendMsg(const OperandInfoTy &Msg,
1507 const OperandInfoTy &Op,
1508 const OperandInfoTy &Stream);
1509
1510 bool parseHwregBody(OperandInfoTy &HwReg,
1511 OperandInfoTy &Offset,
1512 OperandInfoTy &Width);
1513 bool validateHwreg(const OperandInfoTy &HwReg,
1514 const OperandInfoTy &Offset,
1515 const OperandInfoTy &Width);
1516
1517 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1518 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1519
1520 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1521 const OperandVector &Operands) const;
1522 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1523 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1524 SMLoc getLitLoc(const OperandVector &Operands) const;
1525 SMLoc getConstLoc(const OperandVector &Operands) const;
1526
1527 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1528 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1529 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1530 bool validateSOPLiteral(const MCInst &Inst) const;
1531 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1532 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1533 bool validateIntClampSupported(const MCInst &Inst);
1534 bool validateMIMGAtomicDMask(const MCInst &Inst);
1535 bool validateMIMGGatherDMask(const MCInst &Inst);
1536 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1537 bool validateMIMGDataSize(const MCInst &Inst);
1538 bool validateMIMGAddrSize(const MCInst &Inst);
1539 bool validateMIMGD16(const MCInst &Inst);
1540 bool validateMIMGDim(const MCInst &Inst);
1541 bool validateMIMGMSAA(const MCInst &Inst);
1542 bool validateOpSel(const MCInst &Inst);
1543 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1544 bool validateVccOperand(unsigned Reg) const;
1545 bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
1546 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1547 bool validateAGPRLdSt(const MCInst &Inst) const;
1548 bool validateVGPRAlign(const MCInst &Inst) const;
1549 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1550 bool validateDivScale(const MCInst &Inst);
1551 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1552 const SMLoc &IDLoc);
1553 Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1554 unsigned getConstantBusLimit(unsigned Opcode) const;
1555 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1556 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1557 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1558
1559 bool isSupportedMnemo(StringRef Mnemo,
1560 const FeatureBitset &FBS);
1561 bool isSupportedMnemo(StringRef Mnemo,
1562 const FeatureBitset &FBS,
1563 ArrayRef<unsigned> Variants);
1564 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1565
1566 bool isId(const StringRef Id) const;
1567 bool isId(const AsmToken &Token, const StringRef Id) const;
1568 bool isToken(const AsmToken::TokenKind Kind) const;
1569 bool trySkipId(const StringRef Id);
1570 bool trySkipId(const StringRef Pref, const StringRef Id);
1571 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1572 bool trySkipToken(const AsmToken::TokenKind Kind);
1573 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1574 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1575 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1576
1577 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1578 AsmToken::TokenKind getTokenKind() const;
1579 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1580 bool parseExpr(OperandVector &Operands);
1581 StringRef getTokenStr() const;
1582 AsmToken peekToken();
1583 AsmToken getToken() const;
1584 SMLoc getLoc() const;
1585 void lex();
1586
1587public:
1588 void onBeginOfFile() override;
1589
1590 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1591 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1592
1593 OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1594 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1595 OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1596 OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1597 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1598 OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1599
1600 bool parseSwizzleOperand(int64_t &Op,
1601 const unsigned MinVal,
1602 const unsigned MaxVal,
1603 const StringRef ErrMsg,
1604 SMLoc &Loc);
1605 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1606 const unsigned MinVal,
1607 const unsigned MaxVal,
1608 const StringRef ErrMsg);
1609 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1610 bool parseSwizzleOffset(int64_t &Imm);
1611 bool parseSwizzleMacro(int64_t &Imm);
1612 bool parseSwizzleQuadPerm(int64_t &Imm);
1613 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1614 bool parseSwizzleBroadcast(int64_t &Imm);
1615 bool parseSwizzleSwap(int64_t &Imm);
1616 bool parseSwizzleReverse(int64_t &Imm);
1617
1618 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1619 int64_t parseGPRIdxMacro();
1620
1621 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1622 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1623 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1624 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1625
1626 AMDGPUOperand::Ptr defaultCPol() const;
1627
1628 AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1629 AMDGPUOperand::Ptr defaultSMEMOffset() const;
1630 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1631 AMDGPUOperand::Ptr defaultFlatOffset() const;
1632
1633 OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1634
1635 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1636 OptionalImmIndexMap &OptionalIdx);
1637 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1638 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1639 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1640 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1641 OptionalImmIndexMap &OptionalIdx);
1642
1643 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1644
1645 void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1646 bool IsAtomic = false);
1647 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1648 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1649
1650 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1651
1652 bool parseDimId(unsigned &Encoding);
1653 OperandMatchResultTy parseDim(OperandVector &Operands);
1654 OperandMatchResultTy parseDPP8(OperandVector &Operands);
1655 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1656 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1657 int64_t parseDPPCtrlSel(StringRef Ctrl);
1658 int64_t parseDPPCtrlPerm();
1659 AMDGPUOperand::Ptr defaultRowMask() const;
1660 AMDGPUOperand::Ptr defaultBankMask() const;
1661 AMDGPUOperand::Ptr defaultBoundCtrl() const;
1662 AMDGPUOperand::Ptr defaultFI() const;
1663 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1664 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1665
1666 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1667 AMDGPUOperand::ImmTy Type);
1668 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1669 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1670 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1671 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1672 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1673 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1674 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1675 uint64_t BasicInstType,
1676 bool SkipDstVcc = false,
1677 bool SkipSrcVcc = false);
1678
1679 AMDGPUOperand::Ptr defaultBLGP() const;
1680 AMDGPUOperand::Ptr defaultCBSZ() const;
1681 AMDGPUOperand::Ptr defaultABID() const;
1682
1683 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1684 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1685};
1686
1687struct OptionalOperand {
1688 const char *Name;
1689 AMDGPUOperand::ImmTy Type;
1690 bool IsBit;
1691 bool (*ConvertResult)(int64_t&);
1692};
1693
1694} // end anonymous namespace
1695
1696// May be called with integer type with equivalent bitwidth.
1697static const fltSemantics *getFltSemantics(unsigned Size) {
1698 switch (Size) {
1699 case 4:
1700 return &APFloat::IEEEsingle();
1701 case 8:
1702 return &APFloat::IEEEdouble();
1703 case 2:
1704 return &APFloat::IEEEhalf();
1705 default:
1706 llvm_unreachable("unsupported fp type");
1707 }
1708}
1709
1710static const fltSemantics *getFltSemantics(MVT VT) {
1711 return getFltSemantics(VT.getSizeInBits() / 8);
1712}
1713
1714static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1715 switch (OperandType) {
1716 case AMDGPU::OPERAND_REG_IMM_INT32:
1717 case AMDGPU::OPERAND_REG_IMM_FP32:
1718 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1719 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1720 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1721 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1722 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1723 case AMDGPU::OPERAND_REG_IMM_V2FP32:
1724 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1725 case AMDGPU::OPERAND_REG_IMM_V2INT32:
1726 return &APFloat::IEEEsingle();
1727 case AMDGPU::OPERAND_REG_IMM_INT64:
1728 case AMDGPU::OPERAND_REG_IMM_FP64:
1729 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1730 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1731 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1732 return &APFloat::IEEEdouble();
1733 case AMDGPU::OPERAND_REG_IMM_INT16:
1734 case AMDGPU::OPERAND_REG_IMM_FP16:
1735 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1736 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1737 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1738 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1739 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1740 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1741 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1742 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1743 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1744 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1745 return &APFloat::IEEEhalf();
1746 default:
1747 llvm_unreachable("unsupported fp type");
1748 }
1749}
1750
1751//===----------------------------------------------------------------------===//
1752// Operand
1753//===----------------------------------------------------------------------===//
1754
1755static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1756 bool Lost;
1757
1758 // Convert literal to single precision
1759 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1760 APFloat::rmNearestTiesToEven,
1761 &Lost);
1762 // We allow precision loss but not overflow or underflow
1763 if (Status != APFloat::opOK &&
1764 Lost &&
1765 ((Status & APFloat::opOverflow) != 0 ||
1766 (Status & APFloat::opUnderflow) != 0)) {
1767 return false;
1768 }
1769
1770 return true;
1771}
1772
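As an illustration of the check above, a minimal standalone sketch (not part of this file; fltConvertDemo is a hypothetical helper relying only on the public APFloat interface) showing which conversions report overflow rather than mere precision loss:

#include "llvm/ADT/APFloat.h"
using namespace llvm;

void fltConvertDemo() {
  bool Lost;
  // 0.5 is exactly representable in IEEE half: status is opOK, Lost == false.
  APFloat A(APFloat::IEEEdouble(), "0.5");
  A.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Lost);

  // 1.0e10 exceeds the range of IEEE half: the returned status includes
  // opOverflow with Lost == true, so canLosslesslyConvertToFPType rejects it.
  APFloat B(APFloat::IEEEdouble(), "1.0e10");
  APFloat::opStatus S =
      B.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Lost);
  (void)S;
}
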
1773static bool isSafeTruncation(int64_t Val, unsigned Size) {
1774 return isUIntN(Size, Val) || isIntN(Size, Val);
1775}
1776
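A quick sketch of what this predicate accepts for a 16-bit operand (hypothetical values; truncationDemo is not part of this file and assumes only the isUIntN/isIntN helpers from llvm/Support/MathExtras.h):

#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;

void truncationDemo() {
  assert(isUIntN(16, 0xFFFF));                           // 65535 fits as unsigned
  assert(isIntN(16, -1));                                // -1 fits as signed
  assert(!isUIntN(16, 0x10000) && !isIntN(16, 0x10000)); // 65536 is rejected
}
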
1777static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1778 if (VT.getScalarType() == MVT::i16) {
1779 // FP immediate values are broken.
1780 return isInlinableIntLiteral(Val);
1781 }
1782
1783 // f16/v2f16 operands work correctly for all values.
1784 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1785}
1786
1787bool AMDGPUOperand::isInlinableImm(MVT type) const {
1788
1789 // This is a hack to enable named inline values like
1790 // shared_base with both 32-bit and 64-bit operands.
1791 // Note that these values are defined as
1792 // 32-bit operands only.
1793 if (isInlineValue()) {
1794 return true;
1795 }
1796
1797 if (!isImmTy(ImmTyNone)) {
1798 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1799 return false;
1800 }
1801 // TODO: We should avoid using host float here. It would be better to
1802 // check the float bit values which is what a few other places do.
1803 // We've had bot failures before due to weird NaN support on mips hosts.
1804
1805 APInt Literal(64, Imm.Val);
1806
1807 if (Imm.IsFPImm) { // We got fp literal token
1808 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1809 return AMDGPU::isInlinableLiteral64(Imm.Val,
1810 AsmParser->hasInv2PiInlineImm());
1811 }
1812
1813 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1814 if (!canLosslesslyConvertToFPType(FPLiteral, type))
1815 return false;
1816
1817 if (type.getScalarSizeInBits() == 16) {
1818 return isInlineableLiteralOp16(
1819 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1820 type, AsmParser->hasInv2PiInlineImm());
1821 }
1822
1823 // Check if single precision literal is inlinable
1824 return AMDGPU::isInlinableLiteral32(
1825 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1826 AsmParser->hasInv2PiInlineImm());
1827 }
1828
1829 // We got int literal token.
1830 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1831 return AMDGPU::isInlinableLiteral64(Imm.Val,
1832 AsmParser->hasInv2PiInlineImm());
1833 }
1834
1835 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1836 return false;
1837 }
1838
1839 if (type.getScalarSizeInBits() == 16) {
1840 return isInlineableLiteralOp16(
1841 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1842 type, AsmParser->hasInv2PiInlineImm());
1843 }
1844
1845 return AMDGPU::isInlinableLiteral32(
1846 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1847 AsmParser->hasInv2PiInlineImm());
1848}
1849
1850bool AMDGPUOperand::isLiteralImm(MVT type) const {
1851 // Check that this immediate can be added as literal
1852 if (!isImmTy(ImmTyNone)) {
1853 return false;
1854 }
1855
1856 if (!Imm.IsFPImm) {
1857 // We got int literal token.
1858
1859 if (type == MVT::f64 && hasFPModifiers()) {
1860 // Cannot apply fp modifiers to int literals preserving the same semantics
1861 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1862 // disable these cases.
1863 return false;
1864 }
1865
1866 unsigned Size = type.getSizeInBits();
1867 if (Size == 64)
1868 Size = 32;
1869
1870 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1871 // types.
1872 return isSafeTruncation(Imm.Val, Size);
1873 }
1874
1875 // We got fp literal token
1876 if (type == MVT::f64) { // Expected 64-bit fp operand
1877 // We would set the low 64 bits of the literal to zeroes, but we accept such literals
1878 return true;
1879 }
1880
1881 if (type == MVT::i64) { // Expected 64-bit int operand
1882 // We don't allow fp literals in 64-bit integer instructions. It is
1883 // unclear how we should encode them.
1884 return false;
1885 }
1886
1887 // We allow fp literals with f16x2 operands assuming that the specified
1888 // literal goes into the lower half and the upper half is zero. We also
1889 // require that the literal can be losslessly converted to f16.
1890 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1891 (type == MVT::v2i16)? MVT::i16 :
1892 (type == MVT::v2f32)? MVT::f32 : type;
1893
1894 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1895 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1896}
1897
1898bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1899 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1900}
1901
1902bool AMDGPUOperand::isVRegWithInputMods() const {
1903 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1904 // GFX90A allows DPP on 64-bit operands.
1905 (isRegClass(AMDGPU::VReg_64RegClassID) &&
1906 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1907}
1908
1909bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1910 if (AsmParser->isVI())
1911 return isVReg32();
1912 else if (AsmParser->isGFX9Plus())
1913 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1914 else
1915 return false;
1916}
1917
1918bool AMDGPUOperand::isSDWAFP16Operand() const {
1919 return isSDWAOperand(MVT::f16);
1920}
1921
1922bool AMDGPUOperand::isSDWAFP32Operand() const {
1923 return isSDWAOperand(MVT::f32);
1924}
1925
1926bool AMDGPUOperand::isSDWAInt16Operand() const {
1927 return isSDWAOperand(MVT::i16);
1928}
1929
1930bool AMDGPUOperand::isSDWAInt32Operand() const {
1931 return isSDWAOperand(MVT::i32);
1932}
1933
1934bool AMDGPUOperand::isBoolReg() const {
1935 auto FB = AsmParser->getFeatureBits();
1936 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1937 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1938}
1939
1940uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1941{
1942 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1943 assert(Size == 2 || Size == 4 || Size == 8);
1944
1945 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1946
1947 if (Imm.Mods.Abs) {
1948 Val &= ~FpSignMask;
1949 }
1950 if (Imm.Mods.Neg) {
1951 Val ^= FpSignMask;
1952 }
1953
1954 return Val;
1955}
1956
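A worked example of the sign-bit arithmetic above for a 32-bit operand (hypothetical bit patterns; fpModifierDemo is not part of this file):

#include <cassert>
#include <cstdint>

void fpModifierDemo() {
  const uint64_t FpSignMask = 1ULL << 31; // Size == 4
  uint64_t Val = 0xBF800000;              // bit pattern of -1.0f
  uint64_t AbsVal = Val & ~FpSignMask;    // abs: clear the sign -> +1.0f
  uint64_t NegVal = AbsVal ^ FpSignMask;  // neg: flip the sign  -> -1.0f
  assert(AbsVal == 0x3F800000 && NegVal == 0xBF800000);
}
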
1957void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1958 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1959 Inst.getNumOperands())) {
1960 addLiteralImmOperand(Inst, Imm.Val,
1961 ApplyModifiers &
1962 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1963 } else {
1964 assert(!isImmTy(ImmTyNone) || !hasModifiers());
1965 Inst.addOperand(MCOperand::createImm(Imm.Val));
1966 setImmKindNone();
1967 }
1968}
1969
1970void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1971 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1972 auto OpNum = Inst.getNumOperands();
1973 // Check that this operand accepts literals
1974 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1975
1976 if (ApplyModifiers) {
1977 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1978 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1979 Val = applyInputFPModifiers(Val, Size);
1980 }
1981
1982 APInt Literal(64, Val);
1983 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1984
1985 if (Imm.IsFPImm) { // We got fp literal token
1986 switch (OpTy) {
1987 case AMDGPU::OPERAND_REG_IMM_INT64:
1988 case AMDGPU::OPERAND_REG_IMM_FP64:
1989 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1990 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1991 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1992 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1993 AsmParser->hasInv2PiInlineImm())) {
1994 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1995 setImmKindConst();
1996 return;
1997 }
1998
1999 // Non-inlineable
2000 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2001 // For fp operands we check if low 32 bits are zeros
2002 if (Literal.getLoBits(32) != 0) {
2003 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2004 "Can't encode literal as exact 64-bit floating-point operand. "
2005 "Low 32-bits will be set to zero");
2006 }
2007
2008 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2009 setImmKindLiteral();
2010 return;
2011 }
2012
2013 // We don't allow fp literals in 64-bit integer instructions. It is
2014 // unclear how we should encode them. This case should be checked earlier
2015 // in predicate methods (isLiteralImm())
2016 llvm_unreachable("fp literal in 64-bit integer instruction.");
2017
2018 case AMDGPU::OPERAND_REG_IMM_INT32:
2019 case AMDGPU::OPERAND_REG_IMM_FP32:
2020 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2021 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2022 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2023 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2024 case AMDGPU::OPERAND_REG_IMM_INT16:
2025 case AMDGPU::OPERAND_REG_IMM_FP16:
2026 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2027 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2028 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2029 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2030 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2031 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2032 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2033 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2034 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2035 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2036 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2037 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2038 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2039 case AMDGPU::OPERAND_REG_IMM_V2INT32: {
2040 bool lost;
2041 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2042 // Convert literal to single precision
2043 FPLiteral.convert(*getOpFltSemantics(OpTy),
2044 APFloat::rmNearestTiesToEven, &lost);
2045 // We allow precision loss but not overflow or underflow. This should be
2046 // checked earlier in isLiteralImm()
2047
2048 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2049 Inst.addOperand(MCOperand::createImm(ImmVal));
2050 setImmKindLiteral();
2051 return;
2052 }
2053 default:
2054 llvm_unreachable("invalid operand size");
2055 }
2056
2057 return;
2058 }
2059
2060 // We got int literal token.
2061 // Only sign extend inline immediates.
2062 switch (OpTy) {
2063 case AMDGPU::OPERAND_REG_IMM_INT32:
2064 case AMDGPU::OPERAND_REG_IMM_FP32:
2065 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2066 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2067 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2068 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2069 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2070 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2071 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2072 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2073 case AMDGPU::OPERAND_REG_IMM_V2INT32:
2074 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2075 if (isSafeTruncation(Val, 32) &&
2076 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2077 AsmParser->hasInv2PiInlineImm())) {
2078 Inst.addOperand(MCOperand::createImm(Val));
2079 setImmKindConst();
2080 return;
2081 }
2082
2083 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2084 setImmKindLiteral();
2085 return;
2086
2087 case AMDGPU::OPERAND_REG_IMM_INT64:
2088 case AMDGPU::OPERAND_REG_IMM_FP64:
2089 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2090 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2091 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2092 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2093 Inst.addOperand(MCOperand::createImm(Val));
2094 setImmKindConst();
2095 return;
2096 }
2097
2098 Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2099 setImmKindLiteral();
2100 return;
2101
2102 case AMDGPU::OPERAND_REG_IMM_INT16:
2103 case AMDGPU::OPERAND_REG_IMM_FP16:
2104 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2105 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2106 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2107 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2108 if (isSafeTruncation(Val, 16) &&
2109 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2110 AsmParser->hasInv2PiInlineImm())) {
2111 Inst.addOperand(MCOperand::createImm(Val));
2112 setImmKindConst();
2113 return;
2114 }
2115
2116 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2117 setImmKindLiteral();
2118 return;
2119
2120 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2121 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2122 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2123 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2124 assert(isSafeTruncation(Val, 16));
2125 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2126 AsmParser->hasInv2PiInlineImm()));
2127
2128 Inst.addOperand(MCOperand::createImm(Val));
2129 return;
2130 }
2131 default:
2132 llvm_unreachable("invalid operand size");
2133 }
2134}
2135
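For the integer path above, a hedged illustration of which source operands end up as inline constants and which become 32-bit literals (the exact inlinable set is defined by AMDGPU::isInlinableLiteral32):

// v_add_f32_e64 v0, v1, 64   ; 64 lies in the integer inline range [-16, 64]
//                            ; -> added as an inline constant (setImmKindConst)
// v_add_f32_e64 v0, v1, 65   ; 65 is not inlinable
//                            ; -> added as a 32-bit literal (setImmKindLiteral)
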
2136template <unsigned Bitwidth>
2137void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2138 APInt Literal(64, Imm.Val);
2139 setImmKindNone();
2140
2141 if (!Imm.IsFPImm) {
2142 // We got int literal token.
2143 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2144 return;
2145 }
2146
2147 bool Lost;
2148 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2149 FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2150 APFloat::rmNearestTiesToEven, &Lost);
2151 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2152}
2153
2154void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2155 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2156}
2157
2158static bool isInlineValue(unsigned Reg) {
2159 switch (Reg) {
2160 case AMDGPU::SRC_SHARED_BASE:
2161 case AMDGPU::SRC_SHARED_LIMIT:
2162 case AMDGPU::SRC_PRIVATE_BASE:
2163 case AMDGPU::SRC_PRIVATE_LIMIT:
2164 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2165 return true;
2166 case AMDGPU::SRC_VCCZ:
2167 case AMDGPU::SRC_EXECZ:
2168 case AMDGPU::SRC_SCC:
2169 return true;
2170 case AMDGPU::SGPR_NULL:
2171 return true;
2172 default:
2173 return false;
2174 }
2175}
2176
2177bool AMDGPUOperand::isInlineValue() const {
2178 return isRegKind() && ::isInlineValue(getReg());
2179}
2180
2181//===----------------------------------------------------------------------===//
2182// AsmParser
2183//===----------------------------------------------------------------------===//
2184
2185static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2186 if (Is == IS_VGPR) {
2187 switch (RegWidth) {
2188 default: return -1;
2189 case 1: return AMDGPU::VGPR_32RegClassID;
2190 case 2: return AMDGPU::VReg_64RegClassID;
2191 case 3: return AMDGPU::VReg_96RegClassID;
2192 case 4: return AMDGPU::VReg_128RegClassID;
2193 case 5: return AMDGPU::VReg_160RegClassID;
2194 case 6: return AMDGPU::VReg_192RegClassID;
2195 case 7: return AMDGPU::VReg_224RegClassID;
2196 case 8: return AMDGPU::VReg_256RegClassID;
2197 case 16: return AMDGPU::VReg_512RegClassID;
2198 case 32: return AMDGPU::VReg_1024RegClassID;
2199 }
2200 } else if (Is == IS_TTMP) {
2201 switch (RegWidth) {
2202 default: return -1;
2203 case 1: return AMDGPU::TTMP_32RegClassID;
2204 case 2: return AMDGPU::TTMP_64RegClassID;
2205 case 4: return AMDGPU::TTMP_128RegClassID;
2206 case 8: return AMDGPU::TTMP_256RegClassID;
2207 case 16: return AMDGPU::TTMP_512RegClassID;
2208 }
2209 } else if (Is == IS_SGPR) {
2210 switch (RegWidth) {
2211 default: return -1;
2212 case 1: return AMDGPU::SGPR_32RegClassID;
2213 case 2: return AMDGPU::SGPR_64RegClassID;
2214 case 3: return AMDGPU::SGPR_96RegClassID;
2215 case 4: return AMDGPU::SGPR_128RegClassID;
2216 case 5: return AMDGPU::SGPR_160RegClassID;
2217 case 6: return AMDGPU::SGPR_192RegClassID;
2218 case 7: return AMDGPU::SGPR_224RegClassID;
2219 case 8: return AMDGPU::SGPR_256RegClassID;
2220 case 16: return AMDGPU::SGPR_512RegClassID;
2221 }
2222 } else if (Is == IS_AGPR) {
2223 switch (RegWidth) {
2224 default: return -1;
2225 case 1: return AMDGPU::AGPR_32RegClassID;
2226 case 2: return AMDGPU::AReg_64RegClassID;
2227 case 3: return AMDGPU::AReg_96RegClassID;
2228 case 4: return AMDGPU::AReg_128RegClassID;
2229 case 5: return AMDGPU::AReg_160RegClassID;
2230 case 6: return AMDGPU::AReg_192RegClassID;
2231 case 7: return AMDGPU::AReg_224RegClassID;
2232 case 8: return AMDGPU::AReg_256RegClassID;
2233 case 16: return AMDGPU::AReg_512RegClassID;
2234 case 32: return AMDGPU::AReg_1024RegClassID;
2235 }
2236 }
2237 return -1;
2238}
2239
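A few usage examples for the width-to-class mapping above (widths are counted in 32-bit dwords):

// getRegClass(IS_SGPR, 4)  -> AMDGPU::SGPR_128RegClassID  (s[N:N+3])
// getRegClass(IS_VGPR, 32) -> AMDGPU::VReg_1024RegClassID (v[N:N+31])
// getRegClass(IS_SGPR, 9)  -> -1, there is no 288-bit SGPR class
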
2240static unsigned getSpecialRegForName(StringRef RegName) {
2241 return StringSwitch<unsigned>(RegName)
2242 .Case("exec", AMDGPU::EXEC)
2243 .Case("vcc", AMDGPU::VCC)
2244 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2245 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2246 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2247 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2248 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2249 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2250 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2251 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2252 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2253 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2254 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2255 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2256 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2257 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2258 .Case("m0", AMDGPU::M0)
2259 .Case("vccz", AMDGPU::SRC_VCCZ)
2260 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2261 .Case("execz", AMDGPU::SRC_EXECZ)
2262 .Case("src_execz", AMDGPU::SRC_EXECZ)
2263 .Case("scc", AMDGPU::SRC_SCC)
2264 .Case("src_scc", AMDGPU::SRC_SCC)
2265 .Case("tba", AMDGPU::TBA)
2266 .Case("tma", AMDGPU::TMA)
2267 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2268 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2269 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2270 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2271 .Case("vcc_lo", AMDGPU::VCC_LO)
2272 .Case("vcc_hi", AMDGPU::VCC_HI)
2273 .Case("exec_lo", AMDGPU::EXEC_LO)
2274 .Case("exec_hi", AMDGPU::EXEC_HI)
2275 .Case("tma_lo", AMDGPU::TMA_LO)
2276 .Case("tma_hi", AMDGPU::TMA_HI)
2277 .Case("tba_lo", AMDGPU::TBA_LO)
2278 .Case("tba_hi", AMDGPU::TBA_HI)
2279 .Case("pc", AMDGPU::PC_REG)
2280 .Case("null", AMDGPU::SGPR_NULL)
2281 .Default(AMDGPU::NoRegister);
2282}
2283
2284bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2285 SMLoc &EndLoc, bool RestoreOnFailure) {
2286 auto R = parseRegister();
2287 if (!R) return true;
2288 assert(R->isReg());
2289 RegNo = R->getReg();
2290 StartLoc = R->getStartLoc();
2291 EndLoc = R->getEndLoc();
2292 return false;
2293}
2294
2295bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2296 SMLoc &EndLoc) {
2297 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2298}
2299
2300OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2301 SMLoc &StartLoc,
2302 SMLoc &EndLoc) {
2303 bool Result =
2304 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2305 bool PendingErrors = getParser().hasPendingError();
2306 getParser().clearPendingErrors();
2307 if (PendingErrors)
2308 return MatchOperand_ParseFail;
2309 if (Result)
2310 return MatchOperand_NoMatch;
2311 return MatchOperand_Success;
2312}
2313
2314bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2315 RegisterKind RegKind, unsigned Reg1,
2316 SMLoc Loc) {
2317 switch (RegKind) {
2318 case IS_SPECIAL:
2319 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2320 Reg = AMDGPU::EXEC;
2321 RegWidth = 2;
2322 return true;
2323 }
2324 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2325 Reg = AMDGPU::FLAT_SCR;
2326 RegWidth = 2;
2327 return true;
2328 }
2329 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2330 Reg = AMDGPU::XNACK_MASK;
2331 RegWidth = 2;
2332 return true;
2333 }
2334 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2335 Reg = AMDGPU::VCC;
2336 RegWidth = 2;
2337 return true;
2338 }
2339 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2340 Reg = AMDGPU::TBA;
2341 RegWidth = 2;
2342 return true;
2343 }
2344 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2345 Reg = AMDGPU::TMA;
2346 RegWidth = 2;
2347 return true;
2348 }
2349 Error(Loc, "register does not fit in the list");
2350 return false;
2351 case IS_VGPR:
2352 case IS_SGPR:
2353 case IS_AGPR:
2354 case IS_TTMP:
2355 if (Reg1 != Reg + RegWidth) {
2356 Error(Loc, "registers in a list must have consecutive indices");
2357 return false;
2358 }
2359 RegWidth++;
2360 return true;
2361 default:
2362 llvm_unreachable("unexpected register kind");
2363 }
2364}
2365
2366struct RegInfo {
2367 StringLiteral Name;
2368 RegisterKind Kind;
2369};
2370
2371static constexpr RegInfo RegularRegisters[] = {
2372 {{"v"}, IS_VGPR},
2373 {{"s"}, IS_SGPR},
2374 {{"ttmp"}, IS_TTMP},
2375 {{"acc"}, IS_AGPR},
2376 {{"a"}, IS_AGPR},
2377};
2378
2379static bool isRegularReg(RegisterKind Kind) {
2380 return Kind == IS_VGPR ||
2381 Kind == IS_SGPR ||
2382 Kind == IS_TTMP ||
2383 Kind == IS_AGPR;
2384}
2385
2386static const RegInfo* getRegularRegInfo(StringRef Str) {
2387 for (const RegInfo &Reg : RegularRegisters)
2388 if (Str.startswith(Reg.Name))
2389 return &Reg;
2390 return nullptr;
2391}
2392
2393static bool getRegNum(StringRef Str, unsigned& Num) {
2394 return !Str.getAsInteger(10, Num);
2395}
2396
2397bool
2398AMDGPUAsmParser::isRegister(const AsmToken &Token,
2399 const AsmToken &NextToken) const {
2400
2401 // A list of consecutive registers: [s0,s1,s2,s3]
2402 if (Token.is(AsmToken::LBrac))
2403 return true;
2404
2405 if (!Token.is(AsmToken::Identifier))
2406 return false;
2407
2408 // A single register like s0 or a range of registers like s[0:1]
2409
2410 StringRef Str = Token.getString();
2411 const RegInfo *Reg = getRegularRegInfo(Str);
2412 if (Reg) {
2413 StringRef RegName = Reg->Name;
2414 StringRef RegSuffix = Str.substr(RegName.size());
2415 if (!RegSuffix.empty()) {
2416 unsigned Num;
2417 // A single register with an index: rXX
2418 if (getRegNum(RegSuffix, Num))
2419 return true;
2420 } else {
2421 // A range of registers: r[XX:YY].
2422 if (NextToken.is(AsmToken::LBrac))
2423 return true;
2424 }
2425 }
2426
2427 return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2428}
2429
2430bool
2431AMDGPUAsmParser::isRegister()
2432{
2433 return isRegister(getToken(), peekToken());
2434}
2435
2436unsigned
2437AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2438 unsigned RegNum,
2439 unsigned RegWidth,
2440 SMLoc Loc) {
2441
2442 assert(isRegularReg(RegKind));
2443
2444 unsigned AlignSize = 1;
2445 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2446 // SGPR and TTMP registers must be aligned.
2447 // Max required alignment is 4 dwords.
2448 AlignSize = std::min(RegWidth, 4u);
2449 }
2450
2451 if (RegNum % AlignSize != 0) {
2452 Error(Loc, "invalid register alignment");
2453 return AMDGPU::NoRegister;
2454 }
2455
2456 unsigned RegIdx = RegNum / AlignSize;
2457 int RCID = getRegClass(RegKind, RegWidth);
2458 if (RCID == -1) {
2459 Error(Loc, "invalid or unsupported register size");
2460 return AMDGPU::NoRegister;
2461 }
2462
2463 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2464 const MCRegisterClass RC = TRI->getRegClass(RCID);
2465 if (RegIdx >= RC.getNumRegs()) {
2466 Error(Loc, "register index is out of range");
2467 return AMDGPU::NoRegister;
2468 }
2469
2470 return RC.getRegister(RegIdx);
2471}
2472
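A worked example of the alignment rule above (hypothetical operands):

// s[4:7]: RegWidth = 4 -> AlignSize = min(4, 4) = 4; RegNum = 4 is aligned,
//         so RegIdx = 4 / 4 = 1 in the SGPR_128 class.
// s[2:5]: RegWidth = 4 -> AlignSize = 4, but RegNum = 2 is not a multiple of 4,
//         so the parser reports "invalid register alignment".
// v[2:5]: VGPRs have no alignment requirement, so AlignSize stays 1.
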
2473bool
2474AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2475 int64_t RegLo, RegHi;
2476 if (!skipToken(AsmToken::LBrac, "missing register index"))
2477 return false;
2478
2479 SMLoc FirstIdxLoc = getLoc();
2480 SMLoc SecondIdxLoc;
2481
2482 if (!parseExpr(RegLo))
2483 return false;
2484
2485 if (trySkipToken(AsmToken::Colon)) {
2486 SecondIdxLoc = getLoc();
2487 if (!parseExpr(RegHi))
2488 return false;
2489 } else {
2490 RegHi = RegLo;
2491 }
2492
2493 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2494 return false;
2495
2496 if (!isUInt<32>(RegLo)) {
2497 Error(FirstIdxLoc, "invalid register index");
2498 return false;
2499 }
2500
2501 if (!isUInt<32>(RegHi)) {
2502 Error(SecondIdxLoc, "invalid register index");
2503 return false;
2504 }
2505
2506 if (RegLo > RegHi) {
2507 Error(FirstIdxLoc, "first register index should not exceed second index");
2508 return false;
2509 }
2510
2511 Num = static_cast<unsigned>(RegLo);
2512 Width = (RegHi - RegLo) + 1;
2513 return true;
2514}
2515
2516unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2517 unsigned &RegNum, unsigned &RegWidth,
2518 SmallVectorImpl<AsmToken> &Tokens) {
2519 assert(isToken(AsmToken::Identifier));
2520 unsigned Reg = getSpecialRegForName(getTokenStr());
2521 if (Reg) {
2522 RegNum = 0;
2523 RegWidth = 1;
2524 RegKind = IS_SPECIAL;
2525 Tokens.push_back(getToken());
2526 lex(); // skip register name
2527 }
2528 return Reg;
2529}
2530
2531unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2532 unsigned &RegNum, unsigned &RegWidth,
2533 SmallVectorImpl<AsmToken> &Tokens) {
2534 assert(isToken(AsmToken::Identifier));
2535 StringRef RegName = getTokenStr();
2536 auto Loc = getLoc();
2537
2538 const RegInfo *RI = getRegularRegInfo(RegName);
2539 if (!RI) {
2540 Error(Loc, "invalid register name");
2541 return AMDGPU::NoRegister;
2542 }
2543
2544 Tokens.push_back(getToken());
2545 lex(); // skip register name
2546
2547 RegKind = RI->Kind;
2548 StringRef RegSuffix = RegName.substr(RI->Name.size());
2549 if (!RegSuffix.empty()) {
2550 // Single 32-bit register: vXX.
2551 if (!getRegNum(RegSuffix, RegNum)) {
2552 Error(Loc, "invalid register index");
2553 return AMDGPU::NoRegister;
2554 }
2555 RegWidth = 1;
2556 } else {
2557 // Range of registers: v[XX:YY]. ":YY" is optional.
2558 if (!ParseRegRange(RegNum, RegWidth))
2559 return AMDGPU::NoRegister;
2560 }
2561
2562 return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2563}
2564
2565unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2566 unsigned &RegWidth,
2567 SmallVectorImpl<AsmToken> &Tokens) {
2568 unsigned Reg = AMDGPU::NoRegister;
2569 auto ListLoc = getLoc();
2570
2571 if (!skipToken(AsmToken::LBrac,
2572 "expected a register or a list of registers")) {
2573 return AMDGPU::NoRegister;
2574 }
2575
2576 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2577
2578 auto Loc = getLoc();
2579 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2580 return AMDGPU::NoRegister;
2581 if (RegWidth != 1) {
2582 Error(Loc, "expected a single 32-bit register");
2583 return AMDGPU::NoRegister;
2584 }
2585
2586 for (; trySkipToken(AsmToken::Comma); ) {
2587 RegisterKind NextRegKind;
2588 unsigned NextReg, NextRegNum, NextRegWidth;
2589 Loc = getLoc();
2590
2591 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2592 NextRegNum, NextRegWidth,
2593 Tokens)) {
2594 return AMDGPU::NoRegister;
2595 }
2596 if (NextRegWidth != 1) {
2597 Error(Loc, "expected a single 32-bit register");
2598 return AMDGPU::NoRegister;
2599 }
2600 if (NextRegKind != RegKind) {
2601 Error(Loc, "registers in a list must be of the same kind");
2602 return AMDGPU::NoRegister;
2603 }
2604 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2605 return AMDGPU::NoRegister;
2606 }
2607
2608 if (!skipToken(AsmToken::RBrac,
2609 "expected a comma or a closing square bracket")) {
2610 return AMDGPU::NoRegister;
2611 }
2612
2613 if (isRegularReg(RegKind))
2614 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2615
2616 return Reg;
2617}
2618
2619bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2620 unsigned &RegNum, unsigned &RegWidth,
2621 SmallVectorImpl<AsmToken> &Tokens) {
2622 auto Loc = getLoc();
2623 Reg = AMDGPU::NoRegister;
2624
2625 if (isToken(AsmToken::Identifier)) {
2626 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2627 if (Reg == AMDGPU::NoRegister)
2628 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2629 } else {
2630 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2631 }
2632
2633 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2634 if (Reg == AMDGPU::NoRegister) {
2635 assert(Parser.hasPendingError());
2636 return false;
2637 }
2638
2639 if (!subtargetHasRegister(*TRI, Reg)) {
2640 if (Reg == AMDGPU::SGPR_NULL) {
2641 Error(Loc, "'null' operand is not supported on this GPU");
2642 } else {
2643 Error(Loc, "register not available on this GPU");
2644 }
2645 return false;
2646 }
2647
2648 return true;
2649}
2650
2651bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2652 unsigned &RegNum, unsigned &RegWidth,
2653 bool RestoreOnFailure /*=false*/) {
2654 Reg = AMDGPU::NoRegister;
2655
2656 SmallVector<AsmToken, 1> Tokens;
2657 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2658 if (RestoreOnFailure) {
2659 while (!Tokens.empty()) {
2660 getLexer().UnLex(Tokens.pop_back_val());
2661 }
2662 }
2663 return true;
2664 }
2665 return false;
2666}
2667
2668Optional<StringRef>
2669AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2670 switch (RegKind) {
2671 case IS_VGPR:
2672 return StringRef(".amdgcn.next_free_vgpr");
2673 case IS_SGPR:
2674 return StringRef(".amdgcn.next_free_sgpr");
2675 default:
2676 return None;
2677 }
2678}
2679
2680void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2681 auto SymbolName = getGprCountSymbolName(RegKind);
2682 assert(SymbolName && "initializing invalid register kind");
2683 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2684 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2685}
2686
2687bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2688 unsigned DwordRegIndex,
2689 unsigned RegWidth) {
2690 // Symbols are only defined for GCN targets
2691 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2692 return true;
2693
2694 auto SymbolName = getGprCountSymbolName(RegKind);
2695 if (!SymbolName)
2696 return true;
2697 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2698
2699 int64_t NewMax = DwordRegIndex + RegWidth - 1;
2700 int64_t OldCount;
2701
2702 if (!Sym->isVariable())
2703 return !Error(getLoc(),
2704 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2705 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2706 return !Error(
2707 getLoc(),
2708 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2709
2710 if (OldCount <= NewMax)
2711 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2712
2713 return true;
2714}
2715
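A sketch of the bookkeeping above under an assumed input: after parsing v[6:7], DwordRegIndex is 6 and RegWidth is 2, so:

// NewMax = 6 + 2 - 1 = 7
// If .amdgcn.next_free_vgpr currently evaluates to 4 (<= NewMax), it is raised
// to NewMax + 1 = 8; if it already evaluates to 12, it is left unchanged.
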
2716std::unique_ptr<AMDGPUOperand>
2717AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2718 const auto &Tok = getToken();
2719 SMLoc StartLoc = Tok.getLoc();
2720 SMLoc EndLoc = Tok.getEndLoc();
2721 RegisterKind RegKind;
2722 unsigned Reg, RegNum, RegWidth;
2723
2724 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2725 return nullptr;
2726 }
2727 if (isHsaAbiVersion3Or4(&getSTI())) {
2728 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2729 return nullptr;
2730 } else
2731 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2732 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2733}
2734
2735OperandMatchResultTy
2736AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2737 // TODO: add syntactic sugar for 1/(2*PI)
2738
2739 assert(!isRegister());
2740 assert(!isModifier());
2741
2742 const auto& Tok = getToken();
2743 const auto& NextTok = peekToken();
2744 bool IsReal = Tok.is(AsmToken::Real);
2745 SMLoc S = getLoc();
2746 bool Negate = false;
2747
2748 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2749 lex();
2750 IsReal = true;
2751 Negate = true;
2752 }
2753
2754 if (IsReal) {
2755 // Floating-point expressions are not supported.
2756 // Can only allow floating-point literals with an
2757 // optional sign.
2758
2759 StringRef Num = getTokenStr();
2760 lex();
2761
2762 APFloat RealVal(APFloat::IEEEdouble());
2763 auto roundMode = APFloat::rmNearestTiesToEven;
2764 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2765 return MatchOperand_ParseFail;
2766 }
2767 if (Negate)
2768 RealVal.changeSign();
2769
2770 Operands.push_back(
2771 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2772 AMDGPUOperand::ImmTyNone, true));
2773
2774 return MatchOperand_Success;
2775
2776 } else {
2777 int64_t IntVal;
2778 const MCExpr *Expr;
2779 SMLoc S = getLoc();
2780
2781 if (HasSP3AbsModifier) {
2782 // This is a workaround for handling expressions
2783 // as arguments of SP3 'abs' modifier, for example:
2784 // |1.0|
2785 // |-1|
2786 // |1+x|
2787 // This syntax is not compatible with syntax of standard
2788 // MC expressions (due to the trailing '|').
2789 SMLoc EndLoc;
2790 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2791 return MatchOperand_ParseFail;
2792 } else {
2793 if (Parser.parseExpression(Expr))
2794 return MatchOperand_ParseFail;
2795 }
2796
2797 if (Expr->evaluateAsAbsolute(IntVal)) {
2798 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2799 } else {
2800 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2801 }
2802
2803 return MatchOperand_Success;
2804 }
2805
2806 return MatchOperand_NoMatch;
2807}
2808
2809OperandMatchResultTy
2810AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2811 if (!isRegister())
2812 return MatchOperand_NoMatch;
2813
2814 if (auto R = parseRegister()) {
2815 assert(R->isReg());
2816 Operands.push_back(std::move(R));
2817 return MatchOperand_Success;
2818 }
2819 return MatchOperand_ParseFail;
2820}
2821
2822OperandMatchResultTy
2823AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2824 auto res = parseReg(Operands);
2825 if (res != MatchOperand_NoMatch) {
2826 return res;
2827 } else if (isModifier()) {
2828 return MatchOperand_NoMatch;
2829 } else {
2830 return parseImm(Operands, HasSP3AbsMod);
2831 }
2832}
2833
2834bool
2835AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2836 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2837 const auto &str = Token.getString();
2838 return str == "abs" || str == "neg" || str == "sext";
2839 }
2840 return false;
2841}
2842
2843bool
2844AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2845 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2846}
2847
2848bool
2849AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2850 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2851}
2852
2853bool
2854AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2855 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2856}
2857
2858// Check if this is an operand modifier or an opcode modifier
2859// which may look like an expression but it is not. We should
2860// avoid parsing these modifiers as expressions. Currently
2861// recognized sequences are:
2862// |...|
2863// abs(...)
2864// neg(...)
2865// sext(...)
2866// -reg
2867// -|...|
2868// -abs(...)
2869// name:...
2870// Note that simple opcode modifiers like 'gds' may be parsed as
2871// expressions; this is a special case. See getExpressionAsToken.
2872//
2873bool
2874AMDGPUAsmParser::isModifier() {
2875
2876 AsmToken Tok = getToken();
2877 AsmToken NextToken[2];
2878 peekTokens(NextToken);
2879
2880 return isOperandModifier(Tok, NextToken[0]) ||
2881 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2882 isOpcodeModifierWithVal(Tok, NextToken[0]);
2883}
2884
2885// Check if the current token is an SP3 'neg' modifier.
2886 // Currently this modifier is allowed in the following contexts:
2887//
2888// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2889// 2. Before an 'abs' modifier: -abs(...)
2890// 3. Before an SP3 'abs' modifier: -|...|
2891//
2892// In all other cases "-" is handled as a part
2893// of an expression that follows the sign.
2894//
2895// Note: When "-" is followed by an integer literal,
2896// this is interpreted as integer negation rather
2897// than a floating-point NEG modifier applied to N.
2898 // Besides being counter-intuitive, such use of the floating-point
2899 // NEG modifier would have resulted in different meanings of
2900 // integer literals used with VOP1/2/C and VOP3,
2901// for example:
2902// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2903// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2904 // Negative fp literals with a preceding "-" are
2905 // handled likewise for uniformity.
2906//
2907bool
2908AMDGPUAsmParser::parseSP3NegModifier() {
2909
2910 AsmToken NextToken[2];
2911 peekTokens(NextToken);
2912
2913 if (isToken(AsmToken::Minus) &&
2914 (isRegister(NextToken[0], NextToken[1]) ||
2915 NextToken[0].is(AsmToken::Pipe) ||
2916 isId(NextToken[0], "abs"))) {
2917 lex();
2918 return true;
2919 }
2920
2921 return false;
2922}
2923
2924OperandMatchResultTy
2925AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2926 bool AllowImm) {
2927 bool Neg, SP3Neg;
2928 bool Abs, SP3Abs;
2929 SMLoc Loc;
2930
2931 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2932 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2933 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2934 return MatchOperand_ParseFail;
2935 }
2936
2937 SP3Neg = parseSP3NegModifier();
2938
2939 Loc = getLoc();
2940 Neg = trySkipId("neg");
2941 if (Neg && SP3Neg) {
2942 Error(Loc, "expected register or immediate");
2943 return MatchOperand_ParseFail;
2944 }
2945 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2946 return MatchOperand_ParseFail;
2947
2948 Abs = trySkipId("abs");
2949 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2950 return MatchOperand_ParseFail;
2951
2952 Loc = getLoc();
2953 SP3Abs = trySkipToken(AsmToken::Pipe);
2954 if (Abs && SP3Abs) {
2955 Error(Loc, "expected register or immediate");
2956 return MatchOperand_ParseFail;
2957 }
2958
2959 OperandMatchResultTy Res;
2960 if (AllowImm) {
2961 Res = parseRegOrImm(Operands, SP3Abs);
2962 } else {
2963 Res = parseReg(Operands);
2964 }
2965 if (Res != MatchOperand_Success) {
2966 return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2967 }
2968
2969 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2970 return MatchOperand_ParseFail;
2971 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2972 return MatchOperand_ParseFail;
2973 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2974 return MatchOperand_ParseFail;
2975
2976 AMDGPUOperand::Modifiers Mods;
2977 Mods.Abs = Abs || SP3Abs;
2978 Mods.Neg = Neg || SP3Neg;
2979
2980 if (Mods.hasFPModifiers()) {
2981 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2982 if (Op.isExpr()) {
2983 Error(Op.getStartLoc(), "expected an absolute expression");
2984 return MatchOperand_ParseFail;
2985 }
2986 Op.setModifiers(Mods);
2987 }
2988 return MatchOperand_Success;
2989}
2990
2991OperandMatchResultTy
2992AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2993 bool AllowImm) {
2994 bool Sext = trySkipId("sext");
2995 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2996 return MatchOperand_ParseFail;
2997
2998 OperandMatchResultTy Res;
2999 if (AllowImm) {
3000 Res = parseRegOrImm(Operands);
3001 } else {
3002 Res = parseReg(Operands);
3003 }
3004 if (Res != MatchOperand_Success) {
3005 return Sext? MatchOperand_ParseFail : Res;
3006 }
3007
3008 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3009 return MatchOperand_ParseFail;
3010
3011 AMDGPUOperand::Modifiers Mods;
3012 Mods.Sext = Sext;
3013
3014 if (Mods.hasIntModifiers()) {
3015 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3016 if (Op.isExpr()) {
3017 Error(Op.getStartLoc(), "expected an absolute expression");
3018 return MatchOperand_ParseFail;
3019 }
3020 Op.setModifiers(Mods);
3021 }
3022
3023 return MatchOperand_Success;
3024}
3025
3026OperandMatchResultTy
3027AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3028 return parseRegOrImmWithFPInputMods(Operands, false);
3029}
3030
3031OperandMatchResultTy
3032AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3033 return parseRegOrImmWithIntInputMods(Operands, false);
3034}
3035
3036OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3037 auto Loc = getLoc();
3038 if (trySkipId("off")) {
3039 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3040 AMDGPUOperand::ImmTyOff, false));
3041 return MatchOperand_Success;
3042 }
3043
3044 if (!isRegister())
3045 return MatchOperand_NoMatch;
3046
3047 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3048 if (Reg) {
3049 Operands.push_back(std::move(Reg));
3050 return MatchOperand_Success;
3051 }
3052
3053 return MatchOperand_ParseFail;
3054
3055}
3056
3057unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3058 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3059
3060 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3061 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3062 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3063 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3064 return Match_InvalidOperand;
3065
3066 if ((TSFlags & SIInstrFlags::VOP3) &&
3067 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3068 getForcedEncodingSize() != 64)
3069 return Match_PreferE32;
3070
3071 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3072 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3073 // v_mac_f32/16 allow only dst_sel == DWORD;
3074 auto OpNum =
3075 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3076 const auto &Op = Inst.getOperand(OpNum);
3077 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3078 return Match_InvalidOperand;
3079 }
3080 }
3081
3082 return Match_Success;
3083}
3084
3085static ArrayRef<unsigned> getAllVariants() {
3086 static const unsigned Variants[] = {
3087 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3088 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3089 };
3090
3091 return makeArrayRef(Variants);
3092}
3093
3094// What asm variants we should check
3095ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3096 if (getForcedEncodingSize() == 32) {
3097 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3098 return makeArrayRef(Variants);
3099 }
3100
3101 if (isForcedVOP3()) {
3102 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3103 return makeArrayRef(Variants);
3104 }
3105
3106 if (isForcedSDWA()) {
3107 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3108 AMDGPUAsmVariants::SDWA9};
3109 return makeArrayRef(Variants);
3110 }
3111
3112 if (isForcedDPP()) {
3113 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3114 return makeArrayRef(Variants);
3115 }
3116
3117 return getAllVariants();
3118}
3119
3120StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3121 if (getForcedEncodingSize() == 32)
3122 return "e32";
3123
3124 if (isForcedVOP3())
3125 return "e64";
3126
3127 if (isForcedSDWA())
3128 return "sdwa";
3129
3130 if (isForcedDPP())
3131 return "dpp";
3132
3133 return "";
3134}
3135
3136unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3137 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3138 const unsigned Num = Desc.getNumImplicitUses();
3139 for (unsigned i = 0; i < Num; ++i) {
3140 unsigned Reg = Desc.ImplicitUses[i];
3141 switch (Reg) {
3142 case AMDGPU::FLAT_SCR:
3143 case AMDGPU::VCC:
3144 case AMDGPU::VCC_LO:
3145 case AMDGPU::VCC_HI:
3146 case AMDGPU::M0:
3147 return Reg;
3148 default:
3149 break;
3150 }
3151 }
3152 return AMDGPU::NoRegister;
3153}
3154
3155// NB: This code is correct only when used to check constant
3156 // bus limitations because GFX7 supports no f16 inline constants.
3157// Note that there are no cases when a GFX7 opcode violates
3158// constant bus limitations due to the use of an f16 constant.
3159bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3160 unsigned OpIdx) const {
3161 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3162
3163 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3164 return false;
3165 }
3166
3167 const MCOperand &MO = Inst.getOperand(OpIdx);
3168
3169 int64_t Val = MO.getImm();
3170 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3171
3172 switch (OpSize) { // expected operand size
3173 case 8:
3174 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3175 case 4:
3176 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3177 case 2: {
3178 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3179 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3180 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3181 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3182 return AMDGPU::isInlinableIntLiteral(Val);
3183
3184 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3185 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3186 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3187 return AMDGPU::isInlinableIntLiteralV216(Val);
3188
3189 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3190 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3191 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3192 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3193
3194 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3195 }
3196 default:
3197 llvm_unreachable("invalid operand size");
3198 }
3199}
3200
3201unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3202 if (!isGFX10Plus())
3203 return 1;
3204
3205 switch (Opcode) {
3206 // 64-bit shift instructions can use only one scalar value input
3207 case AMDGPU::V_LSHLREV_B64_e64:
3208 case AMDGPU::V_LSHLREV_B64_gfx10:
3209 case AMDGPU::V_LSHRREV_B64_e64:
3210 case AMDGPU::V_LSHRREV_B64_gfx10:
3211 case AMDGPU::V_ASHRREV_I64_e64:
3212 case AMDGPU::V_ASHRREV_I64_gfx10:
3213 case AMDGPU::V_LSHL_B64_e64:
3214 case AMDGPU::V_LSHR_B64_e64:
3215 case AMDGPU::V_ASHR_I64_e64:
3216 return 1;
3217 default:
3218 return 2;
3219 }
3220}
3221
3222bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3223 const MCOperand &MO = Inst.getOperand(OpIdx);
3224 if (MO.isImm()) {
3225 return !isInlineConstant(Inst, OpIdx);
3226 } else if (MO.isReg()) {
3227 auto Reg = MO.getReg();
3228 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3229 auto PReg = mc2PseudoReg(Reg);
3230 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3231 } else {
3232 return true;
3233 }
3234}
3235
3236bool
3237AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3238 const OperandVector &Operands) {
3239 const unsigned Opcode = Inst.getOpcode();
3240 const MCInstrDesc &Desc = MII.get(Opcode);
3241 unsigned LastSGPR = AMDGPU::NoRegister;
3242 unsigned ConstantBusUseCount = 0;
3243 unsigned NumLiterals = 0;
3244 unsigned LiteralSize;
3245
3246 if (Desc.TSFlags &
3247 (SIInstrFlags::VOPC |
3248 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3249 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3250 SIInstrFlags::SDWA)) {
3251 // Check special imm operands (used by madmk, etc)
3252 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3253 ++ConstantBusUseCount;
3254 }
3255
3256 SmallDenseSet<unsigned> SGPRsUsed;
3257 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3258 if (SGPRUsed != AMDGPU::NoRegister) {
3259 SGPRsUsed.insert(SGPRUsed);
3260 ++ConstantBusUseCount;
3261 }
3262
3263 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3264 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3265 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3266
3267 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3268
3269 for (int OpIdx : OpIndices) {
3270 if (OpIdx == -1) break;
3271
3272 const MCOperand &MO = Inst.getOperand(OpIdx);
3273 if (usesConstantBus(Inst, OpIdx)) {
3274 if (MO.isReg()) {
3275 LastSGPR = mc2PseudoReg(MO.getReg());
3276 // Pairs of registers with a partial intersection like these
3277 // s0, s[0:1]
3278 // flat_scratch_lo, flat_scratch
3279 // flat_scratch_lo, flat_scratch_hi
3280 // are theoretically valid but they are disabled anyway.
3281 // Note that this code mimics SIInstrInfo::verifyInstruction
3282 if (!SGPRsUsed.count(LastSGPR)) {
3283 SGPRsUsed.insert(LastSGPR);
3284 ++ConstantBusUseCount;
3285 }
3286 } else { // Expression or a literal
3287
3288 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3289 continue; // special operand like VINTERP attr_chan
3290
3291 // An instruction may use only one literal.
3292 // This has been validated in a previous step.
3293 // See validateVOP3Literal.
3294 // This literal may be used as more than one operand.
3295 // If all these operands are of the same size,
3296 // this literal counts as one scalar value.
3297 // Otherwise it counts as 2 scalar values.
3298 // See "GFX10 Shader Programming", section 3.6.2.3.
3299
3300 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3301 if (Size < 4) Size = 4;
3302
3303 if (NumLiterals == 0) {
3304 NumLiterals = 1;
3305 LiteralSize = Size;
3306 } else if (LiteralSize != Size) {
3307 NumLiterals = 2;
3308 }
3309 }
3310 }
3311 }
3312 }
3313 ConstantBusUseCount += NumLiterals;
3314
3315 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3316 return true;
3317
3318 SMLoc LitLoc = getLitLoc(Operands);
3319 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3320 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3321 Error(Loc, "invalid operand (violates constant bus restrictions)");
3322 return false;
3323}
3324
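A hedged assembly-level illustration of the limit enforced above (pre-GFX10 targets allow a single scalar value on the constant bus for these encodings):

// v_add_f32_e64 v0, s0, s1   ; two distinct SGPRs -> count 2 > limit 1, rejected
//                            ; with "invalid operand (violates constant bus restrictions)"
// v_add_f32_e64 v0, s0, s0   ; the same SGPR counts once -> accepted
// v_add_f32_e64 v0, s0, v1   ; one SGPR plus a VGPR -> accepted
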
3325bool
3326AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3327 const OperandVector &Operands) {
3328 const unsigned Opcode = Inst.getOpcode();
3329 const MCInstrDesc &Desc = MII.get(Opcode);
3330
3331 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3332 if (DstIdx == -1 ||
3333 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3334 return true;
3335 }
3336
3337 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3338
3339 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3340 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3341 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3342
3343  assert(DstIdx != -1);
3344  const MCOperand &Dst = Inst.getOperand(DstIdx);
3345  assert(Dst.isReg());
3346 const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3347
3348 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3349
3350 for (int SrcIdx : SrcIndices) {
3351 if (SrcIdx == -1) break;
3352 const MCOperand &Src = Inst.getOperand(SrcIdx);
3353 if (Src.isReg()) {
3354 const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3355 if (isRegIntersect(DstReg, SrcReg, TRI)) {
3356 Error(getRegLoc(SrcReg, Operands),
3357 "destination must be different than all sources");
3358 return false;
3359 }
3360 }
3361 }
3362
3363 return true;
3364}
3365
3366bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3367
3368 const unsigned Opc = Inst.getOpcode();
3369 const MCInstrDesc &Desc = MII.get(Opc);
3370
3371 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3372 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3373    assert(ClampIdx != -1);
3374 return Inst.getOperand(ClampIdx).getImm() == 0;
3375 }
3376
3377 return true;
3378}
3379
3380bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3381
3382 const unsigned Opc = Inst.getOpcode();
3383 const MCInstrDesc &Desc = MII.get(Opc);
3384
3385 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3386 return true;
3387
3388 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3389 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3390 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3391
3392  assert(VDataIdx != -1);
3393
3394 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3395 return true;
3396
3397 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3398 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3399 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3400 if (DMask == 0)
3401 DMask = 1;
3402
3403 unsigned DataSize =
3404 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3405 if (hasPackedD16()) {
3406 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3407 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3408 DataSize = (DataSize + 1) / 2;
3409 }
3410
3411 return (VDataSize / 4) == DataSize + TFESize;
3412}
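
// Illustrative sketch (hypothetical helper, not part of the actual parser):
// the expected vdata size in dwords derived from dmask, tfe and packed d16,
// as checked above. Example: dmask=0xf with tfe expects 5 dwords, while
// dmask=0xf with packed d16 expects 2 dwords.
static unsigned expectedVDataDwords(unsigned DMask, bool IsGather4, bool TFE,
                                    bool PackedD16) {
  unsigned Components = 0;
  for (unsigned Mask = (DMask & 0xf) ? (DMask & 0xf) : 1; Mask; Mask >>= 1)
    Components += Mask & 1;                 // popcount of dmask (0 acts as 1)
  if (IsGather4)
    Components = 4;                         // gather4 always returns 4 values
  if (PackedD16)
    Components = (Components + 1) / 2;      // two 16-bit values per dword
  return Components + (TFE ? 1 : 0);        // tfe appends one extra dword
}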
3413
3414bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3415 const unsigned Opc = Inst.getOpcode();
3416 const MCInstrDesc &Desc = MII.get(Opc);
3417
3418 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3419 return true;
3420
3421 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3422
3423 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3424 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3425 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3426 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3427 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3428 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3429
3430  assert(VAddr0Idx != -1);
3431  assert(SrsrcIdx != -1);
3432  assert(SrsrcIdx > VAddr0Idx);
3433
3434 if (DimIdx == -1)
3435 return true; // intersect_ray
3436
3437 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3438 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3439 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3440 unsigned ActualAddrSize =
3441 IsNSA ? SrsrcIdx - VAddr0Idx
3442 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3443 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3444
3445 unsigned ExpectedAddrSize =
3446 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3447
3448 if (!IsNSA) {
3449 if (ExpectedAddrSize > 8)
3450 ExpectedAddrSize = 16;
3451
3452 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3453 // This provides backward compatibility for assembly created
3454 // before 160b/192b/224b types were directly supported.
3455 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3456 return true;
3457 }
3458
3459 return ActualAddrSize == ExpectedAddrSize;
3460}
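
// Illustrative sketch (hypothetical helper, not part of the actual parser):
// the non-NSA acceptance rule applied above. An expected size above 8 dwords
// rounds up to a 16-dword tuple, and an 8-dword vaddr is still accepted when
// only 5, 6 or 7 dwords are strictly required, for backward compatibility.
static bool acceptNonNSAAddr(unsigned ActualDwords, unsigned ExpectedDwords) {
  if (ExpectedDwords > 8)
    ExpectedDwords = 16;
  if (ActualDwords == 8 && ExpectedDwords >= 5 && ExpectedDwords <= 7)
    return true; // oversized vaddr kept for older assembly
  return ActualDwords == ExpectedDwords;
}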
3461
3462bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3463
3464 const unsigned Opc = Inst.getOpcode();
3465 const MCInstrDesc &Desc = MII.get(Opc);
3466
3467 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3468 return true;
3469 if (!Desc.mayLoad() || !Desc.mayStore())
3470 return true; // Not atomic
3471
3472 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3473 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3474
3475 // This is an incomplete check because image_atomic_cmpswap
3476 // may only use 0x3 and 0xf while other atomic operations
3477 // may use 0x1 and 0x3. However these limitations are
3478 // verified when we check that dmask matches dst size.
3479 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3480}
3481
3482bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3483
3484 const unsigned Opc = Inst.getOpcode();
3485 const MCInstrDesc &Desc = MII.get(Opc);
3486
3487 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3488 return true;
3489
3490 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3491 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3492
3493 // GATHER4 instructions use dmask in a different fashion compared to
3494 // other MIMG instructions. The only useful DMASK values are
3495 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3496 // (red,red,red,red) etc.) The ISA document doesn't mention
3497 // this.
3498 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3499}
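
// Illustrative sketch (hypothetical helper, not part of the actual parser):
// the dmask patterns accepted by the two checks above. Atomic image ops may
// use 0x1, 0x3 or 0xf; gather4 broadcasts a single channel, so exactly one
// bit may be set (e.g. dmask:0x2 returns the green component four times).
static bool isAcceptedMIMGDMask(unsigned DMask, bool IsGather4) {
  DMask &= 0xf;
  if (IsGather4)
    return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; // atomic case
}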
3500
3501bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3502 const unsigned Opc = Inst.getOpcode();
3503 const MCInstrDesc &Desc = MII.get(Opc);
3504
3505 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3506 return true;
3507
3508 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3509 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3510 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3511
3512 if (!BaseOpcode->MSAA)
3513 return true;
3514
3515 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3516  assert(DimIdx != -1);
3517
3518 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3519 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3520
3521 return DimInfo->MSAA;
3522}
3523
3524static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3525{
3526 switch (Opcode) {
3527 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3528 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3529 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3530 return true;
3531 default:
3532 return false;
3533 }
3534}
3535
3536// movrels* opcodes should only allow VGPRs as src0.
3537// This is specified in .td description for vop1/vop3,
3538// but sdwa is handled differently. See isSDWAOperand.
3539bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3540 const OperandVector &Operands) {
3541
3542 const unsigned Opc = Inst.getOpcode();
3543 const MCInstrDesc &Desc = MII.get(Opc);
3544
3545 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3546 return true;
3547
3548 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3549  assert(Src0Idx != -1);
3550
3551 SMLoc ErrLoc;
3552 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3553 if (Src0.isReg()) {
3554 auto Reg = mc2PseudoReg(Src0.getReg());
3555 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3556 if (!isSGPR(Reg, TRI))
3557 return true;
3558 ErrLoc = getRegLoc(Reg, Operands);
3559 } else {
3560 ErrLoc = getConstLoc(Operands);
3561 }
3562
3563 Error(ErrLoc, "source operand must be a VGPR");
3564 return false;
3565}
3566
3567bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3568 const OperandVector &Operands) {
3569
3570 const unsigned Opc = Inst.getOpcode();
3571
3572 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3573 return true;
3574
3575 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3576  assert(Src0Idx != -1);
3577
3578 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3579 if (!Src0.isReg())
3580 return true;
3581
3582 auto Reg = mc2PseudoReg(Src0.getReg());
3583 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3584 if (isSGPR(Reg, TRI)) {
3585 Error(getRegLoc(Reg, Operands),
3586 "source operand must be either a VGPR or an inline constant");
3587 return false;
3588 }
3589
3590 return true;
3591}
3592
3593bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3594 switch (Inst.getOpcode()) {
3595 default:
3596 return true;
3597 case V_DIV_SCALE_F32_gfx6_gfx7:
3598 case V_DIV_SCALE_F32_vi:
3599 case V_DIV_SCALE_F32_gfx10:
3600 case V_DIV_SCALE_F64_gfx6_gfx7:
3601 case V_DIV_SCALE_F64_vi:
3602 case V_DIV_SCALE_F64_gfx10:
3603 break;
3604 }
3605
3606 // TODO: Check that src0 = src1 or src2.
3607
3608 for (auto Name : {AMDGPU::OpName::src0_modifiers,
3609 AMDGPU::OpName::src1_modifiers,
3610 AMDGPU::OpName::src2_modifiers}) {
3611 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3612 .getImm() &
3613 SISrcMods::ABS) {
3614 return false;
3615 }
3616 }
3617
3618 return true;
3619}
3620
3621bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3622
3623 const unsigned Opc = Inst.getOpcode();
3624 const MCInstrDesc &Desc = MII.get(Opc);
3625
3626 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3627 return true;
3628
3629 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3630 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3631 if (isCI() || isSI())
3632 return false;
3633 }
3634
3635 return true;
3636}
3637
3638bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3639 const unsigned Opc = Inst.getOpcode();
3640 const MCInstrDesc &Desc = MII.get(Opc);
3641
3642 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3643 return true;
3644
3645 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3646 if (DimIdx < 0)
3647 return true;
3648
3649 long Imm = Inst.getOperand(DimIdx).getImm();
3650 if (Imm < 0 || Imm >= 8)
3651 return false;
3652
3653 return true;
3654}
3655
3656static bool IsRevOpcode(const unsigned Opcode)
3657{
3658 switch (Opcode) {
3659 case AMDGPU::V_SUBREV_F32_e32:
3660 case AMDGPU::V_SUBREV_F32_e64:
3661 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3662 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3663 case AMDGPU::V_SUBREV_F32_e32_vi:
3664 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3665 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3666 case AMDGPU::V_SUBREV_F32_e64_vi:
3667
3668 case AMDGPU::V_SUBREV_CO_U32_e32:
3669 case AMDGPU::V_SUBREV_CO_U32_e64:
3670 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3671 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3672
3673 case AMDGPU::V_SUBBREV_U32_e32:
3674 case AMDGPU::V_SUBBREV_U32_e64:
3675 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3676 case AMDGPU::V_SUBBREV_U32_e32_vi:
3677 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3678 case AMDGPU::V_SUBBREV_U32_e64_vi:
3679
3680 case AMDGPU::V_SUBREV_U32_e32:
3681 case AMDGPU::V_SUBREV_U32_e64:
3682 case AMDGPU::V_SUBREV_U32_e32_gfx9:
3683 case AMDGPU::V_SUBREV_U32_e32_vi:
3684 case AMDGPU::V_SUBREV_U32_e64_gfx9:
3685 case AMDGPU::V_SUBREV_U32_e64_vi:
3686
3687 case AMDGPU::V_SUBREV_F16_e32:
3688 case AMDGPU::V_SUBREV_F16_e64:
3689 case AMDGPU::V_SUBREV_F16_e32_gfx10:
3690 case AMDGPU::V_SUBREV_F16_e32_vi:
3691 case AMDGPU::V_SUBREV_F16_e64_gfx10:
3692 case AMDGPU::V_SUBREV_F16_e64_vi:
3693
3694 case AMDGPU::V_SUBREV_U16_e32:
3695 case AMDGPU::V_SUBREV_U16_e64:
3696 case AMDGPU::V_SUBREV_U16_e32_vi:
3697 case AMDGPU::V_SUBREV_U16_e64_vi:
3698
3699 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3700 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3701 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3702
3703 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3704 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3705
3706 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3707 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3708
3709 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3710 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3711
3712 case AMDGPU::V_LSHRREV_B32_e32:
3713 case AMDGPU::V_LSHRREV_B32_e64:
3714 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3715 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3716 case AMDGPU::V_LSHRREV_B32_e32_vi:
3717 case AMDGPU::V_LSHRREV_B32_e64_vi:
3718 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3719 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3720
3721 case AMDGPU::V_ASHRREV_I32_e32:
3722 case AMDGPU::V_ASHRREV_I32_e64:
3723 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3724 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3725 case AMDGPU::V_ASHRREV_I32_e32_vi:
3726 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3727 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3728 case AMDGPU::V_ASHRREV_I32_e64_vi:
3729
3730 case AMDGPU::V_LSHLREV_B32_e32:
3731 case AMDGPU::V_LSHLREV_B32_e64:
3732 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3733 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3734 case AMDGPU::V_LSHLREV_B32_e32_vi:
3735 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3736 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3737 case AMDGPU::V_LSHLREV_B32_e64_vi:
3738
3739 case AMDGPU::V_LSHLREV_B16_e32:
3740 case AMDGPU::V_LSHLREV_B16_e64:
3741 case AMDGPU::V_LSHLREV_B16_e32_vi:
3742 case AMDGPU::V_LSHLREV_B16_e64_vi:
3743 case AMDGPU::V_LSHLREV_B16_gfx10:
3744
3745 case AMDGPU::V_LSHRREV_B16_e32:
3746 case AMDGPU::V_LSHRREV_B16_e64:
3747 case AMDGPU::V_LSHRREV_B16_e32_vi:
3748 case AMDGPU::V_LSHRREV_B16_e64_vi:
3749 case AMDGPU::V_LSHRREV_B16_gfx10:
3750
3751 case AMDGPU::V_ASHRREV_I16_e32:
3752 case AMDGPU::V_ASHRREV_I16_e64:
3753 case AMDGPU::V_ASHRREV_I16_e32_vi:
3754 case AMDGPU::V_ASHRREV_I16_e64_vi:
3755 case AMDGPU::V_ASHRREV_I16_gfx10:
3756
3757 case AMDGPU::V_LSHLREV_B64_e64:
3758 case AMDGPU::V_LSHLREV_B64_gfx10:
3759 case AMDGPU::V_LSHLREV_B64_vi:
3760
3761 case AMDGPU::V_LSHRREV_B64_e64:
3762 case AMDGPU::V_LSHRREV_B64_gfx10:
3763 case AMDGPU::V_LSHRREV_B64_vi:
3764
3765 case AMDGPU::V_ASHRREV_I64_e64:
3766 case AMDGPU::V_ASHRREV_I64_gfx10:
3767 case AMDGPU::V_ASHRREV_I64_vi:
3768
3769 case AMDGPU::V_PK_LSHLREV_B16:
3770 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3771 case AMDGPU::V_PK_LSHLREV_B16_vi:
3772
3773 case AMDGPU::V_PK_LSHRREV_B16:
3774 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3775 case AMDGPU::V_PK_LSHRREV_B16_vi:
3776 case AMDGPU::V_PK_ASHRREV_I16:
3777 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3778 case AMDGPU::V_PK_ASHRREV_I16_vi:
3779 return true;
3780 default:
3781 return false;
3782 }
3783}
3784
3785Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3786
3787 using namespace SIInstrFlags;
3788 const unsigned Opcode = Inst.getOpcode();
3789 const MCInstrDesc &Desc = MII.get(Opcode);
3790
3791 // lds_direct register is defined so that it can be used
3792 // with 9-bit operands only. Ignore encodings which do not accept these.
3793 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3794 if ((Desc.TSFlags & Enc) == 0)
3795 return None;
3796
3797 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3798 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3799 if (SrcIdx == -1)
3800 break;
3801 const auto &Src = Inst.getOperand(SrcIdx);
3802 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3803
3804 if (isGFX90A())
3805 return StringRef("lds_direct is not supported on this GPU");
3806
3807 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3808 return StringRef("lds_direct cannot be used with this instruction");
3809
3810 if (SrcName != OpName::src0)
3811 return StringRef("lds_direct may be used as src0 only");
3812 }
3813 }
3814
3815 return None;
3816}
3817
3818SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3819 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3820 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3821 if (Op.isFlatOffset())
3822 return Op.getStartLoc();
3823 }
3824 return getLoc();
3825}
3826
3827bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3828 const OperandVector &Operands) {
3829 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3830 if ((TSFlags & SIInstrFlags::FLAT) == 0)
3831 return true;
3832
3833 auto Opcode = Inst.getOpcode();
3834 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3835  assert(OpNum != -1);
3836
3837 const auto &Op = Inst.getOperand(OpNum);
3838 if (!hasFlatOffsets() && Op.getImm() != 0) {
3839 Error(getFlatOffsetLoc(Operands),
3840 "flat offset modifier is not supported on this GPU");
3841 return false;
3842 }
3843
3844 // For FLAT segment the offset must be positive;
3845 // MSB is ignored and forced to zero.
3846 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3847 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3848 if (!isIntN(OffsetSize, Op.getImm())) {
3849 Error(getFlatOffsetLoc(Operands),
3850 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3851 return false;
3852 }
3853 } else {
3854 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3855 if (!isUIntN(OffsetSize, Op.getImm())) {
3856 Error(getFlatOffsetLoc(Operands),
3857 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3858 return false;
3859 }
3860 }
3861
3862 return true;
3863}
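
// Illustrative sketch (hypothetical helper, not part of the actual parser):
// the range test performed above. Global/scratch segments take a signed
// offset of OffsetSize bits, plain flat takes an unsigned one; for a 12-bit
// field the signed range is [-2048, 2047] and the unsigned range is
// [0, 4095]. Assumes OffsetSize is small (well below 63).
static bool fitsFlatOffset(long long Offset, unsigned OffsetSize,
                           bool IsSigned) {
  long long Lo = IsSigned ? -(1LL << (OffsetSize - 1)) : 0;
  long long Hi = IsSigned ? (1LL << (OffsetSize - 1)) - 1
                          : (1LL << OffsetSize) - 1;
  return Offset >= Lo && Offset <= Hi;
}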
3864
3865SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3866 // Start with second operand because SMEM Offset cannot be dst or src0.
3867 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3868 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3869 if (Op.isSMEMOffset())
3870 return Op.getStartLoc();
3871 }
3872 return getLoc();
3873}
3874
3875bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3876 const OperandVector &Operands) {
3877 if (isCI() || isSI())
3878 return true;
3879
3880 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3881 if ((TSFlags & SIInstrFlags::SMRD) == 0)
3882 return true;
3883
3884 auto Opcode = Inst.getOpcode();
3885 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3886 if (OpNum == -1)
3887 return true;
3888
3889 const auto &Op = Inst.getOperand(OpNum);
3890 if (!Op.isImm())
3891 return true;
3892
3893 uint64_t Offset = Op.getImm();
3894 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3895 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3896 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3897 return true;
3898
3899 Error(getSMEMOffsetLoc(Operands),
3900 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3901 "expected a 21-bit signed offset");
3902
3903 return false;
3904}
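
// Illustrative sketch (hypothetical helper, not part of the actual parser):
// the two encodings accepted above. A 20-bit unsigned immediate covers
// [0, 0xFFFFF]; targets that also take a 21-bit signed immediate extend this
// to [-0x100000, 0xFFFFF]. For example, 0x12345 is legal either way, while
// -8 needs the signed encoding.
static bool fitsSMEMOffset(long long Offset, bool AllowSigned21) {
  bool FitsU20 = Offset >= 0 && Offset <= 0xFFFFF;
  bool FitsS21 = Offset >= -0x100000LL && Offset <= 0xFFFFF;
  return FitsU20 || (AllowSigned21 && FitsS21);
}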
3905
3906bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3907 unsigned Opcode = Inst.getOpcode();
3908 const MCInstrDesc &Desc = MII.get(Opcode);
3909 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3910 return true;
3911
3912 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3913 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3914
3915 const int OpIndices[] = { Src0Idx, Src1Idx };
3916
3917 unsigned NumExprs = 0;
3918 unsigned NumLiterals = 0;
3919 uint32_t LiteralValue;
3920
3921 for (int OpIdx : OpIndices) {
3922 if (OpIdx == -1) break;
3923
3924 const MCOperand &MO = Inst.getOperand(OpIdx);
3925 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3926 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3927 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3928 uint32_t Value = static_cast<uint32_t>(MO.getImm());
3929 if (NumLiterals == 0 || LiteralValue != Value) {
3930 LiteralValue = Value;
3931 ++NumLiterals;
3932 }
3933 } else if (MO.isExpr()) {
3934 ++NumExprs;
3935 }
3936 }
3937 }
3938
3939 return NumLiterals + NumExprs <= 1;
3940}
3941
3942bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3943 const unsigned Opc = Inst.getOpcode();
3944 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3945 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3946 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3947 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3948
3949 if (OpSel & ~3)
3950 return false;
3951 }
3952 return true;
3953}
3954
3955bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
3956 const OperandVector &Operands) {
3957 const unsigned Opc = Inst.getOpcode();
3958 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
3959 if (DppCtrlIdx < 0)
3960 return true;
3961 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
3962
3963 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
3964 // DPP64 is supported for row_newbcast only.
3965 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3966 if (Src0Idx >= 0 &&
3967 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
3968 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
3969 Error(S, "64 bit dpp only supports row_newbcast");
3970 return false;
3971 }
3972 }
3973
3974 return true;
3975}
3976
3977// Check if VCC register matches wavefront size
3978bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3979 auto FB = getFeatureBits();
3980 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3981 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3982}
3983
3984// VOP3 literal is only allowed in GFX10+ and only one can be used
3985bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
3986 const OperandVector &Operands) {
3987 unsigned Opcode = Inst.getOpcode();
3988 const MCInstrDesc &Desc = MII.get(Opcode);
3989 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3990 return true;
3991
3992 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3993 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3994 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3995
3996 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3997
3998 unsigned NumExprs = 0;
3999 unsigned NumLiterals = 0;
4000 uint32_t LiteralValue;
4001
4002 for (int OpIdx : OpIndices) {
4003 if (OpIdx == -1) break;
4004
4005 const MCOperand &MO = Inst.getOperand(OpIdx);
4006 if (!MO.isImm() && !MO.isExpr())
4007 continue;
4008 if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4009 continue;
4010
4011 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4012 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4013 Error(getConstLoc(Operands),
4014 "inline constants are not allowed for this operand");
4015 return false;
4016 }
4017
4018 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4019 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4020 if (NumLiterals == 0 || LiteralValue != Value) {
4021 LiteralValue = Value;
4022 ++NumLiterals;
4023 }
4024 } else if (MO.isExpr()) {
4025 ++NumExprs;
4026 }
4027 }
4028 NumLiterals += NumExprs;
4029
4030 if (!NumLiterals)
4031 return true;
4032
4033 if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4034 Error(getLitLoc(Operands), "literal operands are not supported");
4035 return false;
4036 }
4037
4038 if (NumLiterals > 1) {
4039 Error(getLitLoc(Operands), "only one literal operand is allowed");
4040 return false;
4041 }
4042
4043 return true;
4044}
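
// Illustrative sketch (hypothetical helper, not part of the actual parser):
// the counting rule enforced above. Repeated uses of the same 32-bit literal
// value count as a single literal; a second distinct value, or any
// relocatable expression, exceeds the VOP3 limit.
static bool hasAtMostOneVOP3Literal(const unsigned *Values, unsigned NumValues,
                                    unsigned NumExprs) {
  unsigned NumLiterals = 0;
  unsigned LiteralValue = 0;
  for (unsigned I = 0; I < NumValues; ++I) {
    if (NumLiterals == 0 || LiteralValue != Values[I]) {
      LiteralValue = Values[I];
      ++NumLiterals;
    }
  }
  return NumLiterals + NumExprs <= 1; // e.g. {0x3f800000, 0x3f800000} is fine
}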
4045
4046// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4047static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4048 const MCRegisterInfo *MRI) {
4049 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4050 if (OpIdx < 0)
4051 return -1;
4052
4053 const MCOperand &Op = Inst.getOperand(OpIdx);
4054 if (!Op.isReg())
4055 return -1;
4056
4057 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4058 auto Reg = Sub ? Sub : Op.getReg();
4059 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4060 return AGPR32.contains(Reg) ? 1 : 0;
4061}
4062
4063bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4064 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4065 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4066 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4067 SIInstrFlags::DS)) == 0)
4068 return true;
4069
4070 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4071 : AMDGPU::OpName::vdata;
4072
4073 const MCRegisterInfo *MRI = getMRI();
4074 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4075 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4076
4077 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4078 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4079 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4080 return false;
4081 }
4082
4083 auto FB = getFeatureBits();
4084 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4085 if (DataAreg < 0 || DstAreg < 0)
4086 return true;
4087 return DstAreg == DataAreg;
4088 }
4089
4090 return DstAreg < 1 && DataAreg < 1;
4091}
4092
4093bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4094 auto FB = getFeatureBits();
4095 if (!FB[AMDGPU::FeatureGFX90AInsts])
4096 return true;
4097
4098 const MCRegisterInfo *MRI = getMRI();
4099 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4100 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4101 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4102 const MCOperand &Op = Inst.getOperand(I);
4103 if (!Op.isReg())
4104 continue;
4105
4106 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4107 if (!Sub)
4108 continue;
4109
4110 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4111 return false;
4112 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4113 return false;
4114 }
4115
4116 return true;
4117}
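
// Illustrative sketch (hypothetical helper, not part of the actual parser):
// the gfx90a alignment rule checked above. Multi-dword VGPR/AGPR tuples must
// start on an even register index, so v[2:3] is accepted while v[3:4] is
// rejected; single 32-bit registers are unconstrained.
static bool isEvenAlignedTuple(unsigned FirstRegIndex, unsigned NumDwords) {
  return NumDwords < 2 || (FirstRegIndex & 1) == 0;
}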
4118
4119// gfx90a has an undocumented limitation:
4120// DS_GWS opcodes must use even aligned registers.
4121bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4122 const OperandVector &Operands) {
4123 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4124 return true;
4125
4126 int Opc = Inst.getOpcode();
4127 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4128 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4129 return true;
4130
4131 const MCRegisterInfo *MRI = getMRI();
4132 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4133 int Data0Pos =
4134 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4135  assert(Data0Pos != -1);
4136 auto Reg = Inst.getOperand(Data0Pos).getReg();
4137 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4138 if (RegIdx & 1) {
4139 SMLoc RegLoc = getRegLoc(Reg, Operands);
4140 Error(RegLoc, "vgpr must be even aligned");
4141 return false;
4142 }
4143
4144 return true;
4145}
4146
4147bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4148 const OperandVector &Operands,
4149 const SMLoc &IDLoc) {
4150 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4151 AMDGPU::OpName::cpol);
4152 if (CPolPos == -1)
4153 return true;
4154
4155 unsigned CPol = Inst.getOperand(CPolPos).getImm();
4156
4157 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4158 if ((TSFlags & (SIInstrFlags::SMRD)) &&
4159 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4160 Error(IDLoc, "invalid cache policy for SMRD instruction");
4161 return false;
4162 }
4163
4164 if (isGFX90A() && (CPol & CPol::SCC)) {
4165 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4166 StringRef CStr(S.getPointer());
4167 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4168 Error(S, "scc is not supported on this GPU");
4169 return false;
4170 }
4171
4172 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4173 return true;
4174
4175 if (TSFlags & SIInstrFlags::IsAtomicRet) {
4176 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4177 Error(IDLoc, "instruction must use glc");
4178 return false;
4179 }
4180 } else {
4181 if (CPol & CPol::GLC) {
4182 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4183 StringRef CStr(S.getPointer());
4184 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4185 Error(S, "instruction must not use glc");
4186 return false;
4187 }
4188 }
4189
4190 return true;
4191}
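
// Illustrative sketch (hypothetical helper, not part of the actual parser):
// the glc rule applied above to atomics. A returning non-MIMG atomic must set
// glc (e.g. "flat_atomic_add v0, v[1:2], v3 glc"), while a non-returning
// atomic must omit it.
static bool isValidAtomicGLC(bool IsAtomicRet, bool IsMIMG, bool HasGLC) {
  if (IsAtomicRet)
    return IsMIMG || HasGLC; // non-MIMG returning atomics require glc
  return !HasGLC;            // non-returning atomics must not use glc
}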
4192
4193bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4194 const SMLoc &IDLoc,
4195 const OperandVector &Operands) {
4196 if (auto ErrMsg = validateLdsDirect(Inst)) {
4197 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4198 return false;
4199 }
4200 if (!validateSOPLiteral(Inst)) {
4201 Error(getLitLoc(Operands),
4202 "only one literal operand is allowed");
4203 return false;
4204 }
4205 if (!validateVOP3Literal(Inst, Operands)) {
4206 return false;
4207 }
4208 if (!validateConstantBusLimitations(Inst, Operands)) {
4209 return false;
4210 }
4211 if (!validateEarlyClobberLimitations(Inst, Operands)) {
4212 return false;
4213 }
4214 if (!validateIntClampSupported(Inst)) {
4215 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4216 "integer clamping is not supported on this GPU");
4217 return false;
4218 }
4219 if (!validateOpSel(Inst)) {
4220 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4221 "invalid op_sel operand");
4222 return false;
4223 }
4224 if (!validateDPP(Inst, Operands)) {
4225 return false;
4226 }
4227 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4228 if (!validateMIMGD16(Inst)) {
4229 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4230 "d16 modifier is not supported on this GPU");
4231 return false;
4232 }
4233 if (!validateMIMGDim(Inst)) {
4234 Error(IDLoc, "dim modifier is required on this GPU");
4235 return false;
4236 }
4237 if (!validateMIMGMSAA(Inst)) {
4238 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4239 "invalid dim; must be MSAA type");
4240 return false;
4241 }
4242 if (!validateMIMGDataSize(Inst)) {
4243 Error(IDLoc,
4244 "image data size does not match dmask and tfe");
4245 return false;
4246 }
4247 if (!validateMIMGAddrSize(Inst)) {
4248 Error(IDLoc,
4249 "image address size does not match dim and a16");
4250 return false;
4251 }
4252 if (!validateMIMGAtomicDMask(Inst)) {
4253 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4254 "invalid atomic image dmask");
4255 return false;
4256 }
4257 if (!validateMIMGGatherDMask(Inst)) {
4258 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4259 "invalid image_gather dmask: only one bit must be set");
4260 return false;
4261 }
4262 if (!validateMovrels(Inst, Operands)) {
4263 return false;
4264 }
4265 if (!validateFlatOffset(Inst, Operands)) {
4266 return false;
4267 }
4268 if (!validateSMEMOffset(Inst, Operands)) {
4269 return false;
4270 }
4271 if (!validateMAIAccWrite(Inst, Operands)) {
4272 return false;
4273 }
4274 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4275 return false;
4276 }
4277
4278 if (!validateAGPRLdSt(Inst)) {
4279 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4280 ? "invalid register class: data and dst should be all VGPR or AGPR"
4281 : "invalid register class: agpr loads and stores not supported on this GPU"
4282 );
4283 return false;
4284 }
4285 if (!validateVGPRAlign(Inst)) {
4286 Error(IDLoc,
4287 "invalid register class: vgpr tuples must be 64 bit aligned");
4288 return false;
4289 }
4290 if (!validateGWS(Inst, Operands)) {
4291 return false;
4292 }
4293
4294 if (!validateDivScale(Inst)) {
4295 Error(IDLoc, "ABS not allowed in VOP3B instructions");
4296 return false;
4297 }
4298 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4299 return false;
4300 }
4301
4302 return true;
4303}
4304
4305static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4306 const FeatureBitset &FBS,
4307 unsigned VariantID = 0);
4308
4309static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4310 const FeatureBitset &AvailableFeatures,
4311 unsigned VariantID);
4312
4313bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4314 const FeatureBitset &FBS) {
4315 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4316}
4317
4318bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4319 const FeatureBitset &FBS,
4320 ArrayRef<unsigned> Variants) {
4321 for (auto Variant : Variants) {
4322 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4323 return true;
4324 }
4325
4326 return false;
4327}
4328
4329bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4330 const SMLoc &IDLoc) {
4331 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4332
4333 // Check if requested instruction variant is supported.
4334 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4335 return false;
4336
4337 // This instruction is not supported.
4338 // Clear any other pending errors because they are no longer relevant.
4339 getParser().clearPendingErrors();
4340
4341 // Requested instruction variant is not supported.
4342 // Check if any other variants are supported.
4343 StringRef VariantName = getMatchedVariantName();
4344 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4345 return Error(IDLoc,
4346 Twine(VariantName,
4347 " variant of this instruction is not supported"));
4348 }
4349
4350 // Finally check if this instruction is supported on any other GPU.
4351 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4352 return Error(IDLoc, "instruction not supported on this GPU");
4353 }
4354
4355 // Instruction not supported on any GPU. Probably a typo.
4356 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4357 return Error(IDLoc, "invalid instruction" + Suggestion);
4358}
4359
4360bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4361 OperandVector &Operands,
4362 MCStreamer &Out,
4363 uint64_t &ErrorInfo,
4364 bool MatchingInlineAsm) {
4365 MCInst Inst;
4366 unsigned Result = Match_Success;
4367 for (auto Variant : getMatchedVariants()) {
4368 uint64_t EI;
4369 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4370 Variant);
4371    // We order match statuses from least to most specific, and keep the most
4372    // specific status as the result:
4373 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4374 if ((R == Match_Success) ||
4375 (R == Match_PreferE32) ||
4376 (R == Match_MissingFeature && Result != Match_PreferE32) ||
4377 (R == Match_InvalidOperand && Result != Match_MissingFeature
4378 && Result != Match_PreferE32) ||
4379 (R == Match_MnemonicFail && Result != Match_InvalidOperand
4380 && Result != Match_MissingFeature
4381 && Result != Match_PreferE32)) {
4382 Result = R;
4383 ErrorInfo = EI;
4384 }
4385 if (R == Match_Success)
4386 break;
4387 }
4388
4389 if (Result == Match_Success) {
4390 if (!validateInstruction(Inst, IDLoc, Operands)) {
4391 return true;
4392 }
4393 Inst.setLoc(IDLoc);
4394 Out.emitInstruction(Inst, getSTI());
4395 return false;
4396 }
4397
4398 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4399 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4400 return true;
4401 }
4402
4403 switch (Result) {
4404 default: break;
4405 case Match_MissingFeature:
4406 // It has been verified that the specified instruction
4407 // mnemonic is valid. A match was found but it requires
4408 // features which are not supported on this GPU.
4409 return Error(IDLoc, "operands are not valid for this GPU or mode");
4410
4411 case Match_InvalidOperand: {
4412 SMLoc ErrorLoc = IDLoc;
4413 if (ErrorInfo != ~0ULL) {
4414 if (ErrorInfo >= Operands.size()) {
4415 return Error(IDLoc, "too few operands for instruction");
4416 }
4417 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4418 if (ErrorLoc == SMLoc())
4419 ErrorLoc = IDLoc;
4420 }
4421 return Error(ErrorLoc, "invalid operand for instruction");
4422 }
4423
4424 case Match_PreferE32:
4425 return Error(IDLoc, "internal error: instruction without _e64 suffix "
4426 "should be encoded as e32");
4427 case Match_MnemonicFail:
4428    llvm_unreachable("Invalid instructions should have been handled already");
4429 }
4430  llvm_unreachable("Implement any new match types added!");
4431}
4432
4433bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4434 int64_t Tmp = -1;
4435 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4436 return true;
4437 }
4438 if (getParser().parseAbsoluteExpression(Tmp)) {
4439 return true;
4440 }
4441 Ret = static_cast<uint32_t>(Tmp);
4442 return false;
4443}
4444
4445bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4446 uint32_t &Minor) {
4447 if (ParseAsAbsoluteExpression(Major))
4448 return TokError("invalid major version");
4449
4450 if (!trySkipToken(AsmToken::Comma))
4451 return TokError("minor version number required, comma expected");
4452
4453 if (ParseAsAbsoluteExpression(Minor))
4454 return TokError("invalid minor version");
4455
4456 return false;
4457}
4458
4459bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4460 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4461 return TokError("directive only supported for amdgcn architecture");
4462
4463 std::string TargetIDDirective;
4464 SMLoc TargetStart = getTok().getLoc();
4465 if (getParser().parseEscapedString(TargetIDDirective))
4466 return true;
4467
4468 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4469 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4470 return getParser().Error(TargetRange.Start,
4471 (Twine(".amdgcn_target directive's target id ") +
4472 Twine(TargetIDDirective) +
4473 Twine(" does not match the specified target id ") +
4474 Twine(getTargetStreamer().getTargetID()->toString())).str());
4475
4476 return false;
4477}
4478
4479bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4480 return Error(Range.Start, "value out of range", Range);
4481}
4482
4483bool AMDGPUAsmParser::calculateGPRBlocks(
4484 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4485 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4486 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4487 unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4488 // TODO(scott.linder): These calculations are duplicated from
4489 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4490 IsaVersion Version = getIsaVersion(getSTI().getCPU());
4491
4492 unsigned NumVGPRs = NextFreeVGPR;
4493 unsigned NumSGPRs = NextFreeSGPR;
4494
4495 if (Version.Major >= 10)
4496 NumSGPRs = 0;
4497 else {
4498 unsigned MaxAddressableNumSGPRs =
4499 IsaInfo::getAddressableNumSGPRs(&getSTI());
4500
4501 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4502 NumSGPRs > MaxAddressableNumSGPRs)
4503 return OutOfRangeError(SGPRRange);
4504
4505 NumSGPRs +=
4506 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4507
4508 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4509 NumSGPRs > MaxAddressableNumSGPRs)
4510 return OutOfRangeError(SGPRRange);
4511
4512 if (Features.test(FeatureSGPRInitBug))
4513 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4514 }
4515
4516 VGPRBlocks =
4517 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4518 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4519
4520 return false;
4521}
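
// Illustrative sketch (hypothetical helper; the real granule sizes come from
// IsaInfo and depend on the target and wavefront size): turning a register
// count into the granulated block value stored in the kernel descriptor,
// assuming the field holds the rounded-up granule count minus one.
static unsigned granulatedCount(unsigned NumRegs, unsigned Granule) {
  unsigned Used = NumRegs ? NumRegs : 1;              // at least one register
  unsigned Rounded = (Used + Granule - 1) / Granule;  // round up to a granule
  return Rounded - 1;                                 // bias by -1 for encoding
}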
4522
4523bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4524 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4525 return TokError("directive only supported for amdgcn architecture");
4526
4527 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4528 return TokError("directive only supported for amdhsa OS");
4529
4530 StringRef KernelName;
4531 if (getParser().parseIdentifier(KernelName))
4532 return true;
4533
4534 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4535
4536 StringSet<> Seen;
4537
4538 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4539
4540 SMRange VGPRRange;
4541 uint64_t NextFreeVGPR = 0;
4542 uint64_t AccumOffset = 0;
4543 SMRange SGPRRange;
4544 uint64_t NextFreeSGPR = 0;
4545 unsigned UserSGPRCount = 0;
4546 bool ReserveVCC = true;
4547 bool ReserveFlatScr = true;
4548 Optional<bool> EnableWavefrontSize32;
4549
4550 while (true) {
4551 while (trySkipToken(AsmToken::EndOfStatement));
4552
4553 StringRef ID;
4554 SMRange IDRange = getTok().getLocRange();
4555 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4556 return true;
4557
4558 if (ID == ".end_amdhsa_kernel")
4559 break;
4560
4561 if (Seen.find(ID) != Seen.end())
4562 return TokError(".amdhsa_ directives cannot be repeated");
4563 Seen.insert(ID);
4564
4565 SMLoc ValStart = getLoc();
4566 int64_t IVal;
4567 if (getParser().parseAbsoluteExpression(IVal))
4568 return true;
4569 SMLoc ValEnd = getLoc();
4570 SMRange ValRange = SMRange(ValStart, ValEnd);
4571
4572 if (IVal < 0)
4573 return OutOfRangeError(ValRange);
4574
4575 uint64_t Val = IVal;
4576
4577#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
4578 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
4579 return OutOfRangeError(RANGE); \
4580 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4581
4582 if (ID == ".amdhsa_group_segment_fixed_size") {
4583      if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4584 return OutOfRangeError(ValRange);
4585 KD.group_segment_fixed_size = Val;
4586 } else if (ID == ".amdhsa_private_segment_fixed_size") {
4587      if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4588 return OutOfRangeError(ValRange);
4589 KD.private_segment_fixed_size = Val;
4590 } else if (ID == ".amdhsa_kernarg_size") {
4591      if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4592 return OutOfRangeError(ValRange);
4593 KD.kernarg_size = Val;
4594 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4595 if (hasArchitectedFlatScratch())
4596 return Error(IDRange.Start,
4597 "directive is not supported with architected flat scratch",
4598 IDRange);
4599 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4600 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4601 Val, ValRange);
4602 if (Val)
4603 UserSGPRCount += 4;
4604 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4605 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4606 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4607 ValRange);
4608 if (Val)
4609 UserSGPRCount += 2;
4610 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4611 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4612 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4613 ValRange);
4614 if (Val)
4615 UserSGPRCount += 2;
4616 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4617 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4618 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4619 Val, ValRange);
4620 if (Val)
4621 UserSGPRCount += 2;
4622 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4623 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4624 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4625 ValRange);
4626 if (Val)
4627 UserSGPRCount += 2;
4628 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4629 if (hasArchitectedFlatScratch())
4630 return Error(IDRange.Start,
4631 "directive is not supported with architected flat scratch",
4632 IDRange);
4633 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4634 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4635 ValRange);
4636 if (Val)
4637 UserSGPRCount += 2;
4638 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4639 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4640 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4641 Val, ValRange);
4642 if (Val)
4643 UserSGPRCount += 1;
4644 } else if (ID == ".amdhsa_wavefront_size32") {
4645 if (IVersion.Major < 10)
4646 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4647 EnableWavefrontSize32 = Val;
4648 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4649 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4650 Val, ValRange);
4651 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4652 if (hasArchitectedFlatScratch())
4653 return Error(IDRange.Start,
4654 "directive is not supported with architected flat scratch",
4655 IDRange);
4656 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4657 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4658 } else if (ID == ".amdhsa_enable_private_segment") {
4659 if (!hasArchitectedFlatScratch())
4660 return Error(
4661 IDRange.Start,
4662 "directive is not supported without architected flat scratch",
4663 IDRange);
4664 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4665 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4666 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4667 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4668 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4669 ValRange);
4670 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4671 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4672 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4673 ValRange);
4674 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4675 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4676 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4677 ValRange);
4678 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4679 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4680 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4681 ValRange);
4682 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4683 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4684 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4685 ValRange);
4686 } else if (ID == ".amdhsa_next_free_vgpr") {
4687 VGPRRange = ValRange;
4688 NextFreeVGPR = Val;
4689 } else if (ID == ".amdhsa_next_free_sgpr") {
4690 SGPRRange = ValRange;
4691 NextFreeSGPR = Val;
4692 } else if (ID == ".amdhsa_accum_offset") {
4693 if (!isGFX90A())
4694 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4695 AccumOffset = Val;
4696 } else if (ID == ".amdhsa_reserve_vcc") {
4697 if (!isUInt<1>(Val))
4698 return OutOfRangeError(ValRange);
4699 ReserveVCC = Val;
4700 } else if (ID == ".amdhsa_reserve_flat_scratch") {
4701 if (IVersion.Major < 7)
4702 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4703 if (hasArchitectedFlatScratch())
4704 return Error(IDRange.Start,
4705 "directive is not supported with architected flat scratch",
4706 IDRange);
4707 if (!isUInt<1>(Val))
4708 return OutOfRangeError(ValRange);
4709 ReserveFlatScr = Val;
4710 } else if (ID == ".amdhsa_reserve_xnack_mask") {
4711 if (IVersion.Major < 8)
4712 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4713 if (!isUInt<1>(Val))
4714 return OutOfRangeError(ValRange);
4715 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4716 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4717 IDRange);
4718 } else if (ID == ".amdhsa_float_round_mode_32") {
4719 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4720 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4721 } else if (ID == ".amdhsa_float_round_mode_16_64") {
4722 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4723 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4724 } else if (ID == ".amdhsa_float_denorm_mode_32") {
4725 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4726 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4727 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4728 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4729 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4730 ValRange);
4731 } else if (ID == ".amdhsa_dx10_clamp") {
4732 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4733 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4734 } else if (ID == ".amdhsa_ieee_mode") {
4735 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4736 Val, ValRange);
4737 } else if (ID == ".amdhsa_fp16_overflow") {
4738 if (IVersion.Major < 9)
4739 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4740 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4741 ValRange);
4742 } else if (ID == ".amdhsa_tg_split") {
4743 if (!isGFX90A())
4744 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4745 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4746 ValRange);
4747 } else if (ID == ".amdhsa_workgroup_processor_mode") {
4748 if (IVersion.Major < 10)
4749 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4750 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4751 ValRange);
4752 } else if (ID == ".amdhsa_memory_ordered") {
4753 if (IVersion.Major < 10)
4754 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4755 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4756 ValRange);
4757 } else if (ID == ".amdhsa_forward_progress") {
4758 if (IVersion.Major < 10)
4759 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4760 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4761 ValRange);
4762 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4763 PARSE_BITS_ENTRY(
4764 KD.compute_pgm_rsrc2,
4765 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4766 ValRange);
4767 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4768 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4769 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4770 Val, ValRange);
4771 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4772 PARSE_BITS_ENTRY(
4773 KD.compute_pgm_rsrc2,
4774 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4775 ValRange);
4776 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4777 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4778 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4779 Val, ValRange);
4780 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4781 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4782 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4783 Val, ValRange);
4784 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4785 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4786 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4787 Val, ValRange);
4788 } else if (ID == ".amdhsa_exception_int_div_zero") {
4789 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4790 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4791 Val, ValRange);
4792 } else {
4793 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4794 }
4795
4796#undef PARSE_BITS_ENTRY
4797 }
4798
4799 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4800 return TokError(".amdhsa_next_free_vgpr directive is required");
4801
4802 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4803 return TokError(".amdhsa_next_free_sgpr directive is required");
4804
4805 unsigned VGPRBlocks;
4806 unsigned SGPRBlocks;
4807 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4808 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4809 EnableWavefrontSize32, NextFreeVGPR,
4810 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4811 SGPRBlocks))
4812 return true;
4813
4814 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4815 VGPRBlocks))
4816 return OutOfRangeError(VGPRRange);
4817  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4818                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4819
4820 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4821 SGPRBlocks))
4822 return OutOfRangeError(SGPRRange);
4823  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4824                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4825                  SGPRBlocks);
4826
4827 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4828 return TokError("too many user SGPRs enabled");
4829  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4830                  UserSGPRCount);
4831
4832 if (isGFX90A()) {
4833 if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4834 return TokError(".amdhsa_accum_offset directive is required");
4835 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4836 return TokError("accum_offset should be in range [4..256] in "
4837 "increments of 4");
4838 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4839 return TokError("accum_offset exceeds total VGPR allocation");
4840    AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4841                    (AccumOffset / 4 - 1));
4842 }
4843
4844 getTargetStreamer().EmitAmdhsaKernelDescriptor(
4845 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4846 ReserveFlatScr);
4847 return false;
4848}
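
// Illustrative sketch (hypothetical helpers, not part of the actual parser):
// the gfx90a accum_offset handling used above. The directive value must be a
// multiple of 4 in [4, 256], must not exceed the total VGPR allocation, and
// is stored as (value / 4 - 1), so ".amdhsa_accum_offset 12" is encoded as 2.
static bool isValidAccumOffset(unsigned long long AccumOffset) {
  return AccumOffset >= 4 && AccumOffset <= 256 && (AccumOffset & 3) == 0;
}
static unsigned encodeAccumOffset(unsigned AccumOffset) {
  return AccumOffset / 4 - 1;
}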
4849
4850bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4851 uint32_t Major;
4852 uint32_t Minor;
4853
4854 if (ParseDirectiveMajorMinor(Major, Minor))
4855 return true;
4856
4857 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4858 return false;
4859}
4860
4861bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4862 uint32_t Major;
4863 uint32_t Minor;
4864 uint32_t Stepping;
4865 StringRef VendorName;
4866 StringRef ArchName;
4867
4868 // If this directive has no arguments, then use the ISA version for the
4869 // targeted GPU.
4870 if (isToken(AsmToken::EndOfStatement)) {
4871 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4872 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
4873 ISA.Stepping,
4874 "AMD", "AMDGPU");
4875 return false;
4876 }
4877
4878 if (ParseDirectiveMajorMinor(Major, Minor))
4879 return true;
4880
4881 if (!trySkipToken(AsmToken::Comma))
4882 return TokError("stepping version number required, comma expected");
4883
4884 if (ParseAsAbsoluteExpression(Stepping))
4885 return TokError("invalid stepping version");
4886
4887 if (!trySkipToken(AsmToken::Comma))
4888 return TokError("vendor name required, comma expected");
4889
4890 if (!parseString(VendorName, "invalid vendor name"))
4891 return true;
4892
4893 if (!trySkipToken(AsmToken::Comma))
4894 return TokError("arch name required, comma expected");
4895
4896 if (!parseString(ArchName, "invalid arch name"))
4897 return true;
4898
4899 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
4900 VendorName, ArchName);
4901 return false;
4902}
4903
4904bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4905 amd_kernel_code_t &Header) {
4906 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4907 // assembly for backwards compatibility.
4908 if (ID == "max_scratch_backing_memory_byte_size") {
4909 Parser.eatToEndOfStatement();
4910 return false;
4911 }
4912
4913 SmallString<40> ErrStr;
4914 raw_svector_ostream Err(ErrStr);
4915 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4916 return TokError(Err.str());
4917 }
4918 Lex();
4919
4920 if (ID == "enable_wavefront_size32") {
4921 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4922 if (!isGFX10Plus())
4923 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4924 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4925 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4926 } else {
4927 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4928 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4929 }
4930 }
4931
4932 if (ID == "wavefront_size") {
4933 if (Header.wavefront_size == 5) {
4934 if (!isGFX10Plus())
4935 return TokError("wavefront_size=5 is only allowed on GFX10+");
4936 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4937 return TokError("wavefront_size=5 requires +WavefrontSize32");
4938 } else if (Header.wavefront_size == 6) {
4939 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4940 return TokError("wavefront_size=6 requires +WavefrontSize64");
4941 }
4942 }
4943
4944 if (ID == "enable_wgp_mode") {
4945    if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
4946 !isGFX10Plus())
4947 return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4948 }
4949
4950 if (ID == "enable_mem_ordered") {
4951    if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
4952 !isGFX10Plus())
4953 return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4954 }
4955
4956 if (ID == "enable_fwd_progress") {
4957    if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
4958 !isGFX10Plus())
4959 return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4960 }
4961
4962 return false;
4963}
4964
4965bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4966 amd_kernel_code_t Header;
4967 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4968
4969 while (true) {
4970 // Lex EndOfStatement. This is in a while loop, because lexing a comment
4971 // will set the current token to EndOfStatement.
4972 while(trySkipToken(AsmToken::EndOfStatement));
4973
4974 StringRef ID;
4975 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
4976 return true;
4977
4978 if (ID == ".end_amd_kernel_code_t")
4979 break;
4980
4981 if (ParseAMDKernelCodeTValue(ID, Header))
4982 return true;
4983 }
4984
4985 getTargetStreamer().EmitAMDKernelCodeT(Header);
4986
4987 return false;
4988}
4989
4990bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4991 StringRef KernelName;
4992 if (!parseId(KernelName, "expected symbol name"))
4993 return true;
4994
4995 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4996 ELF::STT_AMDGPU_HSA_KERNEL);
4997
4998 KernelScope.initialize(getContext());
4999 return false;
5000}
5001
5002bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5003 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5004 return Error(getLoc(),
5005 ".amd_amdgpu_isa directive is not available on non-amdgcn "
5006 "architectures");
5007 }
5008
5009 auto TargetIDDirective = getLexer().getTok().getStringContents();
5010 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5011 return Error(getParser().getTok().getLoc(), "target id must match options");
5012
5013 getTargetStreamer().EmitISAVersion();
5014 Lex();
5015
5016 return false;
5017}
5018
5019bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5020 const char *AssemblerDirectiveBegin;
5021 const char *AssemblerDirectiveEnd;
5022 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5023 isHsaAbiVersion3Or4(&getSTI())
5024 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5025 HSAMD::V3::AssemblerDirectiveEnd)
5026 : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5027 HSAMD::AssemblerDirectiveEnd);
5028
5029 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5030 return Error(getLoc(),
5031 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5032 "not available on non-amdhsa OSes")).str());
5033 }
5034
5035 std::string HSAMetadataString;
5036 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5037 HSAMetadataString))
5038 return true;
5039
5040 if (isHsaAbiVersion3Or4(&getSTI())) {
5041 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5042 return Error(getLoc(), "invalid HSA metadata");
5043 } else {
5044 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5045 return Error(getLoc(), "invalid HSA metadata");
5046 }
5047
5048 return false;
5049}
5050
5051/// Common code to parse out a block of text (typically YAML) between start and
5052/// end directives.
5053bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5054 const char *AssemblerDirectiveEnd,
5055 std::string &CollectString) {
5056
5057 raw_string_ostream CollectStream(CollectString);
5058
5059 getLexer().setSkipSpace(false);
5060
5061 bool FoundEnd = false;
5062 while (!isToken(AsmToken::Eof)) {
5063 while (isToken(AsmToken::Space)) {
5064 CollectStream << getTokenStr();
5065 Lex();
5066 }
5067
5068 if (trySkipId(AssemblerDirectiveEnd)) {
5069 FoundEnd = true;
5070 break;
5071 }
5072
5073 CollectStream << Parser.parseStringToEndOfStatement()
5074 << getContext().getAsmInfo()->getSeparatorString();
5075
5076 Parser.eatToEndOfStatement();
5077 }
5078
5079 getLexer().setSkipSpace(true);
5080
5081 if (isToken(AsmToken::Eof) && !FoundEnd) {
5082 return TokError(Twine("expected directive ") +
5083 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5084 }
5085
5086 CollectStream.flush();
5087 return false;
5088}
5089
5090/// Parse the assembler directive for new MsgPack-format PAL metadata.
5091bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5092 std::string String;
5093 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5094 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5095 return true;
5096
5097 auto PALMetadata = getTargetStreamer().getPALMetadata();
5098 if (!PALMetadata->setFromString(String))
5099 return Error(getLoc(), "invalid PAL metadata");
5100 return false;
5101}
5102
5103/// Parse the assembler directive for old linear-format PAL metadata.
5104bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5105 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5106 return Error(getLoc(),
5107 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5108 "not available on non-amdpal OSes")).str());
5109 }
5110
5111 auto PALMetadata = getTargetStreamer().getPALMetadata();
5112 PALMetadata->setLegacy();
5113 for (;;) {
5114 uint32_t Key, Value;
5115 if (ParseAsAbsoluteExpression(Key)) {
5116 return TokError(Twine("invalid value in ") +
5117 Twine(PALMD::AssemblerDirective));
5118 }
5119 if (!trySkipToken(AsmToken::Comma)) {
5120 return TokError(Twine("expected an even number of values in ") +
5121 Twine(PALMD::AssemblerDirective));
5122 }
5123 if (ParseAsAbsoluteExpression(Value)) {
5124 return TokError(Twine("invalid value in ") +
5125 Twine(PALMD::AssemblerDirective));
5126 }
5127 PALMetadata->setRegister(Key, Value);
5128 if (!trySkipToken(AsmToken::Comma))
5129 break;
5130 }
5131 return false;
5132}
5133
5134/// ParseDirectiveAMDGPULDS
5135/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5136bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5137 if (getParser().checkForValidSection())
5138 return true;
5139
5140 StringRef Name;
5141 SMLoc NameLoc = getLoc();
5142 if (getParser().parseIdentifier(Name))
5143 return TokError("expected identifier in directive");
5144
5145 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5146 if (parseToken(AsmToken::Comma, "expected ','"))
5147 return true;
5148
5149 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5150
5151 int64_t Size;
5152 SMLoc SizeLoc = getLoc();
5153 if (getParser().parseAbsoluteExpression(Size))
5154 return true;
5155 if (Size < 0)
5156 return Error(SizeLoc, "size must be non-negative");
5157 if (Size > LocalMemorySize)
5158 return Error(SizeLoc, "size is too large");
5159
5160 int64_t Alignment = 4;
5161 if (trySkipToken(AsmToken::Comma)) {
5162 SMLoc AlignLoc = getLoc();
5163 if (getParser().parseAbsoluteExpression(Alignment))
5164 return true;
5165 if (Alignment < 0 || !isPowerOf2_64(Alignment))
5166 return Error(AlignLoc, "alignment must be a power of two");
5167
5168 // Alignment larger than the size of LDS is possible in theory, as long
5169    // as the linker manages to place the symbol at address 0, but we do want
5170 // to make sure the alignment fits nicely into a 32-bit integer.
5171 if (Alignment >= 1u << 31)
5172 return Error(AlignLoc, "alignment is too large");
5173 }
5174
5175 if (parseToken(AsmToken::EndOfStatement,
5176 "unexpected token in '.amdgpu_lds' directive"))
5177 return true;
5178
5179 Symbol->redefineIfPossible();
5180 if (!Symbol->isUndefined())
5181 return Error(NameLoc, "invalid symbol redefinition");
5182
5183 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5184 return false;
5185}
5186
5187bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5188 StringRef IDVal = DirectiveID.getString();
5189
5190 if (isHsaAbiVersion3Or4(&getSTI())) {
5191 if (IDVal == ".amdhsa_kernel")
5192 return ParseDirectiveAMDHSAKernel();
5193
5194 // TODO: Restructure/combine with PAL metadata directive.
5195 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5196 return ParseDirectiveHSAMetadata();
5197 } else {
5198 if (IDVal == ".hsa_code_object_version")
5199 return ParseDirectiveHSACodeObjectVersion();
5200
5201 if (IDVal == ".hsa_code_object_isa")
5202 return ParseDirectiveHSACodeObjectISA();
5203
5204 if (IDVal == ".amd_kernel_code_t")
5205 return ParseDirectiveAMDKernelCodeT();
5206
5207 if (IDVal == ".amdgpu_hsa_kernel")
5208 return ParseDirectiveAMDGPUHsaKernel();
5209
5210 if (IDVal == ".amd_amdgpu_isa")
5211 return ParseDirectiveISAVersion();
5212
5213 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5214 return ParseDirectiveHSAMetadata();
5215 }
5216
5217 if (IDVal == ".amdgcn_target")
5218 return ParseDirectiveAMDGCNTarget();
5219
5220 if (IDVal == ".amdgpu_lds")
5221 return ParseDirectiveAMDGPULDS();
5222
5223 if (IDVal == PALMD::AssemblerDirectiveBegin)
5224 return ParseDirectivePALMetadataBegin();
5225
5226 if (IDVal == PALMD::AssemblerDirective)
5227 return ParseDirectivePALMetadata();
5228
5229 return true;
5230}
5231
5232bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5233 unsigned RegNo) {
5234
5235 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5236 R.isValid(); ++R) {
5237 if (*R == RegNo)
5238 return isGFX9Plus();
5239 }
5240
5241 // GFX10 has 2 more SGPRs 104 and 105.
5242 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5243 R.isValid(); ++R) {
5244 if (*R == RegNo)
5245 return hasSGPR104_SGPR105();
5246 }
5247
5248 switch (RegNo) {
5249 case AMDGPU::SRC_SHARED_BASE:
5250 case AMDGPU::SRC_SHARED_LIMIT:
5251 case AMDGPU::SRC_PRIVATE_BASE:
5252 case AMDGPU::SRC_PRIVATE_LIMIT:
5253 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5254 return isGFX9Plus();
5255 case AMDGPU::TBA:
5256 case AMDGPU::TBA_LO:
5257 case AMDGPU::TBA_HI:
5258 case AMDGPU::TMA:
5259 case AMDGPU::TMA_LO:
5260 case AMDGPU::TMA_HI:
5261 return !isGFX9Plus();
5262 case AMDGPU::XNACK_MASK:
5263 case AMDGPU::XNACK_MASK_LO:
5264 case AMDGPU::XNACK_MASK_HI:
5265 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5266 case AMDGPU::SGPR_NULL:
5267 return isGFX10Plus();
5268 default:
5269 break;
5270 }
5271
5272 if (isCI())
5273 return true;
5274
5275 if (isSI() || isGFX10Plus()) {
5276 // No flat_scr on SI.
5277 // On GFX10 flat scratch is not a valid register operand and can only be
5278 // accessed with s_setreg/s_getreg.
5279 switch (RegNo) {
5280 case AMDGPU::FLAT_SCR:
5281 case AMDGPU::FLAT_SCR_LO:
5282 case AMDGPU::FLAT_SCR_HI:
5283 return false;
5284 default:
5285 return true;
5286 }
5287 }
5288
5289 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5290 // SI/CI have.
5291 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5292 R.isValid(); ++R) {
5293 if (*R == RegNo)
5294 return hasSGPR102_SGPR103();
5295 }
5296
5297 return true;
5298}
5299
5300OperandMatchResultTy
5301AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5302 OperandMode Mode) {
5303 // Try to parse with a custom parser
5304 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5305
5306  // If we successfully parsed the operand or if there was an error parsing,
5307 // we are done.
5308 //
5309 // If we are parsing after we reach EndOfStatement then this means we
5310 // are appending default values to the Operands list. This is only done
5311 // by custom parser, so we shouldn't continue on to the generic parsing.
5312 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5313 isToken(AsmToken::EndOfStatement))
5314 return ResTy;
5315
5316 SMLoc RBraceLoc;
5317 SMLoc LBraceLoc = getLoc();
5318 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5319 unsigned Prefix = Operands.size();
5320
5321 for (;;) {
5322 auto Loc = getLoc();
5323 ResTy = parseReg(Operands);
5324 if (ResTy == MatchOperand_NoMatch)
5325 Error(Loc, "expected a register");
5326 if (ResTy != MatchOperand_Success)
5327 return MatchOperand_ParseFail;
5328
5329 RBraceLoc = getLoc();
5330 if (trySkipToken(AsmToken::RBrac))
5331 break;
5332
5333 if (!skipToken(AsmToken::Comma,
5334 "expected a comma or a closing square bracket")) {
5335 return MatchOperand_ParseFail;
5336 }
5337 }
5338
5339 if (Operands.size() - Prefix > 1) {
5340 Operands.insert(Operands.begin() + Prefix,
5341 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5342 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5343 }
5344
5345 return MatchOperand_Success;
5346 }
5347
5348 return parseRegOrImm(Operands);
5349}
5350
5351StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5352 // Clear any forced encodings from the previous instruction.
5353 setForcedEncodingSize(0);
5354 setForcedDPP(false);
5355 setForcedSDWA(false);
5356
5357 if (Name.endswith("_e64")) {
5358 setForcedEncodingSize(64);
5359 return Name.substr(0, Name.size() - 4);
5360 } else if (Name.endswith("_e32")) {
5361 setForcedEncodingSize(32);
5362 return Name.substr(0, Name.size() - 4);
5363 } else if (Name.endswith("_dpp")) {
5364 setForcedDPP(true);
5365 return Name.substr(0, Name.size() - 4);
5366 } else if (Name.endswith("_sdwa")) {
5367 setForcedSDWA(true);
5368 return Name.substr(0, Name.size() - 5);
5369 }
5370 return Name;
5371}
5372
5373bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5374 StringRef Name,
5375 SMLoc NameLoc, OperandVector &Operands) {
5376 // Add the instruction mnemonic
5377 Name = parseMnemonicSuffix(Name);
5378 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5379
5380 bool IsMIMG = Name.startswith("image_");
5381
5382 while (!trySkipToken(AsmToken::EndOfStatement)) {
5383 OperandMode Mode = OperandMode_Default;
5384 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5385 Mode = OperandMode_NSA;
5386 CPolSeen = 0;
5387 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5388
5389 if (Res != MatchOperand_Success) {
5390 checkUnsupportedInstruction(Name, NameLoc);
5391 if (!Parser.hasPendingError()) {
5392 // FIXME: use real operand location rather than the current location.
5393 StringRef Msg =
5394 (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5395 "not a valid operand.";
5396 Error(getLoc(), Msg);
5397 }
5398 while (!trySkipToken(AsmToken::EndOfStatement)) {
5399 lex();
5400 }
5401 return true;
5402 }
5403
5404 // Eat the comma or space if there is one.
5405 trySkipToken(AsmToken::Comma);
5406 }
5407
5408 return false;
5409}
5410
5411//===----------------------------------------------------------------------===//
5412// Utility functions
5413//===----------------------------------------------------------------------===//
5414
5415OperandMatchResultTy
5416AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5417
5418 if (!trySkipId(Prefix, AsmToken::Colon))
5419 return MatchOperand_NoMatch;
5420
5421 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5422}
5423
5424OperandMatchResultTy
5425AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5426 AMDGPUOperand::ImmTy ImmTy,
5427 bool (*ConvertResult)(int64_t&)) {
5428 SMLoc S = getLoc();
5429 int64_t Value = 0;
5430
5431 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5432 if (Res != MatchOperand_Success)
5433 return Res;
5434
5435 if (ConvertResult && !ConvertResult(Value)) {
5436 Error(S, "invalid " + StringRef(Prefix) + " value.");
5437 }
5438
5439 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5440 return MatchOperand_Success;
5441}
5442
5443OperandMatchResultTy
5444AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5445 OperandVector &Operands,
5446 AMDGPUOperand::ImmTy ImmTy,
5447 bool (*ConvertResult)(int64_t&)) {
5448 SMLoc S = getLoc();
5449 if (!trySkipId(Prefix, AsmToken::Colon))
5450 return MatchOperand_NoMatch;
5451
5452 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5453 return MatchOperand_ParseFail;
5454
5455 unsigned Val = 0;
5456 const unsigned MaxSize = 4;
5457
5458 // FIXME: How to verify the number of elements matches the number of src
5459 // operands?
5460 for (int I = 0; ; ++I) {
5461 int64_t Op;
5462 SMLoc Loc = getLoc();
5463 if (!parseExpr(Op))
5464 return MatchOperand_ParseFail;
5465
5466 if (Op != 0 && Op != 1) {
5467 Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5468 return MatchOperand_ParseFail;
5469 }
5470
5471 Val |= (Op << I);
5472
5473 if (trySkipToken(AsmToken::RBrac))
5474 break;
5475
5476 if (I + 1 == MaxSize) {
5477 Error(getLoc(), "expected a closing square bracket");
5478 return MatchOperand_ParseFail;
5479 }
5480
5481 if (!skipToken(AsmToken::Comma, "expected a comma"))
5482 return MatchOperand_ParseFail;
5483 }
5484
5485 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5486 return MatchOperand_Success;
5487}
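The loop above folds a bracketed list of 0/1 values into a single bitmask, one bit per element (element I sets bit I); it is used for bracketed operands along the lines of op_sel (which exact prefixes route here is not shown in this excerpt). A minimal standalone sketch of that folding, with illustrative names only:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Sketch only: mirrors the Val |= (Op << I) accumulation above for an
    // operand written as prefix:[e0,e1,...], where every element is 0 or 1.
    static uint32_t foldBitArray(const std::vector<int> &Elems) {
      uint32_t Val = 0;
      for (size_t I = 0; I < Elems.size(); ++I)
        Val |= static_cast<uint32_t>(Elems[I]) << I;
      return Val;
    }

    int main() {
      // A hypothetical "sel:[0,1,1,0]" would produce 0b0110 == 6.
      std::printf("0x%x\n", foldBitArray({0, 1, 1, 0}));
    }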
5488
5489OperandMatchResultTy
5490AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5491 AMDGPUOperand::ImmTy ImmTy) {
5492 int64_t Bit;
5493 SMLoc S = getLoc();
5494
5495 if (trySkipId(Name)) {
5496 Bit = 1;
5497 } else if (trySkipId("no", Name)) {
5498 Bit = 0;
5499 } else {
5500 return MatchOperand_NoMatch;
5501 }
5502
5503 if (Name == "r128" && !hasMIMG_R128()) {
5504 Error(S, "r128 modifier is not supported on this GPU");
5505 return MatchOperand_ParseFail;
5506 }
5507 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5508 Error(S, "a16 modifier is not supported on this GPU");
5509 return MatchOperand_ParseFail;
5510 }
5511
5512 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5513 ImmTy = AMDGPUOperand::ImmTyR128A16;
5514
5515 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5516 return MatchOperand_Success;
5517}
5518
5519OperandMatchResultTy
5520AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5521 unsigned CPolOn = 0;
5522 unsigned CPolOff = 0;
5523 SMLoc S = getLoc();
5524
5525 if (trySkipId("glc"))
5526 CPolOn = AMDGPU::CPol::GLC;
5527 else if (trySkipId("noglc"))
5528 CPolOff = AMDGPU::CPol::GLC;
5529 else if (trySkipId("slc"))
5530 CPolOn = AMDGPU::CPol::SLC;
5531 else if (trySkipId("noslc"))
5532 CPolOff = AMDGPU::CPol::SLC;
5533 else if (trySkipId("dlc"))
5534 CPolOn = AMDGPU::CPol::DLC;
5535 else if (trySkipId("nodlc"))
5536 CPolOff = AMDGPU::CPol::DLC;
5537 else if (trySkipId("scc"))
5538 CPolOn = AMDGPU::CPol::SCC;
5539 else if (trySkipId("noscc"))
5540 CPolOff = AMDGPU::CPol::SCC;
5541 else
5542 return MatchOperand_NoMatch;
5543
5544 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5545 Error(S, "dlc modifier is not supported on this GPU");
5546 return MatchOperand_ParseFail;
5547 }
5548
5549 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5550 Error(S, "scc modifier is not supported on this GPU");
5551 return MatchOperand_ParseFail;
5552 }
5553
5554 if (CPolSeen & (CPolOn | CPolOff)) {
5555 Error(S, "duplicate cache policy modifier");
5556 return MatchOperand_ParseFail;
5557 }
5558
5559 CPolSeen |= (CPolOn | CPolOff);
5560
5561 for (unsigned I = 1; I != Operands.size(); ++I) {
5562 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5563 if (Op.isCPol()) {
5564 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5565 return MatchOperand_Success;
5566 }
5567 }
5568
5569 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5570 AMDGPUOperand::ImmTyCPol));
5571
5572 return MatchOperand_Success;
5573}
5574
5575static void addOptionalImmOperand(
5576 MCInst& Inst, const OperandVector& Operands,
5577 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5578 AMDGPUOperand::ImmTy ImmT,
5579 int64_t Default = 0) {
5580 auto i = OptionalIdx.find(ImmT);
5581 if (i != OptionalIdx.end()) {
5582 unsigned Idx = i->second;
5583 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5584 } else {
5585 Inst.addOperand(MCOperand::createImm(Default));
5586 }
5587}
5588
5589OperandMatchResultTy
5590AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5591 StringRef &Value,
5592 SMLoc &StringLoc) {
5593 if (!trySkipId(Prefix, AsmToken::Colon))
5594 return MatchOperand_NoMatch;
5595
5596 StringLoc = getLoc();
5597 return parseId(Value, "expected an identifier") ? MatchOperand_Success
5598 : MatchOperand_ParseFail;
5599}
5600
5601//===----------------------------------------------------------------------===//
5602// MTBUF format
5603//===----------------------------------------------------------------------===//
5604
5605bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5606 int64_t MaxVal,
5607 int64_t &Fmt) {
5608 int64_t Val;
5609 SMLoc Loc = getLoc();
5610
5611 auto Res = parseIntWithPrefix(Pref, Val);
5612 if (Res == MatchOperand_ParseFail)
5613 return false;
5614 if (Res == MatchOperand_NoMatch)
5615 return true;
5616
5617 if (Val < 0 || Val > MaxVal) {
5618 Error(Loc, Twine("out of range ", StringRef(Pref)));
5619 return false;
5620 }
5621
5622 Fmt = Val;
5623 return true;
5624}
5625
5626// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5627// values to live in a joint format operand in the MCInst encoding.
5628OperandMatchResultTy
5629AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5630 using namespace llvm::AMDGPU::MTBUFFormat;
5631
5632 int64_t Dfmt = DFMT_UNDEF;
5633 int64_t Nfmt = NFMT_UNDEF;
5634
5635 // dfmt and nfmt can appear in either order, and each is optional.
5636 for (int I = 0; I < 2; ++I) {
5637 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5638 return MatchOperand_ParseFail;
5639
5640 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5641 return MatchOperand_ParseFail;
5642 }
5643 // Skip optional comma between dfmt/nfmt
5644 // but guard against 2 commas following each other.
5645 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5646 !peekToken().is(AsmToken::Comma)) {
5647 trySkipToken(AsmToken::Comma);
5648 }
5649 }
5650
5651 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5652 return MatchOperand_NoMatch;
5653
5654 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5655 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5656
5657 Format = encodeDfmtNfmt(Dfmt, Nfmt);
5658 return MatchOperand_Success;
5659}
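A sketch of the joint encoding mentioned in the comment above, under the assumption (not taken from this file) that dfmt occupies the low bits of the combined operand and nfmt is shifted above it; the authoritative shift constants live in llvm::AMDGPU::MTBUFFormat:

    #include <cstdint>
    #include <cstdio>

    // Illustrative only: NfmtShift = 4 is an assumption for this sketch; the
    // real encodeDfmtNfmt() uses the shift constants from MTBUFFormat.
    static uint32_t encodeDfmtNfmtSketch(uint32_t Dfmt, uint32_t Nfmt,
                                         unsigned NfmtShift = 4) {
      return Dfmt | (Nfmt << NfmtShift);
    }

    int main() {
      // dfmt=14, nfmt=4 packed into a single format immediate.
      std::printf("0x%x\n", encodeDfmtNfmtSketch(14, 4));
    }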
5660
5661OperandMatchResultTy
5662AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5663 using namespace llvm::AMDGPU::MTBUFFormat;
5664
5665 int64_t Fmt = UFMT_UNDEF;
5666
5667 if (!tryParseFmt("format", UFMT_MAX, Fmt))
5668 return MatchOperand_ParseFail;
5669
5670 if (Fmt == UFMT_UNDEF)
5671 return MatchOperand_NoMatch;
5672
5673 Format = Fmt;
5674 return MatchOperand_Success;
5675}
5676
5677bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5678 int64_t &Nfmt,
5679 StringRef FormatStr,
5680 SMLoc Loc) {
5681 using namespace llvm::AMDGPU::MTBUFFormat;
5682 int64_t Format;
5683
5684 Format = getDfmt(FormatStr);
5685 if (Format != DFMT_UNDEF) {
5686 Dfmt = Format;
5687 return true;
5688 }
5689
5690 Format = getNfmt(FormatStr, getSTI());
5691 if (Format != NFMT_UNDEF) {
5692 Nfmt = Format;
5693 return true;
5694 }
5695
5696 Error(Loc, "unsupported format");
5697 return false;
5698}
5699
5700OperandMatchResultTy
5701AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5702 SMLoc FormatLoc,
5703 int64_t &Format) {
5704 using namespace llvm::AMDGPU::MTBUFFormat;
5705
5706 int64_t Dfmt = DFMT_UNDEF;
5707 int64_t Nfmt = NFMT_UNDEF;
5708 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5709 return MatchOperand_ParseFail;
5710
5711 if (trySkipToken(AsmToken::Comma)) {
5712 StringRef Str;
5713 SMLoc Loc = getLoc();
5714 if (!parseId(Str, "expected a format string") ||
5715 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5716 return MatchOperand_ParseFail;
5717 }
5718 if (Dfmt == DFMT_UNDEF) {
5719 Error(Loc, "duplicate numeric format");
5720 return MatchOperand_ParseFail;
5721 } else if (Nfmt == NFMT_UNDEF) {
5722 Error(Loc, "duplicate data format");
5723 return MatchOperand_ParseFail;
5724 }
5725 }
5726
5727 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5728 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5729
5730 if (isGFX10Plus()) {
5731 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5732 if (Ufmt == UFMT_UNDEF) {
5733 Error(FormatLoc, "unsupported format");
5734 return MatchOperand_ParseFail;
5735 }
5736 Format = Ufmt;
5737 } else {
5738 Format = encodeDfmtNfmt(Dfmt, Nfmt);
5739 }
5740
5741 return MatchOperand_Success;
5742}
5743
5744OperandMatchResultTy
5745AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5746 SMLoc Loc,
5747 int64_t &Format) {
5748 using namespace llvm::AMDGPU::MTBUFFormat;
5749
5750 auto Id = getUnifiedFormat(FormatStr);
5751 if (Id == UFMT_UNDEF)
5752 return MatchOperand_NoMatch;
5753
5754 if (!isGFX10Plus()) {
5755 Error(Loc, "unified format is not supported on this GPU");
5756 return MatchOperand_ParseFail;
5757 }
5758
5759 Format = Id;
5760 return MatchOperand_Success;
5761}
5762
5763OperandMatchResultTy
5764AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5765 using namespace llvm::AMDGPU::MTBUFFormat;
5766 SMLoc Loc = getLoc();
5767
5768 if (!parseExpr(Format))
5769 return MatchOperand_ParseFail;
5770 if (!isValidFormatEncoding(Format, getSTI())) {
5771 Error(Loc, "out of range format");
5772 return MatchOperand_ParseFail;
5773 }
5774
5775 return MatchOperand_Success;
5776}
5777
5778OperandMatchResultTy
5779AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5780 using namespace llvm::AMDGPU::MTBUFFormat;
5781
5782 if (!trySkipId("format", AsmToken::Colon))
5783 return MatchOperand_NoMatch;
5784
5785 if (trySkipToken(AsmToken::LBrac)) {
5786 StringRef FormatStr;
5787 SMLoc Loc = getLoc();
5788 if (!parseId(FormatStr, "expected a format string"))
5789 return MatchOperand_ParseFail;
5790
5791 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5792 if (Res == MatchOperand_NoMatch)
5793 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5794 if (Res != MatchOperand_Success)
5795 return Res;
5796
5797 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5798 return MatchOperand_ParseFail;
5799
5800 return MatchOperand_Success;
5801 }
5802
5803 return parseNumericFormat(Format);
5804}
5805
5806OperandMatchResultTy
5807AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5808 using namespace llvm::AMDGPU::MTBUFFormat;
5809
5810 int64_t Format = getDefaultFormatEncoding(getSTI());
5811 OperandMatchResultTy Res;
5812 SMLoc Loc = getLoc();
5813
5814 // Parse legacy format syntax.
5815 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5816 if (Res == MatchOperand_ParseFail)
5817 return Res;
5818
5819 bool FormatFound = (Res == MatchOperand_Success);
5820
5821 Operands.push_back(
5822 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5823
5824 if (FormatFound)
5825 trySkipToken(AsmToken::Comma);
5826
5827 if (isToken(AsmToken::EndOfStatement)) {
5828 // We are expecting an soffset operand,
5829    // but let the matcher handle the error.
5830 return MatchOperand_Success;
5831 }
5832
5833 // Parse soffset.
5834 Res = parseRegOrImm(Operands);
5835 if (Res != MatchOperand_Success)
5836 return Res;
5837
5838 trySkipToken(AsmToken::Comma);
5839
5840 if (!FormatFound) {
5841 Res = parseSymbolicOrNumericFormat(Format);
5842 if (Res == MatchOperand_ParseFail)
5843 return Res;
5844 if (Res == MatchOperand_Success) {
5845 auto Size = Operands.size();
5846 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5847      assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5848 Op.setImm(Format);
5849 }
5850 return MatchOperand_Success;
5851 }
5852
5853 if (isId("format") && peekToken().is(AsmToken::Colon)) {
5854 Error(getLoc(), "duplicate format");
5855 return MatchOperand_ParseFail;
5856 }
5857 return MatchOperand_Success;
5858}
5859
5860//===----------------------------------------------------------------------===//
5861// ds
5862//===----------------------------------------------------------------------===//
5863
5864void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5865 const OperandVector &Operands) {
5866 OptionalImmIndexMap OptionalIdx;
5867
5868 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5869 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5870
5871 // Add the register arguments
5872 if (Op.isReg()) {
5873 Op.addRegOperands(Inst, 1);
5874 continue;
5875 }
5876
5877 // Handle optional arguments
5878 OptionalIdx[Op.getImmTy()] = i;
5879 }
5880
5881 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5882 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5883 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5884
5885 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5886}
5887
5888void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5889 bool IsGdsHardcoded) {
5890 OptionalImmIndexMap OptionalIdx;
5891
5892 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5893 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5894
5895 // Add the register arguments
5896 if (Op.isReg()) {
5897 Op.addRegOperands(Inst, 1);
5898 continue;
5899 }
5900
5901 if (Op.isToken() && Op.getToken() == "gds") {
5902 IsGdsHardcoded = true;
5903 continue;
5904 }
5905
5906 // Handle optional arguments
5907 OptionalIdx[Op.getImmTy()] = i;
5908 }
5909
5910 AMDGPUOperand::ImmTy OffsetType =
5911 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5912 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5913 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5914 AMDGPUOperand::ImmTyOffset;
5915
5916 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5917
5918 if (!IsGdsHardcoded) {
5919 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5920 }
5921 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5922}
5923
5924void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5925 OptionalImmIndexMap OptionalIdx;
5926
5927 unsigned OperandIdx[4];
5928 unsigned EnMask = 0;
5929 int SrcIdx = 0;
5930
5931 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
1. Assuming 'i' is not equal to 'e'
2. Loop condition is true. Entering loop body
6. Assuming 'i' is not equal to 'e'
7. Loop condition is true. Entering loop body
11. Assuming 'i' is equal to 'e'
12. Loop condition is false. Execution continues on line 5963
5932 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5933
5934 // Add the register arguments
5935 if (Op.isReg()) {
3. Assuming the condition is true
4. Taking true branch
8. Assuming the condition is true
9. Taking true branch
5936      assert(SrcIdx < 4);
5937 OperandIdx[SrcIdx] = Inst.size();
5938 Op.addRegOperands(Inst, 1);
5939 ++SrcIdx;
5940 continue;
5. Execution continues on line 5931
10. Execution continues on line 5931
5941 }
5942
5943 if (Op.isOff()) {
5944      assert(SrcIdx < 4);
5945 OperandIdx[SrcIdx] = Inst.size();
5946 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5947 ++SrcIdx;
5948 continue;
5949 }
5950
5951 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5952 Op.addImmOperands(Inst, 1);
5953 continue;
5954 }
5955
5956 if (Op.isToken() && Op.getToken() == "done")
5957 continue;
5958
5959 // Handle optional arguments
5960 OptionalIdx[Op.getImmTy()] = i;
5961 }
5962
5963  assert(SrcIdx == 4); // a no-op in this configuration: the macro expanded to (static_cast<void>(0))
5964
5965 bool Compr = false;
5966 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
13. Calling 'operator!='
16. Returning from 'operator!='
17. Taking true branch
5967 Compr = true;
5968 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
18. 1st function call argument is an uninitialized value
5969 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5970 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5971 }
5972
5973 for (auto i = 0; i < SrcIdx; ++i) {
5974 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5975 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5976 }
5977 }
5978
5979 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5980 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5981
5982 Inst.addOperand(MCOperand::createImm(EnMask));
5983}
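The path above (steps 1 through 18) is the defect this report flags: the operand loop exits after filling only OperandIdx[0] and OperandIdx[1] (two register operands were seen, so SrcIdx ends at 2), the assert(SrcIdx == 4) is compiled out here, and the compressed-export branch then passes the indeterminate OperandIdx[2] to Inst.getOperand(). The following standalone sketch is illustrative only, not the upstream fix: it value-initializes the slot array and replaces the compiled-out assert with an explicit guard, so a partial parse can never index with an indeterminate value.

    #include <cstdio>

    // Sketch only: demonstrates initializing the slot-index array and guarding
    // the rewrite on the count of slots that were actually filled.
    int main() {
      unsigned OperandIdx[4] = {};   // value-initialized, unlike the array above
      int SrcIdx = 0;

      for (int I = 0; I < 2; ++I)    // pretend only two source slots were parsed
        OperandIdx[SrcIdx++] = static_cast<unsigned>(I);

      if (SrcIdx == 4) {             // guard instead of assert(SrcIdx == 4)
        std::printf("compressed rewrite would touch slot %u\n", OperandIdx[2]);
      } else {
        std::printf("only %d slots filled; skip the compressed rewrite\n", SrcIdx);
      }
      return 0;
    }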
5984
5985//===----------------------------------------------------------------------===//
5986// s_waitcnt
5987//===----------------------------------------------------------------------===//
5988
5989static bool
5990encodeCnt(
5991 const AMDGPU::IsaVersion ISA,
5992 int64_t &IntVal,
5993 int64_t CntVal,
5994 bool Saturate,
5995 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5996 unsigned (*decode)(const IsaVersion &Version, unsigned))
5997{
5998 bool Failed = false;
5999
6000 IntVal = encode(ISA, IntVal, CntVal);
6001 if (CntVal != decode(ISA, IntVal)) {
6002 if (Saturate) {
6003 IntVal = encode(ISA, IntVal, -1);
6004 } else {
6005 Failed = true;
6006 }
6007 }
6008 return Failed;
6009}
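encodeCnt() detects overflow by round-tripping: it writes CntVal into the waitcnt bitfield and compares what decode() reads back; a mismatch means the field was too narrow, and the _sat forms then clamp by encoding -1 (all ones). A standalone sketch of that round-trip check for a generic narrow field (the field layout below is illustrative, not the real vmcnt/expcnt/lgkmcnt layout):

    #include <cstdint>
    #include <cstdio>

    // Sketch only: a single counter field of Width bits at bit offset Shift.
    struct FieldSketch { unsigned Shift, Width; };

    static uint64_t encodeField(FieldSketch F, uint64_t Word, uint64_t Val) {
      const uint64_t Mask = (1ull << F.Width) - 1;
      return (Word & ~(Mask << F.Shift)) | ((Val & Mask) << F.Shift);
    }

    static uint64_t decodeField(FieldSketch F, uint64_t Word) {
      return (Word >> F.Shift) & ((1ull << F.Width) - 1);
    }

    int main() {
      FieldSketch Cnt{0, 4};                  // hypothetical 4-bit counter
      uint64_t Word = 0;
      uint64_t Val = 20;                      // does not fit in 4 bits
      Word = encodeField(Cnt, Word, Val);
      if (decodeField(Cnt, Word) != Val)      // round-trip mismatch: overflow
        Word = encodeField(Cnt, Word, ~0ull); // "_sat" behaviour: saturate
      std::printf("0x%llx\n", (unsigned long long)Word);
    }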
6010
6011bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6012
6013 SMLoc CntLoc = getLoc();
6014 StringRef CntName = getTokenStr();
6015
6016 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6017 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6018 return false;
6019
6020 int64_t CntVal;
6021 SMLoc ValLoc = getLoc();
6022 if (!parseExpr(CntVal))
6023 return false;
6024
6025 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6026
6027 bool Failed = true;
6028 bool Sat = CntName.endswith("_sat");
6029
6030 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6031 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6032 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6033 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6034 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6035 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6036 } else {
6037 Error(CntLoc, "invalid counter name " + CntName);
6038 return false;
6039 }
6040
6041 if (Failed) {
6042 Error(ValLoc, "too large value for " + CntName);
6043 return false;
6044 }
6045
6046 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6047 return false;
6048
6049 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6050 if (isToken(AsmToken::EndOfStatement)) {
6051 Error(getLoc(), "expected a counter name");
6052 return false;
6053 }
6054 }
6055
6056 return true;
6057}
6058
6059OperandMatchResultTy
6060AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6061 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6062 int64_t Waitcnt = getWaitcntBitMask(ISA);
6063 SMLoc S = getLoc();
6064
6065 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6066 while (!isToken(AsmToken::EndOfStatement)) {
6067 if (!parseCnt(Waitcnt))
6068 return MatchOperand_ParseFail;
6069 }
6070 } else {
6071 if (!parseExpr(Waitcnt))
6072 return MatchOperand_ParseFail;
6073 }
6074
6075 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6076 return MatchOperand_Success;
6077}
6078
6079bool
6080AMDGPUOperand::isSWaitCnt() const {
6081 return isImm();
6082}
6083
6084//===----------------------------------------------------------------------===//
6085// hwreg
6086//===----------------------------------------------------------------------===//
6087
6088bool
6089AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6090 OperandInfoTy &Offset,
6091 OperandInfoTy &Width) {
6092 using namespace llvm::AMDGPU::Hwreg;
6093
6094 // The register may be specified by name or using a numeric code
6095 HwReg.Loc = getLoc();
6096 if (isToken(AsmToken::Identifier) &&
6097 (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
6098 HwReg.IsSymbolic = true;
6099 lex(); // skip register name
6100 } else if (!parseExpr(HwReg.Id, "a register name")) {
6101 return false;
6102 }
6103
6104 if (trySkipToken(AsmToken::RParen))
6105 return true;
6106
6107 // parse optional params
6108 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6109 return false;
6110
6111 Offset.Loc = getLoc();
6112 if (!parseExpr(Offset.Id))
6113 return false;
6114
6115 if (!skipToken(AsmToken::Comma, "expected a comma"))
6116 return false;
6117
6118 Width.Loc = getLoc();
6119 return parseExpr(Width.Id) &&
6120 skipToken(AsmToken::RParen, "expected a closing parenthesis");
6121}
6122
6123bool
6124AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6125 const OperandInfoTy &Offset,
6126 const OperandInfoTy &Width) {
6127
6128 using namespace llvm::AMDGPU::Hwreg;
6129
6130 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
6131 Error(HwReg.Loc,
6132 "specified hardware register is not supported on this GPU");
6133 return false;
6134 }
6135 if (!isValidHwreg(HwReg.Id)) {
6136 Error(HwReg.Loc,
6137 "invalid code of hardware register: only 6-bit values are legal");
6138 return false;
6139 }
6140 if (!isValidHwregOffset(Offset.Id)) {
6141 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6142 return false;
6143 }
6144 if (!isValidHwregWidth(Width.Id)) {
6145 Error(Width.Loc,
6146 "invalid bitfield width: only values from 1 to 32 are legal");
6147 return false;
6148 }
6149 return true;
6150}
6151
6152OperandMatchResultTy
6153AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6154 using namespace llvm::AMDGPU::Hwreg;
6155
6156 int64_t ImmVal = 0;
6157 SMLoc Loc = getLoc();
6158
6159 if (trySkipId("hwreg", AsmToken::LParen)) {
6160 OperandInfoTy HwReg(ID_UNKNOWN_);
6161 OperandInfoTy Offset(OFFSET_DEFAULT_);
6162 OperandInfoTy Width(WIDTH_DEFAULT_);
6163 if (parseHwregBody(HwReg, Offset, Width) &&
6164 validateHwreg(HwReg, Offset, Width)) {
6165 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6166 } else {
6167 return MatchOperand_ParseFail;
6168 }
6169 } else if (parseExpr(ImmVal, "a hwreg macro")) {
6170 if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6171 Error(Loc, "invalid immediate: only 16-bit values are legal");
6172 return MatchOperand_ParseFail;
6173 }
6174 } else {
6175 return MatchOperand_ParseFail;
6176 }
6177
6178 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6179 return MatchOperand_Success;
6180}
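parseHwreg() above accepts either a raw 16-bit immediate or a hwreg(id[, offset, width]) macro packed by encodeHwreg(); the validation routine implies the field sizes (6-bit id, 5-bit offset, width 1..32). A sketch of one plausible packing, under the assumption that id sits in the low bits, offset above it, and width-1 above that (the authoritative layout is defined in llvm::AMDGPU::Hwreg):

    #include <cstdint>
    #include <cstdio>

    // Sketch only: the shift values below are assumptions for illustration.
    static uint16_t encodeHwregSketch(unsigned Id, unsigned Offset, unsigned Width) {
      return static_cast<uint16_t>((Id & 0x3f) |              // 6-bit register id
                                   ((Offset & 0x1f) << 6) |   // 5-bit bit offset
                                   (((Width - 1) & 0x1f) << 11)); // width - 1
    }

    int main() {
      // e.g. a full 32-bit read of register id 6 starting at bit 0.
      std::printf("0x%04x\n", encodeHwregSketch(6, 0, 32));
    }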
6181
6182bool AMDGPUOperand::isHwreg() const {
6183 return isImmTy(ImmTyHwreg);
6184}
6185
6186//===----------------------------------------------------------------------===//
6187// sendmsg
6188//===----------------------------------------------------------------------===//
6189
6190bool
6191AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6192 OperandInfoTy &Op,
6193 OperandInfoTy &Stream) {
6194 using namespace llvm::AMDGPU::SendMsg;
6195
6196 Msg.Loc = getLoc();
6197 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
6198 Msg.IsSymbolic = true;
6199 lex(); // skip message name
6200 } else if (!parseExpr(Msg.Id, "a message name")) {
6201 return false;
6202 }
6203
6204 if (trySkipToken(AsmToken::Comma)) {
6205 Op.IsDefined = true;
6206 Op.Loc = getLoc();
6207 if (isToken(AsmToken::Identifier) &&
6208 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6209 lex(); // skip operation name
6210 } else if (!parseExpr(Op.Id, "an operation name")) {
6211 return false;
6212 }
6213
6214 if (trySkipToken(AsmToken::Comma)) {
6215 Stream.IsDefined = true;
6216 Stream.Loc = getLoc();
6217 if (!parseExpr(Stream.Id))
6218 return false;
6219 }
6220 }
6221
6222 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6223}
6224
6225bool
6226AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6227 const OperandInfoTy &Op,
6228 const OperandInfoTy &Stream) {
6229 using namespace llvm::AMDGPU::SendMsg;
6230
6231  // Validation strictness depends on whether the message is specified
6232  // in a symbolic or in a numeric form. In the latter case
6233  // only the encoding possibility is checked.
6234 bool Strict = Msg.IsSymbolic;
6235
6236 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6237 Error(Msg.Loc, "invalid message id");
6238 return false;
6239 }
6240 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6241 if (Op.IsDefined) {
6242 Error(Op.Loc, "message does not support operations");
6243 } else {
6244 Error(Msg.Loc, "missing message operation");
6245 }
6246 return false;
6247 }
6248 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6249 Error(Op.Loc, "invalid operation id");
6250 return false;
6251 }
6252 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6253 Error(Stream.Loc, "message operation does not support streams");
6254 return false;
6255 }
6256 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6257 Error(Stream.Loc, "invalid message stream id");
6258 return false;
6259 }
6260 return true;
6261}
6262
6263OperandMatchResultTy
6264AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6265 using namespace llvm::AMDGPU::SendMsg;
6266
6267 int64_t ImmVal = 0;
6268 SMLoc Loc = getLoc();
6269
6270 if (trySkipId("sendmsg", AsmToken::LParen)) {
6271 OperandInfoTy Msg(ID_UNKNOWN_);
6272 OperandInfoTy Op(OP_NONE_);
6273 OperandInfoTy Stream(STREAM_ID_NONE_);
6274 if (parseSendMsgBody(Msg, Op, Stream) &&
6275 validateSendMsg(Msg, Op, Stream)) {
6276 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6277 } else {
6278 return MatchOperand_ParseFail;
6279 }
6280 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6281 if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6282 Error(Loc, "invalid immediate: only 16-bit values are legal");
6283 return MatchOperand_ParseFail;
6284 }
6285 } else {
6286 return MatchOperand_ParseFail;
6287 }
6288
6289 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6290 return MatchOperand_Success;
6291}
6292
6293bool AMDGPUOperand::isSendMsg() const {
6294 return isImmTy(ImmTySendMsg);
6295}
6296
6297//===----------------------------------------------------------------------===//
6298// v_interp
6299//===----------------------------------------------------------------------===//
6300
6301OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6302 StringRef Str;
6303 SMLoc S = getLoc();
6304
6305 if (!parseId(Str))
6306 return MatchOperand_NoMatch;
6307
6308 int Slot = StringSwitch<int>(Str)
6309 .Case("p10", 0)
6310 .Case("p20", 1)
6311 .Case("p0", 2)
6312 .Default(-1);
6313
6314 if (Slot == -1) {
6315 Error(S, "invalid interpolation slot");
6316 return MatchOperand_ParseFail;
6317 }
6318
6319 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6320 AMDGPUOperand::ImmTyInterpSlot));
6321 return MatchOperand_Success;
6322}
6323
6324OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6325 StringRef Str;
6326 SMLoc S = getLoc();
6327
6328 if (!parseId(Str))
6329 return MatchOperand_NoMatch;
6330
6331 if (!Str.startswith("attr")) {
6332 Error(S, "invalid interpolation attribute");
6333 return MatchOperand_ParseFail;
6334 }
6335
6336 StringRef Chan = Str.take_back(2);
6337 int AttrChan = StringSwitch<int>(Chan)
6338 .Case(".x", 0)
6339 .Case(".y", 1)
6340 .Case(".z", 2)
6341 .Case(".w", 3)
6342 .Default(-1);
6343 if (AttrChan == -1) {
6344 Error(S, "invalid or missing interpolation attribute channel");
6345 return MatchOperand_ParseFail;
6346 }
6347
6348 Str = Str.drop_back(2).drop_front(4);
6349
6350 uint8_t Attr;
6351 if (Str.getAsInteger(10, Attr)) {
6352 Error(S, "invalid or missing interpolation attribute number");
6353 return MatchOperand_ParseFail;
6354 }
6355
6356 if (Attr > 63) {
6357 Error(S, "out of bounds interpolation attribute number");
6358 return MatchOperand_ParseFail;
6359 }
6360
6361 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6362
6363 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6364 AMDGPUOperand::ImmTyInterpAttr));
6365 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6366 AMDGPUOperand::ImmTyAttrChan));
6367 return MatchOperand_Success;
6368}
6369
6370//===----------------------------------------------------------------------===//
6371// exp
6372//===----------------------------------------------------------------------===//
6373
6374OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6375 using namespace llvm::AMDGPU::Exp;
6376
6377 StringRef Str;
6378 SMLoc S = getLoc();
6379
6380 if (!parseId(Str))
6381 return MatchOperand_NoMatch;
6382
6383 unsigned Id = getTgtId(Str);
6384 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6385 Error(S, (Id == ET_INVALID) ?
6386 "invalid exp target" :
6387 "exp target is not supported on this GPU");
6388 return MatchOperand_ParseFail;
6389 }
6390
6391 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6392 AMDGPUOperand::ImmTyExpTgt));
6393 return MatchOperand_Success;
6394}
6395
6396//===----------------------------------------------------------------------===//
6397// parser helpers
6398//===----------------------------------------------------------------------===//
6399
6400bool
6401AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6402 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6403}
6404
6405bool
6406AMDGPUAsmParser::isId(const StringRef Id) const {
6407 return isId(getToken(), Id);
6408}
6409
6410bool
6411AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6412 return getTokenKind() == Kind;
6413}
6414
6415bool
6416AMDGPUAsmParser::trySkipId(const StringRef Id) {
6417 if (isId(Id)) {
6418 lex();
6419 return true;
6420 }
6421 return false;
6422}
6423
6424bool
6425AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6426 if (isToken(AsmToken::Identifier)) {
6427 StringRef Tok = getTokenStr();
6428 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6429 lex();
6430 return true;
6431 }
6432 }
6433 return false;
6434}
6435
6436bool
6437AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6438 if (isId(Id) && peekToken().is(Kind)) {
6439 lex();
6440 lex();
6441 return true;
6442 }
6443 return false;
6444}
6445
6446bool
6447AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6448 if (isToken(Kind)) {
6449 lex();
6450 return true;
6451 }
6452 return false;
6453}
6454
6455bool
6456AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6457 const StringRef ErrMsg) {
6458 if (!trySkipToken(Kind)) {
6459 Error(getLoc(), ErrMsg);
6460 return false;
6461 }
6462 return true;
6463}
6464
6465bool
6466AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6467 SMLoc S = getLoc();
6468
6469 const MCExpr *Expr;
6470 if (Parser.parseExpression(Expr))
6471 return false;
6472
6473 if (Expr->evaluateAsAbsolute(Imm))
6474 return true;
6475
6476 if (Expected.empty()) {
6477 Error(S, "expected absolute expression");
6478 } else {
6479 Error(S, Twine("expected ", Expected) +
6480 Twine(" or an absolute expression"));
6481 }
6482 return false;
6483}
6484
6485bool
6486AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6487 SMLoc S = getLoc();
6488
6489 const MCExpr *Expr;
6490 if (Parser.parseExpression(Expr))
6491 return false;
6492
6493 int64_t IntVal;
6494 if (Expr->evaluateAsAbsolute(IntVal)) {
6495 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6496 } else {
6497 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6498 }
6499 return true;
6500}
6501
6502bool
6503AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6504 if (isToken(AsmToken::String)) {
6505 Val = getToken().getStringContents();
6506 lex();
6507 return true;
6508 } else {
6509 Error(getLoc(), ErrMsg);
6510 return false;
6511 }
6512}
6513
6514bool
6515AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6516 if (isToken(AsmToken::Identifier)) {
6517 Val = getTokenStr();
6518 lex();
6519 return true;
6520 } else {
6521 if (!ErrMsg.empty())
6522 Error(getLoc(), ErrMsg);
6523 return false;
6524 }
6525}
6526
6527AsmToken
6528AMDGPUAsmParser::getToken() const {
6529 return Parser.getTok();
6530}
6531
6532AsmToken
6533AMDGPUAsmParser::peekToken() {
6534 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6535}
6536
6537void
6538AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6539 auto TokCount = getLexer().peekTokens(Tokens);
6540
6541 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6542 Tokens[Idx] = AsmToken(AsmToken::Error, "");
6543}
6544
6545AsmToken::TokenKind
6546AMDGPUAsmParser::getTokenKind() const {
6547 return getLexer().getKind();
6548}
6549
6550SMLoc
6551AMDGPUAsmParser::getLoc() const {
6552 return getToken().getLoc();
6553}
6554
6555StringRef
6556AMDGPUAsmParser::getTokenStr() const {
6557 return getToken().getString();
6558}
6559
6560void
6561AMDGPUAsmParser::lex() {
6562 Parser.Lex();
6563}
6564
6565SMLoc
6566AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6567 const OperandVector &Operands) const {
6568 for (unsigned i = Operands.size() - 1; i > 0; --i) {
6569 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6570 if (Test(Op))
6571 return Op.getStartLoc();
6572 }
6573 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6574}
6575
6576SMLoc
6577AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6578 const OperandVector &Operands) const {
6579 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6580 return getOperandLoc(Test, Operands);
6581}
6582
6583SMLoc
6584AMDGPUAsmParser::getRegLoc(unsigned Reg,
6585 const OperandVector &Operands) const {
6586 auto Test = [=](const AMDGPUOperand& Op) {
6587 return Op.isRegKind() && Op.getReg() == Reg;
6588 };
6589 return getOperandLoc(Test, Operands);
6590}
6591
6592SMLoc
6593AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6594 auto Test = [](const AMDGPUOperand& Op) {
6595 return Op.IsImmKindLiteral() || Op.isExpr();
6596 };
6597 return getOperandLoc(Test, Operands);
6598}
6599
6600SMLoc
6601AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6602 auto Test = [](const AMDGPUOperand& Op) {
6603 return Op.isImmKindConst();
6604 };
6605 return getOperandLoc(Test, Operands);
6606}
6607
6608//===----------------------------------------------------------------------===//
6609// swizzle
6610//===----------------------------------------------------------------------===//
6611
6612LLVM_READNONE
6613static unsigned
6614encodeBitmaskPerm(const unsigned AndMask,
6615 const unsigned OrMask,
6616 const unsigned XorMask) {
6617 using namespace llvm::AMDGPU::Swizzle;
6618
6619 return BITMASK_PERM_ENC |
6620 (AndMask << BITMASK_AND_SHIFT) |
6621 (OrMask << BITMASK_OR_SHIFT) |
6622 (XorMask << BITMASK_XOR_SHIFT);
6623}
6624
6625bool
6626AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6627 const unsigned MinVal,
6628 const unsigned MaxVal,
6629 const StringRef ErrMsg,
6630 SMLoc &Loc) {
6631 if (!skipToken(AsmToken::Comma, "expected a comma")) {
6632 return false;
6633 }
6634 Loc = getLoc();
6635 if (!parseExpr(Op)) {
6636 return false;
6637 }
6638 if (Op < MinVal || Op > MaxVal) {
6639 Error(Loc, ErrMsg);
6640 return false;
6641 }
6642
6643 return true;
6644}
6645
6646bool
6647AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6648 const unsigned MinVal,
6649 const unsigned MaxVal,
6650 const StringRef ErrMsg) {
6651 SMLoc Loc;
6652 for (unsigned i = 0; i < OpNum; ++i) {
6653 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6654 return false;
6655 }
6656
6657 return true;
6658}
6659
6660bool
6661AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6662 using namespace llvm::AMDGPU::Swizzle;
6663
6664 int64_t Lane[LANE_NUM];
6665 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6666 "expected a 2-bit lane id")) {
6667 Imm = QUAD_PERM_ENC;
6668 for (unsigned I = 0; I < LANE_NUM; ++I) {
6669 Imm |= Lane[I] << (LANE_SHIFT * I);
6670 }
6671 return true;
6672 }
6673 return false;
6674}
6675
6676bool
6677AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6678 using namespace llvm::AMDGPU::Swizzle;
6679
6680 SMLoc Loc;
6681 int64_t GroupSize;
6682 int64_t LaneIdx;
6683
6684 if (!parseSwizzleOperand(GroupSize,
6685 2, 32,
6686 "group size must be in the interval [2,32]",
6687 Loc)) {
6688 return false;
6689 }
6690 if (!isPowerOf2_64(GroupSize)) {
6691 Error(Loc, "group size must be a power of two");
6692 return false;
6693 }
6694 if (parseSwizzleOperand(LaneIdx,
6695 0, GroupSize - 1,
6696 "lane id must be in the interval [0,group size - 1]",
6697 Loc)) {
6698 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6699 return true;
6700 }
6701 return false;
6702}
6703
6704bool
6705AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6706 using namespace llvm::AMDGPU::Swizzle;
6707
6708 SMLoc Loc;
6709 int64_t GroupSize;
6710
6711 if (!parseSwizzleOperand(GroupSize,
6712 2, 32,
6713 "group size must be in the interval [2,32]",
6714 Loc)) {
6715 return false;
6716 }
6717 if (!isPowerOf2_64(GroupSize)) {
6718 Error(Loc, "group size must be a power of two");
6719 return false;
6720 }
6721
6722 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6723 return true;
6724}
6725
6726bool
6727AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6728 using namespace llvm::AMDGPU::Swizzle;
6729
6730 SMLoc Loc;
6731 int64_t GroupSize;
6732
6733 if (!parseSwizzleOperand(GroupSize,
6734 1, 16,
6735 "group size must be in the interval [1,16]",
6736 Loc)) {
6737 return false;
6738 }
6739 if (!isPowerOf2_64(GroupSize)) {
6740 Error(Loc, "group size must be a power of two");
6741 return false;
6742 }
6743
6744 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6745 return true;
6746}
6747
6748bool
6749AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6750 using namespace llvm::AMDGPU::Swizzle;
6751
6752 if (!skipToken(AsmToken::Comma, "expected a comma")) {
6753 return false;
6754 }
6755
6756 StringRef Ctl;
6757 SMLoc StrLoc = getLoc();
6758 if (!parseString(Ctl)) {
6759 return false;
6760 }
6761 if (Ctl.size() != BITMASK_WIDTH) {
6762 Error(StrLoc, "expected a 5-character mask");
6763 return false;
6764 }
6765
6766 unsigned AndMask = 0;
6767 unsigned OrMask = 0;
6768 unsigned XorMask = 0;
6769
6770 for (size_t i = 0; i < Ctl.size(); ++i) {
6771 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6772 switch(Ctl[i]) {
6773 default:
6774 Error(StrLoc, "invalid mask");
6775 return false;
6776 case '0':
6777 break;
6778 case '1':
6779 OrMask |= Mask;
6780 break;
6781 case 'p':
6782 AndMask |= Mask;
6783 break;
6784 case 'i':
6785 AndMask |= Mask;
6786 XorMask |= Mask;
6787 break;
6788 }
6789 }
6790
6791 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6792 return true;
6793}
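Each character of the 5-character control string above drives one bit of the lane id, most significant bit first: '0' forces the bit to 0, '1' forces it to 1 (OrMask), 'p' preserves it (AndMask), and 'i' inverts it (AndMask plus XorMask). The standalone sketch below derives the three masks the same way and then applies them with a commonly cited description of the ds_swizzle bitmask mode, out = ((lane & and) | or) ^ xor; treat that rule as an assumption here, since it is not stated in this file:

    #include <cstdio>
    #include <string>

    // Sketch only: derive (And, Or, Xor) from a "01pi"-style control string and
    // show how a lane id would be permuted under the assumed bitmask rule.
    struct MasksSketch { unsigned And = 0, Or = 0, Xor = 0; };

    static MasksSketch parseCtl(const std::string &Ctl) { // expects 5 characters
      MasksSketch M;
      for (size_t I = 0; I < Ctl.size(); ++I) {
        unsigned Bit = 1u << (Ctl.size() - 1 - I);        // MSB first, as above
        switch (Ctl[I]) {
        case '1': M.Or  |= Bit; break;
        case 'p': M.And |= Bit; break;
        case 'i': M.And |= Bit; M.Xor |= Bit; break;
        default: break;                                   // '0': leave all clear
        }
      }
      return M;
    }

    int main() {
      MasksSketch M = parseCtl("00ppi");                  // keep bits 2..1, flip bit 0
      for (unsigned Lane = 0; Lane < 8; ++Lane)
        std::printf("lane %u -> %u\n", Lane, ((Lane & M.And) | M.Or) ^ M.Xor);
    }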
6794
6795bool
6796AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6797
6798 SMLoc OffsetLoc = getLoc();
6799
6800 if (!parseExpr(Imm, "a swizzle macro")) {
6801 return false;
6802 }
6803 if (!isUInt<16>(Imm)) {
6804 Error(OffsetLoc, "expected a 16-bit offset");
6805 return false;
6806 }
6807 return true;
6808}
6809
6810bool
6811AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6812 using namespace llvm::AMDGPU::Swizzle;
6813
6814 if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
6815
6816 SMLoc ModeLoc = getLoc();
6817 bool Ok = false;
6818
6819 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6820 Ok = parseSwizzleQuadPerm(Imm);
6821 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6822 Ok = parseSwizzleBitmaskPerm(Imm);
6823 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6824 Ok = parseSwizzleBroadcast(Imm);
6825 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6826 Ok = parseSwizzleSwap(Imm);
6827 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6828 Ok = parseSwizzleReverse(Imm);
6829 } else {
6830 Error(ModeLoc, "expected a swizzle mode");
6831 }
6832
6833 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6834 }
6835
6836 return false;
6837}
6838
6839OperandMatchResultTy
6840AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6841 SMLoc S = getLoc();
6842 int64_t Imm = 0;
6843
6844 if (trySkipId("offset")) {
6845
6846 bool Ok = false;
6847 if (skipToken(AsmToken::Colon, "expected a colon")) {
6848 if (trySkipId("swizzle")) {
6849 Ok = parseSwizzleMacro(Imm);
6850 } else {
6851 Ok = parseSwizzleOffset(Imm);
6852 }
6853 }
6854
6855 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6856
6857 return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6858 } else {
6859 // Swizzle "offset" operand is optional.
6860 // If it is omitted, try parsing other optional operands.
6861 return parseOptionalOpr(Operands);
6862 }
6863}
6864
6865bool
6866AMDGPUOperand::isSwizzle() const {
6867 return isImmTy(ImmTySwizzle);
6868}
6869
6870//===----------------------------------------------------------------------===//
6871// VGPR Index Mode
6872//===----------------------------------------------------------------------===//
6873
6874int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6875
6876 using namespace llvm::AMDGPU::VGPRIndexMode;
6877
6878 if (trySkipToken(AsmToken::RParen)) {
6879 return OFF;
6880 }
6881
6882 int64_t Imm = 0;
6883
6884 while (true) {
6885 unsigned Mode = 0;
6886 SMLoc S = getLoc();
6887
6888 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6889 if (trySkipId(IdSymbolic[ModeId])) {
6890 Mode = 1 << ModeId;
6891 break;
6892 }
6893 }
6894
6895 if (Mode == 0) {
6896 Error(S, (Imm == 0)?
6897 "expected a VGPR index mode or a closing parenthesis" :
6898 "expected a VGPR index mode");
6899 return UNDEF;
6900 }
6901
6902 if (Imm & Mode) {
6903 Error(S, "duplicate VGPR index mode");
6904 return UNDEF;
6905 }
6906 Imm |= Mode;
6907
6908 if (trySkipToken(AsmToken::RParen))
6909 break;
6910 if (!skipToken(AsmToken::Comma,
6911 "expected a comma or a closing parenthesis"))
6912 return UNDEF;
6913 }
6914
6915 return Imm;
6916}
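
parseGPRIdxMacro accumulates one bit per symbolic mode and rejects duplicates. The sketch below reproduces that accumulation in isolation; the symbolic names SRC0/SRC1/SRC2/DST and their IDs 0..3 are assumptions about llvm::AMDGPU::VGPRIndexMode::IdSymbolic, which is not shown in this file excerpt.

#include <cassert>
#include <cstdint>
#include <string>
#include <vector>

// Assumed symbolic names/IDs, mirroring llvm::AMDGPU::VGPRIndexMode::IdSymbolic.
static const char *const GprIdxNames[] = {"SRC0", "SRC1", "SRC2", "DST"};

// Accumulate one bit per mode, rejecting unknown names and duplicates,
// just like parseGPRIdxMacro above (which returns UNDEF on error).
static int64_t encodeGprIdxModes(const std::vector<std::string> &Modes) {
  int64_t Imm = 0;
  for (const std::string &M : Modes) {
    int64_t Bit = 0;
    for (unsigned Id = 0; Id < 4; ++Id)
      if (M == GprIdxNames[Id])
        Bit = int64_t(1) << Id;
    if (Bit == 0 || (Imm & Bit))
      return -1; // unknown mode or "duplicate VGPR index mode"
    Imm |= Bit;
  }
  return Imm;
}

int main() {
  assert(encodeGprIdxModes({"SRC0", "DST"}) == 0x9); // gpr_idx(SRC0, DST)
  assert(encodeGprIdxModes({"SRC1", "SRC1"}) == -1); // duplicate is rejected
  return 0;
}
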
6917
6918OperandMatchResultTy
6919AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6920
6921 using namespace llvm::AMDGPU::VGPRIndexMode;
6922
6923 int64_t Imm = 0;
6924 SMLoc S = getLoc();
6925
6926 if (trySkipId("gpr_idx", AsmToken::LParen)) {
6927 Imm = parseGPRIdxMacro();
6928 if (Imm == UNDEF)
6929 return MatchOperand_ParseFail;
6930 } else {
6931 if (getParser().parseAbsoluteExpression(Imm))
6932 return MatchOperand_ParseFail;
6933 if (Imm < 0 || !isUInt<4>(Imm)) {
6934 Error(S, "invalid immediate: only 4-bit values are legal");
6935 return MatchOperand_ParseFail;
6936 }
6937 }
6938
6939 Operands.push_back(
6940 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6941 return MatchOperand_Success;
6942}
6943
6944bool AMDGPUOperand::isGPRIdxMode() const {
6945 return isImmTy(ImmTyGprIdxMode);
6946}
6947
6948//===----------------------------------------------------------------------===//
6949// sopp branch targets
6950//===----------------------------------------------------------------------===//
6951
6952OperandMatchResultTy
6953AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6954
6955 // Make sure we are not parsing something
6956 // that looks like a label or an expression but is not.
6957 // This will improve error messages.
6958 if (isRegister() || isModifier())
6959 return MatchOperand_NoMatch;
6960
6961 if (!parseExpr(Operands))
6962 return MatchOperand_ParseFail;
6963
6964 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6965 assert(Opr.isImm() || Opr.isExpr());
6966 SMLoc Loc = Opr.getStartLoc();
6967
6968 // Currently we do not support arbitrary expressions as branch targets.
6969 // Only labels and absolute expressions are accepted.
6970 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6971 Error(Loc, "expected an absolute expression or a label");
6972 } else if (Opr.isImm() && !Opr.isS16Imm()) {
6973 Error(Loc, "expected a 16-bit signed jump offset");
6974 }
6975
6976 return MatchOperand_Success;
6977}
6978
6979//===----------------------------------------------------------------------===//
6980// Boolean holding registers
6981//===----------------------------------------------------------------------===//
6982
6983OperandMatchResultTy
6984AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6985 return parseReg(Operands);
6986}
6987
6988//===----------------------------------------------------------------------===//
6989// mubuf
6990//===----------------------------------------------------------------------===//
6991
6992AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
6993 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
6994}
6995
6996void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6997 const OperandVector &Operands,
6998 bool IsAtomic,
6999 bool IsLds) {
7000 bool IsLdsOpcode = IsLds;
7001 bool HasLdsModifier = false;
7002 OptionalImmIndexMap OptionalIdx;
7003 unsigned FirstOperandIdx = 1;
7004 bool IsAtomicReturn = false;
7005
7006 if (IsAtomic) {
7007 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7008 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7009 if (!Op.isCPol())
7010 continue;
7011 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7012 break;
7013 }
7014
7015 if (!IsAtomicReturn) {
7016 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7017 if (NewOpc != -1)
7018 Inst.setOpcode(NewOpc);
7019 }
7020
7021 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
7022 SIInstrFlags::IsAtomicRet;
7023 }
7024
7025 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7026 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7027
7028 // Add the register arguments
7029 if (Op.isReg()) {
7030 Op.addRegOperands(Inst, 1);
7031 // Insert a tied src for atomic return dst.
7032 // This cannot be postponed as subsequent calls to
7033 // addImmOperands rely on correct number of MC operands.
7034 if (IsAtomicReturn && i == FirstOperandIdx)
7035 Op.addRegOperands(Inst, 1);
7036 continue;
7037 }
7038
7039 // Handle the case where soffset is an immediate
7040 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7041 Op.addImmOperands(Inst, 1);
7042 continue;
7043 }
7044
7045 HasLdsModifier |= Op.isLDS();
7046
7047 // Handle tokens like 'offen' which are sometimes hard-coded into the
7048 // asm string. There are no MCInst operands for these.
7049 if (Op.isToken()) {
7050 continue;
7051 }
7052 assert(Op.isImm());
7053
7054 // Handle optional arguments
7055 OptionalIdx[Op.getImmTy()] = i;
7056 }
7057
7058 // This is a workaround for an llvm quirk which may result in an
7059 // incorrect instruction selection. Lds and non-lds versions of
7060 // MUBUF instructions are identical except that lds versions
7061 // have a mandatory 'lds' modifier. However, this modifier follows
7062 // optional modifiers and the llvm asm matcher regards this 'lds'
7063 // modifier as an optional one. As a result, an lds version
7064 // of an opcode may be selected even if it has no 'lds' modifier.
7065 if (IsLdsOpcode && !HasLdsModifier) {
7066 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
7067 if (NoLdsOpcode != -1) { // Got lds version - correct it.
7068 Inst.setOpcode(NoLdsOpcode);
7069 IsLdsOpcode = false;
7070 }
7071 }
7072
7073 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7074 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7075
7076 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
7077 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7078 }
7079 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7080}
7081
7082void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7083 OptionalImmIndexMap OptionalIdx;
7084
7085 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7086 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7087
7088 // Add the register arguments
7089 if (Op.isReg()) {
7090 Op.addRegOperands(Inst, 1);
7091 continue;
7092 }
7093
7094 // Handle the case where soffset is an immediate
7095 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7096 Op.addImmOperands(Inst, 1);
7097 continue;
7098 }
7099
7100 // Handle tokens like 'offen' which are sometimes hard-coded into the
7101 // asm string. There are no MCInst operands for these.
7102 if (Op.isToken()) {
7103 continue;
7104 }
7105 assert(Op.isImm());
7106
7107 // Handle optional arguments
7108 OptionalIdx[Op.getImmTy()] = i;
7109 }
7110
7111 addOptionalImmOperand(Inst, Operands, OptionalIdx,
7112 AMDGPUOperand::ImmTyOffset);
7113 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7114 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7115 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7116 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7117}
7118
7119//===----------------------------------------------------------------------===//
7120// mimg
7121//===----------------------------------------------------------------------===//
7122
7123void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7124 bool IsAtomic) {
7125 unsigned I = 1;
7126 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7127 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7128 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7129 }
7130
7131 if (IsAtomic) {
7132 // Add src, same as dst
7133 assert(Desc.getNumDefs() == 1);
7134 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7135 }
7136
7137 OptionalImmIndexMap OptionalIdx;
7138
7139 for (unsigned E = Operands.size(); I != E; ++I) {
7140 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7141
7142 // Add the register arguments
7143 if (Op.isReg()) {
7144 Op.addRegOperands(Inst, 1);
7145 } else if (Op.isImmModifier()) {
7146 OptionalIdx[Op.getImmTy()] = I;
7147 } else if (!Op.isToken()) {
7148 llvm_unreachable("unexpected operand type");
7149 }
7150 }
7151
7152 bool IsGFX10Plus = isGFX10Plus();
7153
7154 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7155 if (IsGFX10Plus)
7156 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7157 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7158 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7159 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7160 if (IsGFX10Plus)
7161 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7162 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7163 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7164 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7165 if (!IsGFX10Plus)
7166 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7167 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7168}
7169
7170void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7171 cvtMIMG(Inst, Operands, true);
7172}
7173
7174void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7175 OptionalImmIndexMap OptionalIdx;
7176 bool IsAtomicReturn = false;
7177
7178 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7179 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7180 if (!Op.isCPol())
7181 continue;
7182 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7183 break;
7184 }
7185
7186 if (!IsAtomicReturn) {
7187 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7188 if (NewOpc != -1)
7189 Inst.setOpcode(NewOpc);
7190 }
7191
7192 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
7193 SIInstrFlags::IsAtomicRet;
7194
7195 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7196 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7197
7198 // Add the register arguments
7199 if (Op.isReg()) {
7200 Op.addRegOperands(Inst, 1);
7201 if (IsAtomicReturn && i == 1)
7202 Op.addRegOperands(Inst, 1);
7203 continue;
7204 }
7205
7206 // Handle the case where soffset is an immediate
7207 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7208 Op.addImmOperands(Inst, 1);
7209 continue;
7210 }
7211
7212 // Handle tokens like 'offen' which are sometimes hard-coded into the
7213 // asm string. There are no MCInst operands for these.
7214 if (Op.isToken()) {
7215 continue;
7216 }
7217 assert(Op.isImm());
7218
7219 // Handle optional arguments
7220 OptionalIdx[Op.getImmTy()] = i;
7221 }
7222
7223 if ((int)Inst.getNumOperands() <=
7224 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7225 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7226 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7227}
7228
7229void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7230 const OperandVector &Operands) {
7231 for (unsigned I = 1; I < Operands.size(); ++I) {
7232 auto &Operand = (AMDGPUOperand &)*Operands[I];
7233 if (Operand.isReg())
7234 Operand.addRegOperands(Inst, 1);
7235 }
7236
7237 Inst.addOperand(MCOperand::createImm(1)); // a16
7238}
7239
7240//===----------------------------------------------------------------------===//
7241// smrd
7242//===----------------------------------------------------------------------===//
7243
7244bool AMDGPUOperand::isSMRDOffset8() const {
7245 return isImm() && isUInt<8>(getImm());
7246}
7247
7248bool AMDGPUOperand::isSMEMOffset() const {
7249 return isImm(); // Offset range is checked later by validator.
7250}
7251
7252bool AMDGPUOperand::isSMRDLiteralOffset() const {
7253 // 32-bit literals are only supported on CI and we only want to use them
7254 // when the offset is > 8-bits.
7255 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7256}
7257
7258AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7259 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7260}
7261
7262AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7263 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7264}
7265
7266AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7267 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7268}
7269
7270AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7271 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7272}
7273
7274//===----------------------------------------------------------------------===//
7275// vop3
7276//===----------------------------------------------------------------------===//
7277
7278static bool ConvertOmodMul(int64_t &Mul) {
7279 if (Mul != 1 && Mul != 2 && Mul != 4)
7280 return false;
7281
7282 Mul >>= 1;
7283 return true;
7284}
7285
7286static bool ConvertOmodDiv(int64_t &Div) {
7287 if (Div == 1) {
7288 Div = 0;
7289 return true;
7290 }
7291
7292 if (Div == 2) {
7293 Div = 3;
7294 return true;
7295 }
7296
7297 return false;
7298}
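
ConvertOmodMul and ConvertOmodDiv fold the user-facing mul:/div: values into a single 2-bit field; the interpretation of that field (0 = none, 1 = *2, 2 = *4, 3 = /2) is an assumption consistent with the arithmetic above. A standalone sketch of the resulting mapping:

#include <cassert>
#include <cstdint>

// Same arithmetic as ConvertOmodMul / ConvertOmodDiv above.
static bool convertOmodMul(int64_t &Mul) {
  if (Mul != 1 && Mul != 2 && Mul != 4)
    return false;
  Mul >>= 1; // mul:1 -> 0 (no omod), mul:2 -> 1, mul:4 -> 2
  return true;
}

static bool convertOmodDiv(int64_t &Div) {
  if (Div == 1) { Div = 0; return true; } // div:1 is a no-op
  if (Div == 2) { Div = 3; return true; } // div:2 selects the remaining encoding
  return false;
}

int main() {
  int64_t V = 4;
  assert(convertOmodMul(V) && V == 2); // mul:4
  V = 2;
  assert(convertOmodDiv(V) && V == 3); // div:2
  V = 3;
  assert(!convertOmodMul(V));          // mul:3 is rejected
  return 0;
}
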
7299
7300// Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7301// This is intentional and ensures compatibility with sp3.
7302// See bug 35397 for details.
7303static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7304 if (BoundCtrl == 0 || BoundCtrl == 1) {
7305 BoundCtrl = 1;
7306 return true;
7307 }
7308 return false;
7309}
7310
7311// Note: the order in this table matches the order of operands in AsmString.
7312static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7313 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr},
7314 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr},
7315 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr},
7316 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7317 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7318 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr},
7319 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
7320 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
7321 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7322 {"", AMDGPUOperand::ImmTyCPol, false, nullptr},
7323 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr},
7324 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
7325 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
7326 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr},
7327 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr},
7328 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7329 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
7330 {"da", AMDGPUOperand::ImmTyDA, true, nullptr},
7331 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
7332 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr},
7333 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
7334 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
7335 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
7336 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr},
7337 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7338 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7339 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7340 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr},
7341 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7342 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7343 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7344 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7345 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7346 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7347 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7348 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7349 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7350 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7351 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7352 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7353 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7354};
7355
7356void AMDGPUAsmParser::onBeginOfFile() {
7357 if (!getParser().getStreamer().getTargetStreamer() ||
7358 getSTI().getTargetTriple().getArch() == Triple::r600)
7359 return;
7360
7361 if (!getTargetStreamer().getTargetID())
7362 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7363
7364 if (isHsaAbiVersion3Or4(&getSTI()))
7365 getTargetStreamer().EmitDirectiveAMDGCNTarget();
7366}
7367
7368OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7369
7370 OperandMatchResultTy res = parseOptionalOpr(Operands);
7371
7372 // This is a hack to enable hardcoded mandatory operands which follow
7373 // optional operands.
7374 //
7375 // The current design assumes that all operands after the first optional operand
7376 // are also optional. However, the implementation of some instructions violates
7377 // this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
7378 //
7379 // To alleviate this problem, we have to (implicitly) parse extra operands
7380 // to make sure the autogenerated parser of custom operands never hits hardcoded
7381 // mandatory operands.
7382
7383 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7384 if (res != MatchOperand_Success ||
7385 isToken(AsmToken::EndOfStatement))
7386 break;
7387
7388 trySkipToken(AsmToken::Comma);
7389 res = parseOptionalOpr(Operands);
7390 }
7391
7392 return res;
7393}
7394
7395OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7396 OperandMatchResultTy res;
7397 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7398 // try to parse any optional operand here
7399 if (Op.IsBit) {
7400 res = parseNamedBit(Op.Name, Operands, Op.Type);
7401 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7402 res = parseOModOperand(Operands);
7403 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7404 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7405 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7406 res = parseSDWASel(Operands, Op.Name, Op.Type);
7407 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7408 res = parseSDWADstUnused(Operands);
7409 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7410 Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7411 Op.Type == AMDGPUOperand::ImmTyNegLo ||
7412 Op.Type == AMDGPUOperand::ImmTyNegHi) {
7413 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7414 Op.ConvertResult);
7415 } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7416 res = parseDim(Operands);
7417 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7418 res = parseCPol(Operands);
7419 } else {
7420 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7421 }
7422 if (res != MatchOperand_NoMatch) {
7423 return res;
7424 }
7425 }
7426 return MatchOperand_NoMatch;
7427}
7428
7429OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7430 StringRef Name = getTokenStr();
7431 if (Name == "mul") {
7432 return parseIntWithPrefix("mul", Operands,
7433 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7434 }
7435
7436 if (Name == "div") {
7437 return parseIntWithPrefix("div", Operands,
7438 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7439 }
7440
7441 return MatchOperand_NoMatch;
7442}
7443
7444void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7445 cvtVOP3P(Inst, Operands);
7446
7447 int Opc = Inst.getOpcode();
7448
7449 int SrcNum;
7450 const int Ops[] = { AMDGPU::OpName::src0,
7451 AMDGPU::OpName::src1,
7452 AMDGPU::OpName::src2 };
7453 for (SrcNum = 0;
7454 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7455 ++SrcNum);
7456 assert(SrcNum > 0);
7457
7458 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7459 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7460
7461 if ((OpSel & (1 << SrcNum)) != 0) {
7462 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7463 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7464 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7465 }
7466}
7467
7468static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7469 // 1. This operand is input modifiers
7470 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7471 // 2. This is not last operand
7472 && Desc.NumOperands > (OpNum + 1)
7473 // 3. Next operand is register class
7474 && Desc.OpInfo[OpNum + 1].RegClass != -1
7475 // 4. Next register is not tied to any other operand
7476 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7477}
7478
7479void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7480{
7481 OptionalImmIndexMap OptionalIdx;
7482 unsigned Opc = Inst.getOpcode();
7483
7484 unsigned I = 1;
7485 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7486 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7487 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7488 }
7489
7490 for (unsigned E = Operands.size(); I != E; ++I) {
7491 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7492 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7493 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7494 } else if (Op.isInterpSlot() ||
7495 Op.isInterpAttr() ||
7496 Op.isAttrChan()) {
7497 Inst.addOperand(MCOperand::createImm(Op.getImm()));
7498 } else if (Op.isImmModifier()) {
7499 OptionalIdx[Op.getImmTy()] = I;
7500 } else {
7501 llvm_unreachable("unhandled operand type");
7502 }
7503 }
7504
7505 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7506 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7507 }
7508
7509 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7510 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7511 }
7512
7513 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7514 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7515 }
7516}
7517
7518void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7519 OptionalImmIndexMap &OptionalIdx) {
7520 unsigned Opc = Inst.getOpcode();
7521
7522 unsigned I = 1;
7523 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7524 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7525 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7526 }
7527
7528 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7529 // This instruction has src modifiers
7530 for (unsigned E = Operands.size(); I != E; ++I) {
7531 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7532 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7533 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7534 } else if (Op.isImmModifier()) {
7535 OptionalIdx[Op.getImmTy()] = I;
7536 } else if (Op.isRegOrImm()) {
7537 Op.addRegOrImmOperands(Inst, 1);
7538 } else {
7539 llvm_unreachable("unhandled operand type");
7540 }
7541 }
7542 } else {
7543 // No src modifiers
7544 for (unsigned E = Operands.size(); I != E; ++I) {
7545 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7546 if (Op.isMod()) {
7547 OptionalIdx[Op.getImmTy()] = I;
7548 } else {
7549 Op.addRegOrImmOperands(Inst, 1);
7550 }
7551 }
7552 }
7553
7554 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7555 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7556 }
7557
7558 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7559 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7560 }
7561
7562 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7563 // they have a src2 register operand that is tied to the dst operand;
7564 // we don't allow modifiers for this operand in the assembler, so src2_modifiers
7565 // should be 0.
7566 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7567 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7568 Opc == AMDGPU::V_MAC_F32_e64_vi ||
7569 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7570 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7571 Opc == AMDGPU::V_MAC_F16_e64_vi ||
7572 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7573 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7574 Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7575 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7576 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7577 auto it = Inst.begin();
7578 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7579 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7580 ++it;
7581 // Copy the operand to ensure it's not invalidated when Inst grows.
7582 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7583 }
7584}
7585
7586void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7587 OptionalImmIndexMap OptionalIdx;
7588 cvtVOP3(Inst, Operands, OptionalIdx);
7589}
7590
7591void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7592 OptionalImmIndexMap &OptIdx) {
7593 const int Opc = Inst.getOpcode();
7594 const MCInstrDesc &Desc = MII.get(Opc);
7595
7596 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7597
7598 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7599 assert(!IsPacked);
7600 Inst.addOperand(Inst.getOperand(0));
7601 }
7602
7603 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
7604 // instruction, and then figure out where to actually put the modifiers
7605
7606 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7607 if (OpSelIdx != -1) {
7608 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7609 }
7610
7611 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7612 if (OpSelHiIdx != -1) {
7613 int DefaultVal = IsPacked ? -1 : 0;
7614 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7615 DefaultVal);
7616 }
7617
7618 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7619 if (NegLoIdx != -1) {
7620 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7621 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7622 }
7623
7624 const int Ops[] = { AMDGPU::OpName::src0,
7625 AMDGPU::OpName::src1,
7626 AMDGPU::OpName::src2 };
7627 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7628 AMDGPU::OpName::src1_modifiers,
7629 AMDGPU::OpName::src2_modifiers };
7630
7631 unsigned OpSel = 0;
7632 unsigned OpSelHi = 0;
7633 unsigned NegLo = 0;
7634 unsigned NegHi = 0;
7635
7636 if (OpSelIdx != -1)
7637 OpSel = Inst.getOperand(OpSelIdx).getImm();
7638
7639 if (OpSelHiIdx != -1)
7640 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7641
7642 if (NegLoIdx != -1) {
7643 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7644 NegLo = Inst.getOperand(NegLoIdx).getImm();
7645 NegHi = Inst.getOperand(NegHiIdx).getImm();
7646 }
7647
7648 for (int J = 0; J < 3; ++J) {
7649 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7650 if (OpIdx == -1)
7651 break;
7652
7653 uint32_t ModVal = 0;
7654
7655 if ((OpSel & (1 << J)) != 0)
7656 ModVal |= SISrcMods::OP_SEL_0;
7657
7658 if ((OpSelHi & (1 << J)) != 0)
7659 ModVal |= SISrcMods::OP_SEL_1;
7660
7661 if ((NegLo & (1 << J)) != 0)
7662 ModVal |= SISrcMods::NEG;
7663
7664 if ((NegHi & (1 << J)) != 0)
7665 ModVal |= SISrcMods::NEG_HI;
7666
7667 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7668
7669 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7670 }
7671}
7672
7673void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
7674 OptionalImmIndexMap OptIdx;
7675 cvtVOP3(Inst, Operands, OptIdx);
7676 cvtVOP3P(Inst, Operands, OptIdx);
7677}
7678
7679//===----------------------------------------------------------------------===//
7680// dpp
7681//===----------------------------------------------------------------------===//
7682
7683bool AMDGPUOperand::isDPP8() const {
7684 return isImmTy(ImmTyDPP8);
7685}
7686
7687bool AMDGPUOperand::isDPPCtrl() const {
7688 using namespace AMDGPU::DPP;
7689
7690 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7691 if (result) {
7692 int64_t Imm = getImm();
7693 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7694 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7695 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7696 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7697 (Imm == DppCtrl::WAVE_SHL1) ||
7698 (Imm == DppCtrl::WAVE_ROL1) ||
7699 (Imm == DppCtrl::WAVE_SHR1) ||
7700 (Imm == DppCtrl::WAVE_ROR1) ||
7701 (Imm == DppCtrl::ROW_MIRROR) ||
7702 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7703 (Imm == DppCtrl::BCAST15) ||
7704 (Imm == DppCtrl::BCAST31) ||
7705 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7706 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7707 }
7708 return false;
7709}
7710
7711//===----------------------------------------------------------------------===//
7712// mAI
7713//===----------------------------------------------------------------------===//
7714
7715bool AMDGPUOperand::isBLGP() const {
7716 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7717}
7718
7719bool AMDGPUOperand::isCBSZ() const {
7720 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7721}
7722
7723bool AMDGPUOperand::isABID() const {
7724 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7725}
7726
7727bool AMDGPUOperand::isS16Imm() const {
7728 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7729}
7730
7731bool AMDGPUOperand::isU16Imm() const {
7732 return isImm() && isUInt<16>(getImm());
7733}
7734
7735//===----------------------------------------------------------------------===//
7736// dim
7737//===----------------------------------------------------------------------===//
7738
7739bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
7740 // We want to allow "dim:1D" etc.,
7741 // but the initial 1 is tokenized as an integer.
7742 std::string Token;
7743 if (isToken(AsmToken::Integer)) {
7744 SMLoc Loc = getToken().getEndLoc();
7745 Token = std::string(getTokenStr());
7746 lex();
7747 if (getLoc() != Loc)
7748 return false;
7749 }
7750
7751 StringRef Suffix;
7752 if (!parseId(Suffix))
7753 return false;
7754 Token += Suffix;
7755
7756 StringRef DimId = Token;
7757 if (DimId.startswith("SQ_RSRC_IMG_"))
7758 DimId = DimId.drop_front(12);
7759
7760 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7761 if (!DimInfo)
7762 return false;
7763
7764 Encoding = DimInfo->Encoding;
7765 return true;
7766}
7767
7768OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7769 if (!isGFX10Plus())
7770 return MatchOperand_NoMatch;
7771
7772 SMLoc S = getLoc();
7773
7774 if (!trySkipId("dim", AsmToken::Colon))
7775 return MatchOperand_NoMatch;
7776
7777 unsigned Encoding;
7778 SMLoc Loc = getLoc();
7779 if (!parseDimId(Encoding)) {
7780 Error(Loc, "invalid dim value");
7781 return MatchOperand_ParseFail;
7782 }
7783
7784 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
7785 AMDGPUOperand::ImmTyDim));
7786 return MatchOperand_Success;
7787}
7788
7789//===----------------------------------------------------------------------===//
7790// dpp
7791//===----------------------------------------------------------------------===//
7792
7793OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7794 SMLoc S = getLoc();
7795
7796 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7797 return MatchOperand_NoMatch;
7798
7799 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7800
7801 int64_t Sels[8];
7802
7803 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7804 return MatchOperand_ParseFail;
7805
7806 for (size_t i = 0; i < 8; ++i) {
7807 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7808 return MatchOperand_ParseFail;
7809
7810 SMLoc Loc = getLoc();
7811 if (getParser().parseAbsoluteExpression(Sels[i]))
7812 return MatchOperand_ParseFail;
7813 if (0 > Sels[i] || 7 < Sels[i]) {
7814 Error(Loc, "expected a 3-bit value");
7815 return MatchOperand_ParseFail;
7816 }
7817 }
7818
7819 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7820 return MatchOperand_ParseFail;
7821
7822 unsigned DPP8 = 0;
7823 for (size_t i = 0; i < 8; ++i)
7824 DPP8 |= (Sels[i] << (i * 3));
7825
7826 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7827 return MatchOperand_Success;
7828}
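
The dpp8:[...] operand packs eight 3-bit lane selects, least significant select first, into a 24-bit immediate, exactly as the loop above does. Below is a standalone sketch with two worked values; the reading of the selects as per-lane sources within an 8-lane group is an assumption about the DPP8 hardware encoding, only the packing itself comes from the parser.

#include <cassert>
#include <cstddef>

// Pack dpp8:[s0,...,s7]: select i occupies bits [3*i+2 : 3*i] of the immediate.
static unsigned packDPP8(const int (&Sels)[8]) {
  unsigned DPP8 = 0;
  for (std::size_t i = 0; i < 8; ++i)
    DPP8 |= static_cast<unsigned>(Sels[i]) << (i * 3);
  return DPP8;
}

int main() {
  const int Identity[8] = {0, 1, 2, 3, 4, 5, 6, 7};
  assert(packDPP8(Identity) == 0xFAC688u);
  const int Reverse[8] = {7, 6, 5, 4, 3, 2, 1, 0}; // reverse within each 8-lane group
  assert(packDPP8(Reverse) == 0x053977u);
  return 0;
}
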
7829
7830bool
7831AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7832 const OperandVector &Operands) {
7833 if (Ctrl == "row_newbcast")
7834 return isGFX90A();
7835
7836 if (Ctrl == "row_share" ||
7837 Ctrl == "row_xmask")
7838 return isGFX10Plus();
7839
7840 if (Ctrl == "wave_shl" ||
7841 Ctrl == "wave_shr" ||
7842 Ctrl == "wave_rol" ||
7843 Ctrl == "wave_ror" ||
7844 Ctrl == "row_bcast")
7845 return isVI() || isGFX9();
7846
7847 return Ctrl == "row_mirror" ||
7848 Ctrl == "row_half_mirror" ||
7849 Ctrl == "quad_perm" ||
7850 Ctrl == "row_shl" ||
7851 Ctrl == "row_shr" ||
7852 Ctrl == "row_ror";
7853}
7854
7855int64_t
7856AMDGPUAsmParser::parseDPPCtrlPerm() {
7857 // quad_perm:[%d,%d,%d,%d]
7858
7859 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7860 return -1;
7861
7862 int64_t Val = 0;
7863 for (int i = 0; i < 4; ++i) {
7864 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7865 return -1;
7866
7867 int64_t Temp;
7868 SMLoc Loc = getLoc();
7869 if (getParser().parseAbsoluteExpression(Temp))
7870 return -1;
7871 if (Temp < 0 || Temp > 3) {
7872 Error(Loc, "expected a 2-bit value");
7873 return -1;
7874 }
7875
7876 Val += (Temp << i * 2);
7877 }
7878
7879 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7880 return -1;
7881
7882 return Val;
7883}
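
quad_perm:[a,b,c,d] packs four 2-bit lane selects into an 8-bit value, with select i at bits [2i+1:2i]; the parser returns this value directly, which assumes the quad_perm dpp_ctrl range starts at 0. A standalone sketch:

#include <cassert>

// Pack quad_perm:[a,b,c,d] the same way parseDPPCtrlPerm does:
// select i occupies bits [2*i+1 : 2*i] of the 8-bit value.
static int packQuadPerm(int a, int b, int c, int d) {
  const int Sel[4] = {a, b, c, d};
  int Val = 0;
  for (int i = 0; i < 4; ++i)
    Val += Sel[i] << (i * 2);
  return Val;
}

int main() {
  assert(packQuadPerm(0, 1, 2, 3) == 0xE4); // identity permutation
  assert(packQuadPerm(3, 2, 1, 0) == 0x1B); // reverse lanes within each quad
  return 0;
}
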
7884
7885int64_t
7886AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
7887 using namespace AMDGPU::DPP;
7888
7889 // sel:%d
7890
7891 int64_t Val;
7892 SMLoc Loc = getLoc();
7893
7894 if (getParser().parseAbsoluteExpression(Val))
7895 return -1;
7896
7897 struct DppCtrlCheck {
7898 int64_t Ctrl;
7899 int Lo;
7900 int Hi;
7901 };
7902
7903 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
7904 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
7905 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
7906 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
7907 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
7908 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
7909 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
7910 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
7911 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
7912 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
7913 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
7914 .Default({-1, 0, 0});
7915
7916 bool Valid;
7917 if (Check.Ctrl == -1) {
7918 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
7919 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
7920 } else {
7921 Valid = Check.Lo <= Val && Val <= Check.Hi;
7922 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
7923 }
7924
7925 if (!Valid) {
7926 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
7927 return -1;
7928 }
7929
7930 return Val;
7931}
7932
7933OperandMatchResultTy
7934AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7935 using namespace AMDGPU::DPP;
7936
7937 if (!isToken(AsmToken::Identifier) ||
7938 !isSupportedDPPCtrl(getTokenStr(), Operands))
7939 return MatchOperand_NoMatch;
7940
7941 SMLoc S = getLoc();
7942 int64_t Val = -1;
7943 StringRef Ctrl;
7944
7945 parseId(Ctrl);
7946
7947 if (Ctrl == "row_mirror") {
7948 Val = DppCtrl::ROW_MIRROR;
7949 } else if (Ctrl == "row_half_mirror") {
7950 Val = DppCtrl::ROW_HALF_MIRROR;
7951 } else {
7952 if (skipToken(AsmToken::Colon, "expected a colon")) {
7953 if (Ctrl == "quad_perm") {
7954 Val = parseDPPCtrlPerm();
7955 } else {
7956 Val = parseDPPCtrlSel(Ctrl);
7957 }
7958 }
7959 }
7960
7961 if (Val == -1)
7962 return MatchOperand_ParseFail;
7963
7964 Operands.push_back(
7965 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
7966 return MatchOperand_Success;
7967}
7968
7969AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7970 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7971}
7972
7973AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7974 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7975}
7976
7977AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7978 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7979}
7980
7981AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7982 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7983}
7984
7985AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7986 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7987}
7988
7989void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7990 OptionalImmIndexMap OptionalIdx;
7991
7992 unsigned Opc = Inst.getOpcode();
7993 bool HasModifiers =
7994 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
7995 unsigned I = 1;
7996 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7997 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7998 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7999 }
8000
8001 int Fi = 0;
8002 for (unsigned E = Operands.size(); I != E; ++I) {
8003 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8004 MCOI::TIED_TO);
8005 if (TiedTo != -1) {
8006 assert((unsigned)TiedTo < Inst.getNumOperands());
8007 // handle tied old or src2 for MAC instructions
8008 Inst.addOperand(Inst.getOperand(TiedTo));
8009 }
8010 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8011 // Add the register arguments
8012 if (Op.isReg() && validateVccOperand(Op.getReg())) {
8013 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
8014 // Skip it.
8015 continue;
8016 }
8017
8018 if (IsDPP8) {
8019 if (Op.isDPP8()) {
8020 Op.addImmOperands(Inst, 1);
8021 } else if (HasModifiers &&
8022 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8023 Op.addRegWithFPInputModsOperands(Inst, 2);
8024 } else if (Op.isFI()) {
8025 Fi = Op.getImm();
8026 } else if (Op.isReg()) {
8027 Op.addRegOperands(Inst, 1);
8028 } else {
8029 llvm_unreachable("Invalid operand type");
8030 }
8031 } else {
8032 if (HasModifiers &&
8033 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8034 Op.addRegWithFPInputModsOperands(Inst, 2);
8035 } else if (Op.isReg()) {
8036 Op.addRegOperands(Inst, 1);
8037 } else if (Op.isDPPCtrl()) {
8038 Op.addImmOperands(Inst, 1);
8039 } else if (Op.isImm()) {
8040 // Handle optional arguments
8041 OptionalIdx[Op.getImmTy()] = I;
8042 } else {
8043 llvm_unreachable("Invalid operand type");
8044 }
8045 }
8046 }
8047
8048 if (IsDPP8) {
8049 using namespace llvm::AMDGPU::DPP;
8050 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8051 } else {
8052 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8053 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8054 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8055 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8056 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8057 }
8058 }
8059}
8060
8061//===----------------------------------------------------------------------===//
8062// sdwa
8063//===----------------------------------------------------------------------===//
8064
8065OperandMatchResultTy
8066AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8067 AMDGPUOperand::ImmTy Type) {
8068 using namespace llvm::AMDGPU::SDWA;
8069
8070 SMLoc S = getLoc();
8071 StringRef Value;
8072 OperandMatchResultTy res;
8073
8074 SMLoc StringLoc;
8075 res = parseStringWithPrefix(Prefix, Value, StringLoc);
8076 if (res != MatchOperand_Success) {
8077 return res;
8078 }
8079
8080 int64_t Int;
8081 Int = StringSwitch<int64_t>(Value)
8082 .Case("BYTE_0", SdwaSel::BYTE_0)
8083 .Case("BYTE_1", SdwaSel::BYTE_1)
8084 .Case("BYTE_2", SdwaSel::BYTE_2)
8085 .Case("BYTE_3", SdwaSel::BYTE_3)
8086 .Case("WORD_0", SdwaSel::WORD_0)
8087 .Case("WORD_1", SdwaSel::WORD_1)
8088 .Case("DWORD", SdwaSel::DWORD)
8089 .Default(0xffffffff);
8090
8091 if (Int == 0xffffffff) {
8092 Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8093 return MatchOperand_ParseFail;
8094 }
8095
8096 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8097 return MatchOperand_Success;
8098}
8099
8100OperandMatchResultTy
8101AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8102 using namespace llvm::AMDGPU::SDWA;
8103
8104 SMLoc S = getLoc();
8105 StringRef Value;
8106 OperandMatchResultTy res;
8107
8108 SMLoc StringLoc;
8109 res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8110 if (res != MatchOperand_Success) {
8111 return res;
8112 }
8113
8114 int64_t Int;
8115 Int = StringSwitch<int64_t>(Value)
8116 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8117 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8118 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8119 .Default(0xffffffff);
8120
8121 if (Int == 0xffffffff) {
8122 Error(StringLoc, "invalid dst_unused value");
8123 return MatchOperand_ParseFail;
8124 }
8125
8126 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8127 return MatchOperand_Success;
8128}
8129
8130void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8131 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8132}
8133
8134void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8135 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8136}
8137
8138void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8139 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8140}
8141
8142void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8143 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8144}
8145
8146void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8147 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8148}
8149
8150void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8151 uint64_t BasicInstType,
8152 bool SkipDstVcc,
8153 bool SkipSrcVcc) {
8154 using namespace llvm::AMDGPU::SDWA;
8155
8156 OptionalImmIndexMap OptionalIdx;
8157 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8158 bool SkippedVcc = false;
8159
8160 unsigned I = 1;
8161 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8162 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8163 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8164 }
8165
8166 for (unsigned E = Operands.size(); I != E; ++I) {
8167 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8168 if (SkipVcc && !SkippedVcc && Op.isReg() &&
8169 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8170 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
8171 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8172 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8173 // Skip VCC only if we didn't skip it on previous iteration.
8174 // Note that src0 and src1 occupy 2 slots each because of modifiers.
8175 if (BasicInstType == SIInstrFlags::VOP2 &&
8176 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8177 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8178 SkippedVcc = true;
8179 continue;
8180 } else if (BasicInstType == SIInstrFlags::VOPC &&
8181 Inst.getNumOperands() == 0) {
8182 SkippedVcc = true;
8183 continue;
8184 }
8185 }
8186 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8187 Op.addRegOrImmWithInputModsOperands(Inst, 2);
8188 } else if (Op.isImm()) {
8189 // Handle optional arguments
8190 OptionalIdx[Op.getImmTy()] = I;
8191 } else {
8192 llvm_unreachable("Invalid operand type");
8193 }
8194 SkippedVcc = false;
8195 }
8196
8197 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8198 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8199 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8200 // V_NOP_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments
8201 switch (BasicInstType) {
8202 case SIInstrFlags::VOP1:
8203 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8204 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8205 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8206 }
8207 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8208 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8209 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8210 break;
8211
8212 case SIInstrFlags::VOP2:
8213 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8214 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8215 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8216 }
8217 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8218 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8219 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8220 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8221 break;
8222
8223 case SIInstrFlags::VOPC:
8224 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8225 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8226 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8227 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8228 break;
8229
8230 default:
8231 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8232 }
8233 }
8234
8235 // Special case v_mac_{f16, f32}:
8236 // it has a src2 register operand that is tied to the dst operand
8237 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8238 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
8239 auto it = Inst.begin();
8240 std::advance(
8241 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8242 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8243 }
8244}
8245
8246//===----------------------------------------------------------------------===//
8247// mAI
8248//===----------------------------------------------------------------------===//
8249
8250AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8251 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8252}
8253
8254AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8255 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8256}
8257
8258AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8259 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8260}
8261
8262/// Force static initialization.
8263extern "C" LLVM_EXTERNAL_VISIBILITY__attribute__ ((visibility("default"))) void LLVMInitializeAMDGPUAsmParser() {
8264 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8265 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8266}
8267
8268#define GET_REGISTER_MATCHER
8269#define GET_MATCHER_IMPLEMENTATION
8270#define GET_MNEMONIC_SPELL_CHECKER
8271#define GET_MNEMONIC_CHECKER
8272#include "AMDGPUGenAsmMatcher.inc"
8273
8274// This function should be defined after the auto-generated include so that we have
8275// the MatchClassKind enum defined
8276unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8277 unsigned Kind) {
8278 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8279 // But MatchInstructionImpl() expects to meet a token and fails to validate the
8280 // operand. This method checks if we were given an immediate operand but expected
8281 // to get the corresponding token.
8282 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8283 switch (Kind) {
8284 case MCK_addr64:
8285 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8286 case MCK_gds:
8287 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8288 case MCK_lds:
8289 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8290 case MCK_idxen:
8291 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8292 case MCK_offen:
8293 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8294 case MCK_SSrcB32:
8295 // When operands have expression values, they will return true for isToken,
8296 // because it is not possible to distinguish between a token and an
8297 // expression at parse time. MatchInstructionImpl() will always try to
8298 // match an operand as a token, when isToken returns true, and when the
8299 // name of the expression is not a valid token, the match will fail,
8300 // so we need to handle it here.
8301 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8302 case MCK_SSrcF32:
8303 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8304 case MCK_SoppBrTarget:
8305 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8306 case MCK_VReg32OrOff:
8307 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8308 case MCK_InterpSlot:
8309 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8310 case MCK_Attr:
8311 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8312 case MCK_AttrChan:
8313 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8314 case MCK_ImmSMEMOffset:
8315 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8316 case MCK_SReg_64:
8317 case MCK_SReg_64_XEXEC:
8318 // Null is defined as a 32-bit register but
8319 // it should also be enabled with 64-bit operands.
8320 // The following code enables it for SReg_64 operands
8321 // used as source and destination. Remaining source
8322 // operands are handled in isInlinableImm.
8323 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8324 default:
8325 return Match_InvalidOperand;
8326 }
8327}
8328
8329//===----------------------------------------------------------------------===//
8330// endpgm
8331//===----------------------------------------------------------------------===//
8332
8333OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8334 SMLoc S = getLoc();
8335 int64_t Imm = 0;
8336
8337 if (!parseExpr(Imm)) {
8338 // The operand is optional, if not present default to 0
8339 Imm = 0;
8340 }
8341
8342 if (!isUInt<16>(Imm)) {
8343 Error(S, "expected a 16-bit value");
8344 return MatchOperand_ParseFail;
8345 }
8346
8347 Operands.push_back(
8348 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8349 return MatchOperand_Success;
8350}
8351
8352bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }

/usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/bits/stl_tree.h

1// RB tree implementation -*- C++ -*-
2
3// Copyright (C) 2001-2020 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25/*
26 *
27 * Copyright (c) 1996,1997
28 * Silicon Graphics Computer Systems, Inc.
29 *
30 * Permission to use, copy, modify, distribute and sell this software
31 * and its documentation for any purpose is hereby granted without fee,
32 * provided that the above copyright notice appear in all copies and
33 * that both that copyright notice and this permission notice appear
34 * in supporting documentation. Silicon Graphics makes no
35 * representations about the suitability of this software for any
36 * purpose. It is provided "as is" without express or implied warranty.
37 *
38 *
39 * Copyright (c) 1994
40 * Hewlett-Packard Company
41 *
42 * Permission to use, copy, modify, distribute and sell this software
43 * and its documentation for any purpose is hereby granted without fee,
44 * provided that the above copyright notice appear in all copies and
45 * that both that copyright notice and this permission notice appear
46 * in supporting documentation. Hewlett-Packard Company makes no
47 * representations about the suitability of this software for any
48 * purpose. It is provided "as is" without express or implied warranty.
49 *
50 *
51 */
52
53/** @file bits/stl_tree.h
54 * This is an internal header file, included by other library headers.
55 * Do not attempt to use it directly. @headername{map,set}
56 */
57
58#ifndef _STL_TREE_H1
59#define _STL_TREE_H1 1
60
61#pragma GCC system_header
62
63#include <bits/stl_algobase.h>
64#include <bits/allocator.h>
65#include <bits/stl_function.h>
66#include <bits/cpp_type_traits.h>
67#include <ext/alloc_traits.h>
68#if __cplusplus201402L >= 201103L
69# include <ext/aligned_buffer.h>
70#endif
71#if __cplusplus201402L > 201402L
72# include <bits/node_handle.h>
73#endif
74
75namespace std _GLIBCXX_VISIBILITY(default)__attribute__ ((__visibility__ ("default")))
76{
77_GLIBCXX_BEGIN_NAMESPACE_VERSION
78
79#if __cplusplus201402L > 201103L
80# define __cpp_lib_generic_associative_lookup201304 201304
81#endif
82
83 // Red-black tree class, designed for use in implementing STL
84 // associative containers (set, multiset, map, and multimap). The
85 // insertion and deletion algorithms are based on those in Cormen,
86 // Leiserson, and Rivest, Introduction to Algorithms (MIT Press,
87 // 1990), except that
88 //
89 // (1) the header cell is maintained with links not only to the root
90 // but also to the leftmost node of the tree, to enable constant
91 // time begin(), and to the rightmost node of the tree, to enable
92 // linear time performance when used with the generic set algorithms
93 // (set_union, etc.)
94 //
95 // (2) when a node being deleted has two children its successor node
96 // is relinked into its place, rather than copied, so that the only
97 // iterators invalidated are those referring to the deleted node.
98
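
The numbered comments above explain why the tree keeps a header cell linked to the root, the leftmost node and the rightmost node. A minimal sketch of that bookkeeping (not the libstdc++ implementation; the Node/Header names are invented), showing how it makes begin() constant time:

#include <cassert>
#include <cstddef>

// Simplified bookkeeping in the spirit of the comments above: the header
// cell keeps links to the root (parent), the leftmost node (left) and the
// rightmost node (right), so begin() needs no descent through the tree.
struct Node {
  Node *parent = nullptr, *left = nullptr, *right = nullptr;
  int key = 0;
};

struct Header {
  Node header;                // header.parent is the root
  std::size_t node_count = 0;

  Header() { reset(); }

  void reset() {
    header.parent = nullptr;
    header.left = &header;    // leftmost == header while the tree is empty
    header.right = &header;   // rightmost == header while the tree is empty
    node_count = 0;
  }

  Node *begin() const { return header.left; } // constant time
};

int main() {
  Header h;
  assert(h.begin() == &h.header); // empty tree: begin() == end()

  Node n;                         // pretend n was just inserted as the
  n.key = 7;                      // root, leftmost and rightmost node
  h.header.parent = h.header.left = h.header.right = &n;
  ++h.node_count;

  assert(h.begin()->key == 7);    // still no tree walk required
}
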
99 enum _Rb_tree_color { _S_red = false, _S_black = true };
100
101 struct _Rb_tree_node_base
102 {
103 typedef _Rb_tree_node_base* _Base_ptr;
104 typedef const _Rb_tree_node_base* _Const_Base_ptr;
105
106 _Rb_tree_color _M_color;
107 _Base_ptr _M_parent;
108 _Base_ptr _M_left;
109 _Base_ptr _M_right;
110
111 static _Base_ptr
112 _S_minimum(_Base_ptr __x) _GLIBCXX_NOEXCEPTnoexcept
113 {
114 while (__x->_M_left != 0) __x = __x->_M_left;
115 return __x;
116 }
117
118 static _Const_Base_ptr
119 _S_minimum(_Const_Base_ptr __x) _GLIBCXX_NOEXCEPTnoexcept
120 {
121 while (__x->_M_left != 0) __x = __x->_M_left;
122 return __x;
123 }
124
125 static _Base_ptr
126 _S_maximum(_Base_ptr __x) _GLIBCXX_NOEXCEPTnoexcept
127 {
128 while (__x->_M_right != 0) __x = __x->_M_right;
129 return __x;
130 }
131
132 static _Const_Base_ptr
133 _S_maximum(_Const_Base_ptr __x) _GLIBCXX_NOEXCEPTnoexcept
134 {
135 while (__x->_M_right != 0) __x = __x->_M_right;
136 return __x;
137 }
138 };
139
140 // Helper type offering value initialization guarantee on the compare functor.
141 template<typename _Key_compare>
142 struct _Rb_tree_key_compare
143 {
144 _Key_compare _M_key_compare;
145
146 _Rb_tree_key_compare()
147 _GLIBCXX_NOEXCEPT_IF(
148 is_nothrow_default_constructible<_Key_compare>::value)
149 : _M_key_compare()
150 { }
151
152 _Rb_tree_key_compare(const _Key_compare& __comp)
153 : _M_key_compare(__comp)
154 { }
155
156#if __cplusplus201402L >= 201103L
157 // Copy constructor added for consistency with C++98 mode.
158 _Rb_tree_key_compare(const _Rb_tree_key_compare&) = default;
159
160 _Rb_tree_key_compare(_Rb_tree_key_compare&& __x)
161 noexcept(is_nothrow_copy_constructible<_Key_compare>::value)
162 : _M_key_compare(__x._M_key_compare)
163 { }
164#endif
165 };
166
167 // Helper type to manage default initialization of node count and header.
168 struct _Rb_tree_header
169 {
170 _Rb_tree_node_base _M_header;
171 size_t _M_node_count; // Keeps track of size of tree.
172
173 _Rb_tree_header() _GLIBCXX_NOEXCEPTnoexcept
174 {
175 _M_header._M_color = _S_red;
176 _M_reset();
177 }
178
179#if __cplusplus201402L >= 201103L
180 _Rb_tree_header(_Rb_tree_header&& __x) noexcept
181 {
182 if (__x._M_header._M_parent != nullptr)
183 _M_move_data(__x);
184 else
185 {
186 _M_header._M_color = _S_red;
187 _M_reset();
188 }
189 }
190#endif
191
192 void
193 _M_move_data(_Rb_tree_header& __from)
194 {
195 _M_header._M_color = __from._M_header._M_color;
196 _M_header._M_parent = __from._M_header._M_parent;
197 _M_header._M_left = __from._M_header._M_left;
198 _M_header._M_right = __from._M_header._M_right;
199 _M_header._M_parent->_M_parent = &_M_header;
200 _M_node_count = __from._M_node_count;
201
202 __from._M_reset();
203 }
204
205 void
206 _M_reset()
207 {
208 _M_header._M_parent = 0;
209 _M_header._M_left = &_M_header;
210 _M_header._M_right = &_M_header;
211 _M_node_count = 0;
212 }
213 };
214
215 template<typename _Val>
216 struct _Rb_tree_node : public _Rb_tree_node_base
217 {
218 typedef _Rb_tree_node<_Val>* _Link_type;
219
220#if __cplusplus201402L < 201103L
221 _Val _M_value_field;
222
223 _Val*
224 _M_valptr()
225 { return std::__addressof(_M_value_field); }
226
227 const _Val*
228 _M_valptr() const
229 { return std::__addressof(_M_value_field); }
230#else
231 __gnu_cxx::__aligned_membuf<_Val> _M_storage;
232
233 _Val*
234 _M_valptr()
235 { return _M_storage._M_ptr(); }
236
237 const _Val*
238 _M_valptr() const
239 { return _M_storage._M_ptr(); }
240#endif
241 };
242
243 _GLIBCXX_PURE__attribute__ ((__pure__)) _Rb_tree_node_base*
244 _Rb_tree_increment(_Rb_tree_node_base* __x) throw ();
245
246 _GLIBCXX_PURE__attribute__ ((__pure__)) const _Rb_tree_node_base*
247 _Rb_tree_increment(const _Rb_tree_node_base* __x) throw ();
248
249 _GLIBCXX_PURE__attribute__ ((__pure__)) _Rb_tree_node_base*
250 _Rb_tree_decrement(_Rb_tree_node_base* __x) throw ();
251
252 _GLIBCXX_PURE__attribute__ ((__pure__)) const _Rb_tree_node_base*
253 _Rb_tree_decrement(const _Rb_tree_node_base* __x) throw ();
254
255 template<typename _Tp>
256 struct _Rb_tree_iterator
257 {
258 typedef _Tp value_type;
259 typedef _Tp& reference;
260 typedef _Tp* pointer;
261
262 typedef bidirectional_iterator_tag iterator_category;
263 typedef ptrdiff_t difference_type;
264
265 typedef _Rb_tree_iterator<_Tp> _Self;
266 typedef _Rb_tree_node_base::_Base_ptr _Base_ptr;
267 typedef _Rb_tree_node<_Tp>* _Link_type;
268
269 _Rb_tree_iterator() _GLIBCXX_NOEXCEPTnoexcept
270 : _M_node() { }
271
272 explicit
273 _Rb_tree_iterator(_Base_ptr __x) _GLIBCXX_NOEXCEPTnoexcept
274 : _M_node(__x) { }
275
276 reference
277 operator*() const _GLIBCXX_NOEXCEPTnoexcept
278 { return *static_cast<_Link_type>(_M_node)->_M_valptr(); }
279
280 pointer
281 operator->() const _GLIBCXX_NOEXCEPTnoexcept
282 { return static_cast<_Link_type> (_M_node)->_M_valptr(); }
283
284 _Self&
285 operator++() _GLIBCXX_NOEXCEPTnoexcept
286 {
287 _M_node = _Rb_tree_increment(_M_node);
288 return *this;
289 }
290
291 _Self
292 operator++(int) _GLIBCXX_NOEXCEPTnoexcept
293 {
294 _Self __tmp = *this;
295 _M_node = _Rb_tree_increment(_M_node);
296 return __tmp;
297 }
298
299 _Self&
300 operator--() _GLIBCXX_NOEXCEPTnoexcept
301 {
302 _M_node = _Rb_tree_decrement(_M_node);
303 return *this;
304 }
305
306 _Self
307 operator--(int) _GLIBCXX_NOEXCEPTnoexcept
308 {
309 _Self __tmp = *this;
310 _M_node = _Rb_tree_decrement(_M_node);
311 return __tmp;
312 }
313
314 friend bool
315 operator==(const _Self& __x, const _Self& __y) _GLIBCXX_NOEXCEPTnoexcept
316 { return __x._M_node == __y._M_node; }
317
318#if ! __cpp_lib_three_way_comparison
319 friend bool
320 operator!=(const _Self& __x, const _Self& __y) _GLIBCXX_NOEXCEPTnoexcept
321 { return __x._M_node != __y._M_node; }
14. Assuming '__x._M_node' is not equal to '__y._M_node'
15. Returning the value 1, which participates in a condition later
322#endif
323
324 _Base_ptr _M_node;
325 };
326
327 template<typename _Tp>
328 struct _Rb_tree_const_iterator
329 {
330 typedef _Tp value_type;
331 typedef const _Tp& reference;
332 typedef const _Tp* pointer;
333
334 typedef _Rb_tree_iterator<_Tp> iterator;
335
336 typedef bidirectional_iterator_tag iterator_category;
337 typedef ptrdiff_t difference_type;
338
339 typedef _Rb_tree_const_iterator<_Tp> _Self;
340 typedef _Rb_tree_node_base::_Const_Base_ptr _Base_ptr;
341 typedef const _Rb_tree_node<_Tp>* _Link_type;
342
343 _Rb_tree_const_iterator() _GLIBCXX_NOEXCEPTnoexcept
344 : _M_node() { }
345
346 explicit
347 _Rb_tree_const_iterator(_Base_ptr __x) _GLIBCXX_NOEXCEPTnoexcept
348 : _M_node(__x) { }
349
350 _Rb_tree_const_iterator(const iterator& __it) _GLIBCXX_NOEXCEPTnoexcept
351 : _M_node(__it._M_node) { }
352
353 iterator
354 _M_const_cast() const _GLIBCXX_NOEXCEPTnoexcept
355 { return iterator(const_cast<typename iterator::_Base_ptr>(_M_node)); }
356
357 reference
358 operator*() const _GLIBCXX_NOEXCEPTnoexcept
359 { return *static_cast<_Link_type>(_M_node)->_M_valptr(); }
360
361 pointer
362 operator->() const _GLIBCXX_NOEXCEPTnoexcept
363 { return static_cast<_Link_type>(_M_node)->_M_valptr(); }
364
365 _Self&
366 operator++() _GLIBCXX_NOEXCEPTnoexcept
367 {
368 _M_node = _Rb_tree_increment(_M_node);
369 return *this;
370 }
371
372 _Self
373 operator++(int) _GLIBCXX_NOEXCEPTnoexcept
374 {
375 _Self __tmp = *this;
376 _M_node = _Rb_tree_increment(_M_node);
377 return __tmp;
378 }
379
380 _Self&
381 operator--() _GLIBCXX_NOEXCEPTnoexcept
382 {
383 _M_node = _Rb_tree_decrement(_M_node);
384 return *this;
385 }
386
387 _Self
388 operator--(int) _GLIBCXX_NOEXCEPTnoexcept
389 {
390 _Self __tmp = *this;
391 _M_node = _Rb_tree_decrement(_M_node);
392 return __tmp;
393 }
394
395 friend bool
396 operator==(const _Self& __x, const _Self& __y) _GLIBCXX_NOEXCEPTnoexcept
397 { return __x._M_node == __y._M_node; }
398
399#if ! __cpp_lib_three_way_comparison
400 friend bool
401 operator!=(const _Self& __x, const _Self& __y) _GLIBCXX_NOEXCEPTnoexcept
402 { return __x._M_node != __y._M_node; }
403#endif
404
405 _Base_ptr _M_node;
406 };
407
408 void
409 _Rb_tree_insert_and_rebalance(const bool __insert_left,
410 _Rb_tree_node_base* __x,
411 _Rb_tree_node_base* __p,
412 _Rb_tree_node_base& __header) throw ();
413
414 _Rb_tree_node_base*
415 _Rb_tree_rebalance_for_erase(_Rb_tree_node_base* const __z,
416 _Rb_tree_node_base& __header) throw ();
417
418#if __cplusplus201402L >= 201402L
419 template<typename _Cmp, typename _SfinaeType, typename = __void_t<>>
420 struct __has_is_transparent
421 { };
422
423 template<typename _Cmp, typename _SfinaeType>
424 struct __has_is_transparent<_Cmp, _SfinaeType,
425 __void_t<typename _Cmp::is_transparent>>
426 { typedef void type; };
427
428 template<typename _Cmp, typename _SfinaeType>
429 using __has_is_transparent_t
430 = typename __has_is_transparent<_Cmp, _SfinaeType>::type;
431#endif
432
433#if __cplusplus201402L > 201402L
434 template<typename _Tree1, typename _Cmp2>
435 struct _Rb_tree_merge_helper { };
436#endif
437
438 template<typename _Key, typename _Val, typename _KeyOfValue,
439 typename _Compare, typename _Alloc = allocator<_Val> >
440 class _Rb_tree
441 {
442 typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template
443 rebind<_Rb_tree_node<_Val> >::other _Node_allocator;
444
445 typedef __gnu_cxx::__alloc_traits<_Node_allocator> _Alloc_traits;
446
447 protected:
448 typedef _Rb_tree_node_base* _Base_ptr;
449 typedef const _Rb_tree_node_base* _Const_Base_ptr;
450 typedef _Rb_tree_node<_Val>* _Link_type;
451 typedef const _Rb_tree_node<_Val>* _Const_Link_type;
452
453 private:
454 // Functor recycling a pool of nodes and using allocation once the pool
455 // is empty.
456 struct _Reuse_or_alloc_node
457 {
458 _Reuse_or_alloc_node(_Rb_tree& __t)
459 : _M_root(__t._M_root()), _M_nodes(__t._M_rightmost()), _M_t(__t)
460 {
461 if (_M_root)
462 {
463 _M_root->_M_parent = 0;
464
465 if (_M_nodes->_M_left)
466 _M_nodes = _M_nodes->_M_left;
467 }
468 else
469 _M_nodes = 0;
470 }
471
472#if __cplusplus201402L >= 201103L
473 _Reuse_or_alloc_node(const _Reuse_or_alloc_node&) = delete;
474#endif
475
476 ~_Reuse_or_alloc_node()
477 { _M_t._M_erase(static_cast<_Link_type>(_M_root)); }
478
479 template<typename _Arg>
480 _Link_type
481#if __cplusplus201402L < 201103L
482 operator()(const _Arg& __arg)
483#else
484 operator()(_Arg&& __arg)
485#endif
486 {
487 _Link_type __node = static_cast<_Link_type>(_M_extract());
488 if (__node)
489 {
490 _M_t._M_destroy_node(__node);
491 _M_t._M_construct_node(__node, _GLIBCXX_FORWARD(_Arg, __arg)std::forward<_Arg>(__arg));
492 return __node;
493 }
494
495 return _M_t._M_create_node(_GLIBCXX_FORWARD(_Arg, __arg)std::forward<_Arg>(__arg));
496 }
497
498 private:
499 _Base_ptr
500 _M_extract()
501 {
502 if (!_M_nodes)
503 return _M_nodes;
504
505 _Base_ptr __node = _M_nodes;
506 _M_nodes = _M_nodes->_M_parent;
507 if (_M_nodes)
508 {
509 if (_M_nodes->_M_right == __node)
510 {
511 _M_nodes->_M_right = 0;
512
513 if (_M_nodes->_M_left)
514 {
515 _M_nodes = _M_nodes->_M_left;
516
517 while (_M_nodes->_M_right)
518 _M_nodes = _M_nodes->_M_right;
519
520 if (_M_nodes->_M_left)
521 _M_nodes = _M_nodes->_M_left;
522 }
523 }
524 else // __node is on the left.
525 _M_nodes->_M_left = 0;
526 }
527 else
528 _M_root = 0;
529
530 return __node;
531 }
532
533 _Base_ptr _M_root;
534 _Base_ptr _M_nodes;
535 _Rb_tree& _M_t;
536 };
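
A rough, generic sketch of the recycle-then-allocate idea described in the comment above _Reuse_or_alloc_node: take nodes from a pool while it lasts, and fall back to fresh allocation once it is empty. The type names here are invented, and the real functor additionally walks the old tree to harvest its nodes.

#include <cstdio>
#include <vector>

// Generic sketch of the "reuse or allocate" pattern.
struct IntNode { int value; };

struct ReuseOrAllocNode {
  std::vector<IntNode *> pool; // nodes salvaged from a previous tree

  IntNode *operator()(int v) {
    if (!pool.empty()) {
      IntNode *n = pool.back(); // reuse an existing node...
      pool.pop_back();
      n->value = v;             // ...and rebuild its value in place
      return n;
    }
    return new IntNode{v};      // pool empty: allocate a fresh node
  }
};

int main() {
  ReuseOrAllocNode gen;
  gen.pool.push_back(new IntNode{0}); // one recyclable node
  IntNode *a = gen(1);                // reused from the pool
  IntNode *b = gen(2);                // freshly allocated
  std::printf("%d %d\n", a->value, b->value);
  delete a;
  delete b;
}
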
537
538 // Functor similar to the previous one but without any pool of nodes to
539 // recycle.
540 struct _Alloc_node
541 {
542 _Alloc_node(_Rb_tree& __t)
543 : _M_t(__t) { }
544
545 template<typename _Arg>
546 _Link_type
547#if __cplusplus201402L < 201103L
548 operator()(const _Arg& __arg) const
549#else
550 operator()(_Arg&& __arg) const
551#endif
552 { return _M_t._M_create_node(_GLIBCXX_FORWARD(_Arg, __arg)std::forward<_Arg>(__arg)); }
553
554 private:
555 _Rb_tree& _M_t;
556 };
557
558 public:
559 typedef _Key key_type;
560 typedef _Val value_type;
561 typedef value_type* pointer;
562 typedef const value_type* const_pointer;
563 typedef value_type& reference;
564 typedef const value_type& const_reference;
565 typedef size_t size_type;
566 typedef ptrdiff_t difference_type;
567 typedef _Alloc allocator_type;
568
569 _Node_allocator&
570 _M_get_Node_allocator() _GLIBCXX_NOEXCEPTnoexcept
571 { return this->_M_impl; }
572
573 const _Node_allocator&
574 _M_get_Node_allocator() const _GLIBCXX_NOEXCEPTnoexcept
575 { return this->_M_impl; }
576
577 allocator_type
578 get_allocator() const _GLIBCXX_NOEXCEPTnoexcept
579 { return allocator_type(_M_get_Node_allocator()); }
580
581 protected:
582 _Link_type
583 _M_get_node()
584 { return _Alloc_traits::allocate(_M_get_Node_allocator(), 1); }
585
586 void
587 _M_put_node(_Link_type __p) _GLIBCXX_NOEXCEPTnoexcept
588 { _Alloc_traits::deallocate(_M_get_Node_allocator(), __p, 1); }
589
590#if __cplusplus201402L < 201103L
591 void
592 _M_construct_node(_Link_type __node, const value_type& __x)
593 {
594 __try
595 { get_allocator().construct(__node->_M_valptr(), __x); }
596 __catch(...)
597 {
598 _M_put_node(__node);
599 __throw_exception_again;
600 }
601 }
602
603 _Link_type
604 _M_create_node(const value_type& __x)
605 {
606 _Link_type __tmp = _M_get_node();
607 _M_construct_node(__tmp, __x);
608 return __tmp;
609 }
610#else
611 template<typename... _Args>
612 void
613 _M_construct_node(_Link_type __node, _Args&&... __args)
614 {
615 __try
616 {
617 ::new(__node) _Rb_tree_node<_Val>;
618 _Alloc_traits::construct(_M_get_Node_allocator(),
619 __node->_M_valptr(),
620 std::forward<_Args>(__args)...);
621 }
622 __catch(...)
623 {
624 __node->~_Rb_tree_node<_Val>();
625 _M_put_node(__node);
626 __throw_exception_again;
627 }
628 }
629
630 template<typename... _Args>
631 _Link_type
632 _M_create_node(_Args&&... __args)
633 {
634 _Link_type __tmp = _M_get_node();
635 _M_construct_node(__tmp, std::forward<_Args>(__args)...);
636 return __tmp;
637 }
638#endif
639
640 void
641 _M_destroy_node(_Link_type __p) _GLIBCXX_NOEXCEPTnoexcept
642 {
643#if __cplusplus201402L < 201103L
644 get_allocator().destroy(__p->_M_valptr());
645#else
646 _Alloc_traits::destroy(_M_get_Node_allocator(), __p->_M_valptr());
647 __p->~_Rb_tree_node<_Val>();
648#endif
649 }
650
651 void
652 _M_drop_node(_Link_type __p) _GLIBCXX_NOEXCEPTnoexcept
653 {
654 _M_destroy_node(__p);
655 _M_put_node(__p);
656 }
657
658 template<typename _NodeGen>
659 _Link_type
660 _M_clone_node(_Const_Link_type __x, _NodeGen& __node_gen)
661 {
662 _Link_type __tmp = __node_gen(*__x->_M_valptr());
663 __tmp->_M_color = __x->_M_color;
664 __tmp->_M_left = 0;
665 __tmp->_M_right = 0;
666 return __tmp;
667 }
668
669 protected:
670#if _GLIBCXX_INLINE_VERSION0
671 template<typename _Key_compare>
672#else
673 // Unused _Is_pod_comparator is kept as it is part of mangled name.
674 template<typename _Key_compare,
675 bool /* _Is_pod_comparator */ = __is_pod(_Key_compare)>
676#endif
677 struct _Rb_tree_impl
678 : public _Node_allocator
679 , public _Rb_tree_key_compare<_Key_compare>
680 , public _Rb_tree_header
681 {
682 typedef _Rb_tree_key_compare<_Key_compare> _Base_key_compare;
683
684 _Rb_tree_impl()
685 _GLIBCXX_NOEXCEPT_IF(
686 is_nothrow_default_constructible<_Node_allocator>::value
687 && is_nothrow_default_constructible<_Base_key_compare>::value )
688 : _Node_allocator()
689 { }
690
691 _Rb_tree_impl(const _Rb_tree_impl& __x)
692 : _Node_allocator(_Alloc_traits::_S_select_on_copy(__x))
693 , _Base_key_compare(__x._M_key_compare)
694 { }
695
696#if __cplusplus201402L < 201103L
697 _Rb_tree_impl(const _Key_compare& __comp, const _Node_allocator& __a)
698 : _Node_allocator(__a), _Base_key_compare(__comp)
699 { }
700#else
701 _Rb_tree_impl(_Rb_tree_impl&&) = default;
702
703 explicit
704 _Rb_tree_impl(_Node_allocator&& __a)
705 : _Node_allocator(std::move(__a))
706 { }
707
708 _Rb_tree_impl(_Rb_tree_impl&& __x, _Node_allocator&& __a)
709 : _Node_allocator(std::move(__a)),
710 _Base_key_compare(std::move(__x)),
711 _Rb_tree_header(std::move(__x))
712 { }
713
714 _Rb_tree_impl(const _Key_compare& __comp, _Node_allocator&& __a)
715 : _Node_allocator(std::move(__a)), _Base_key_compare(__comp)
716 { }
717#endif
718 };
719
720 _Rb_tree_impl<_Compare> _M_impl;
721
722 protected:
723 _Base_ptr&
724 _M_root() _GLIBCXX_NOEXCEPTnoexcept
725 { return this->_M_impl._M_header._M_parent; }
726
727 _Const_Base_ptr
728 _M_root() const _GLIBCXX_NOEXCEPTnoexcept
729 { return this->_M_impl._M_header._M_parent; }
730
731 _Base_ptr&
732 _M_leftmost() _GLIBCXX_NOEXCEPTnoexcept
733 { return this->_M_impl._M_header._M_left; }
734
735 _Const_Base_ptr
736 _M_leftmost() const _GLIBCXX_NOEXCEPTnoexcept
737 { return this->_M_impl._M_header._M_left; }
738
739 _Base_ptr&
740 _M_rightmost() _GLIBCXX_NOEXCEPTnoexcept
741 { return this->_M_impl._M_header._M_right; }
742
743 _Const_Base_ptr
744 _M_rightmost() const _GLIBCXX_NOEXCEPTnoexcept
745 { return this->_M_impl._M_header._M_right; }
746
747 _Link_type
748 _M_begin() _GLIBCXX_NOEXCEPTnoexcept
749 { return static_cast<_Link_type>(this->_M_impl._M_header._M_parent); }
750
751 _Const_Link_type
752 _M_begin() const _GLIBCXX_NOEXCEPTnoexcept
753 {
754 return static_cast<_Const_Link_type>
755 (this->_M_impl._M_header._M_parent);
756 }
757
758 _Base_ptr
759 _M_end() _GLIBCXX_NOEXCEPTnoexcept
760 { return &this->_M_impl._M_header; }
761
762 _Const_Base_ptr
763 _M_end() const _GLIBCXX_NOEXCEPTnoexcept
764 { return &this->_M_impl._M_header; }
765
766 static const _Key&
767 _S_key(_Const_Link_type __x)
768 {
769#if __cplusplus201402L >= 201103L
770 // If we're asking for the key we're presumably using the comparison
771 // object, and so this is a good place to sanity check it.
772 static_assert(__is_invocable<_Compare&, const _Key&, const _Key&>{},
773 "comparison object must be invocable "
774 "with two arguments of key type");
775# if __cplusplus201402L >= 201703L
776 // _GLIBCXX_RESOLVE_LIB_DEFECTS
777 // 2542. Missing const requirements for associative containers
778 if constexpr (__is_invocable<_Compare&, const _Key&, const _Key&>{})
779 static_assert(
780 is_invocable_v<const _Compare&, const _Key&, const _Key&>,
781 "comparison object must be invocable as const");
782# endif // C++17
783#endif // C++11
784
785 return _KeyOfValue()(*__x->_M_valptr());
786 }
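
The comments in _S_key above treat key extraction as the natural place to sanity-check the comparison object. The same checks can be expressed with the public traits; this sketch uses std::is_invocable_v (C++17) instead of the internal __is_invocable, and the helper name check_comparator is invented.

#include <functional>
#include <string>
#include <type_traits>

// Same idea as the static_asserts in _S_key above: the comparator must be
// callable with two keys and, per LWG 2542, callable as const as well.
template <typename Compare, typename Key>
void check_comparator() {
  static_assert(std::is_invocable_v<Compare &, const Key &, const Key &>,
                "comparison object must be invocable with two arguments of key type");
  static_assert(std::is_invocable_v<const Compare &, const Key &, const Key &>,
                "comparison object must be invocable as const");
}

int main() {
  check_comparator<std::less<std::string>, std::string>(); // compiles: OK
  // check_comparator<int, std::string>(); // would fail the first assertion
}
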
787
788 static _Link_type
789 _S_left(_Base_ptr __x) _GLIBCXX_NOEXCEPTnoexcept
790 { return static_cast<_Link_type>(__x->_M_left); }
791
792 static _Const_Link_type
793 _S_left(_Const_Base_ptr __x) _GLIBCXX_NOEXCEPTnoexcept
794 { return static_cast<_Const_Link_type>(__x->_M_left); }
795
796 static _Link_type
797 _S_right(_Base_ptr __x) _GLIBCXX_NOEXCEPTnoexcept
798 { return static_cast<_Link_type>(__x->_M_right); }
799
800 static _Const_Link_type
801 _S_right(_Const_Base_ptr __x) _GLIBCXX_NOEXCEPTnoexcept
802 { return static_cast<_Const_Link_type>(__x->_M_right); }
803
804 static const _Key&
805 _S_key(_Const_Base_ptr __x)
806 { return _S_key(static_cast<_Const_Link_type>(__x)); }
807
808 static _Base_ptr
809 _S_minimum(_Base_ptr __x) _GLIBCXX_NOEXCEPTnoexcept
810 { return _Rb_tree_node_base::_S_minimum(__x); }
811
812 static _Const_Base_ptr
813 _S_minimum(_Const_Base_ptr __x) _GLIBCXX_NOEXCEPTnoexcept
814 { return _Rb_tree_node_base::_S_minimum(__x); }
815
816 static _Base_ptr
817 _S_maximum(_Base_ptr __x) _GLIBCXX_NOEXCEPTnoexcept
818 { return _Rb_tree_node_base::_S_maximum(__x); }
819
820 static _Const_Base_ptr
821 _S_maximum(_Const_Base_ptr __x) _GLIBCXX_NOEXCEPTnoexcept
822 { return _Rb_tree_node_base::_S_maximum(__x); }
823
824 public:
825 typedef _Rb_tree_iterator<value_type> iterator;
826 typedef _Rb_tree_const_iterator<value_type> const_iterator;
827
828 typedef std::reverse_iterator<iterator> reverse_iterator;
829 typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
830
831#if __cplusplus201402L > 201402L
832 using node_type = _Node_handle<_Key, _Val, _Node_allocator>;
833 using insert_return_type = _Node_insert_return<
834 conditional_t<is_same_v<_Key, _Val>, const_iterator, iterator>,
835 node_type>;
836#endif
837
838 pair<_Base_ptr, _Base_ptr>
839 _M_get_insert_unique_pos(const key_type& __k);
840
841 pair<_Base_ptr, _Base_ptr>
842 _M_get_insert_equal_pos(const key_type& __k);
843
844 pair<_Base_ptr, _Base_ptr>
845 _M_get_insert_hint_unique_pos(const_iterator __pos,
846 const key_type& __k);
847
848 pair<_Base_ptr, _Base_ptr>
849 _M_get_insert_hint_equal_pos(const_iterator __pos,
850 const key_type& __k);
851
852 private:
853#if __cplusplus201402L >= 201103L
854 template<typename _Arg, typename _NodeGen>
855 iterator
856 _M_insert_(_Base_ptr __x, _Base_ptr __y, _Arg&& __v, _NodeGen&);
857
858 iterator
859 _M_insert_node(_Base_ptr __x, _Base_ptr __y, _Link_type __z);
860
861 template<typename _Arg>
862 iterator
863 _M_insert_lower(_Base_ptr __y, _Arg&& __v);
864
865 template<typename _Arg>
866 iterator
867 _M_insert_equal_lower(_Arg&& __x);
868
869 iterator
870 _M_insert_lower_node(_Base_ptr __p, _Link_type __z);
871
872 iterator
873 _M_insert_equal_lower_node(_Link_type __z);
874#else
875 template<typename _NodeGen>
876 iterator
877 _M_insert_(_Base_ptr __x, _Base_ptr __y,
878 const value_type& __v, _NodeGen&);
879
880 // _GLIBCXX_RESOLVE_LIB_DEFECTS
881 // 233. Insertion hints in associative containers.
882 iterator
883 _M_insert_lower(_Base_ptr __y, const value_type& __v);
884
885 iterator
886 _M_insert_equal_lower(const value_type& __x);
887#endif
888
889 template<typename _NodeGen>
890 _Link_type
891 _M_copy(_Const_Link_type __x, _Base_ptr __p, _NodeGen&);
892
893 template<typename _NodeGen>
894 _Link_type
895 _M_copy(const _Rb_tree& __x, _NodeGen& __gen)
896 {
897 _Link_type __root = _M_copy(__x._M_begin(), _M_end(), __gen);
898 _M_leftmost() = _S_minimum(__root);
899 _M_rightmost() = _S_maximum(__root);
900 _M_impl._M_node_count = __x._M_impl._M_node_count;
901 return __root;
902 }
903
904 _Link_type
905 _M_copy(const _Rb_tree& __x)
906 {
907 _Alloc_node __an(*this);
908 return _M_copy(__x, __an);
909 }
910
911 void
912 _M_erase(_Link_type __x);
913
914 iterator
915 _M_lower_bound(_Link_type __x, _Base_ptr __y,
916 const _Key& __k);
917
918 const_iterator
919 _M_lower_bound(_Const_Link_type __x, _Const_Base_ptr __y,
920 const _Key& __k) const;
921
922 iterator
923 _M_upper_bound(_Link_type __x, _Base_ptr __y,
924 const _Key& __k);
925
926 const_iterator
927 _M_upper_bound(_Const_Link_type __x, _Const_Base_ptr __y,
928 const _Key& __k) const;
929
930 public:
931 // allocation/deallocation
932#if __cplusplus201402L < 201103L
933 _Rb_tree() { }
934#else
935 _Rb_tree() = default;
936#endif
937
938 _Rb_tree(const _Compare& __comp,
939 const allocator_type& __a = allocator_type())
940 : _M_impl(__comp, _Node_allocator(__a)) { }
941
942 _Rb_tree(const _Rb_tree& __x)
943 : _M_impl(__x._M_impl)
944 {
945 if (__x._M_root() != 0)
946 _M_root() = _M_copy(__x);
947 }
948
949#if __cplusplus201402L >= 201103L
950 _Rb_tree(const allocator_type& __a)
951 : _M_impl(_Node_allocator(__a))
952 { }
953
954 _Rb_tree(const _Rb_tree& __x, const allocator_type& __a)
955 : _M_impl(__x._M_impl._M_key_compare, _Node_allocator(__a))
956 {
957 if (__x._M_root() != nullptr)
958 _M_root() = _M_copy(__x);
959 }
960
961 _Rb_tree(_Rb_tree&&) = default;
962
963 _Rb_tree(_Rb_tree&& __x, const allocator_type& __a)
964 : _Rb_tree(std::move(__x), _Node_allocator(__a))
965 { }
966
967 private:
968 _Rb_tree(_Rb_tree&& __x, _Node_allocator&& __a, true_type)
969 noexcept(is_nothrow_default_constructible<_Compare>::value)
970 : _M_impl(std::move(__x._M_impl), std::move(__a))
971 { }
972
973 _Rb_tree(_Rb_tree&& __x, _Node_allocator&& __a, false_type)
974 : _M_impl(__x._M_impl._M_key_compare, std::move(__a))
975 {
976 if (__x._M_root() != nullptr)
977 _M_move_data(__x, false_type{});
978 }
979
980 public:
981 _Rb_tree(_Rb_tree&& __x, _Node_allocator&& __a)
982 noexcept( noexcept(
983 _Rb_tree(std::declval<_Rb_tree&&>(), std::declval<_Node_allocator&&>(),
984 std::declval<typename _Alloc_traits::is_always_equal>())) )
985 : _Rb_tree(std::move(__x), std::move(__a),
986 typename _Alloc_traits::is_always_equal{})
987 { }
988#endif
989
990 ~_Rb_tree() _GLIBCXX_NOEXCEPTnoexcept
991 { _M_erase(_M_begin()); }
992
993 _Rb_tree&
994 operator=(const _Rb_tree& __x);
995
996 // Accessors.
997 _Compare
998 key_comp() const
999 { return _M_impl._M_key_compare; }
1000
1001 iterator
1002 begin() _GLIBCXX_NOEXCEPTnoexcept
1003 { return iterator(this->_M_impl._M_header._M_left); }
1004
1005 const_iterator
1006 begin() const _GLIBCXX_NOEXCEPTnoexcept
1007 { return const_iterator(this->_M_impl._M_header._M_left); }
1008
1009 iterator
1010 end() _GLIBCXX_NOEXCEPTnoexcept
1011 { return iterator(&this->_M_impl._M_header); }
1012
1013 const_iterator
1014 end() const _GLIBCXX_NOEXCEPTnoexcept
1015 { return const_iterator(&this->_M_impl._M_header); }
1016
1017 reverse_iterator
1018 rbegin() _GLIBCXX_NOEXCEPTnoexcept
1019 { return reverse_iterator(end()); }
1020
1021 const_reverse_iterator
1022 rbegin() const _GLIBCXX_NOEXCEPTnoexcept
1023 { return const_reverse_iterator(end()); }
1024
1025 reverse_iterator
1026 rend() _GLIBCXX_NOEXCEPTnoexcept
1027 { return reverse_iterator(begin()); }
1028
1029 const_reverse_iterator
1030 rend() const _GLIBCXX_NOEXCEPTnoexcept
1031 { return const_reverse_iterator(begin()); }
1032
1033 _GLIBCXX_NODISCARD bool
1034 empty() const _GLIBCXX_NOEXCEPTnoexcept
1035 { return _M_impl._M_node_count == 0; }
1036
1037 size_type
1038 size() const _GLIBCXX_NOEXCEPTnoexcept
1039 { return _M_impl._M_node_count; }
1040
1041 size_type
1042 max_size() const _GLIBCXX_NOEXCEPTnoexcept
1043 { return _Alloc_traits::max_size(_M_get_Node_allocator()); }
1044
1045 void
1046 swap(_Rb_tree& __t)
1047 _GLIBCXX_NOEXCEPT_IF(__is_nothrow_swappable<_Compare>::value)noexcept(__is_nothrow_swappable<_Compare>::value);
1048
1049 // Insert/erase.
1050#if __cplusplus201402L >= 201103L
1051 template<typename _Arg>
1052 pair<iterator, bool>
1053 _M_insert_unique(_Arg&& __x);
1054
1055 template<typename _Arg>
1056 iterator
1057 _M_insert_equal(_Arg&& __x);
1058
1059 template<typename _Arg, typename _NodeGen>
1060 iterator
1061 _M_insert_unique_(const_iterator __pos, _Arg&& __x, _NodeGen&);
1062
1063 template<typename _Arg>
1064 iterator
1065 _M_insert_unique_(const_iterator __pos, _Arg&& __x)
1066 {
1067 _Alloc_node __an(*this);
1068 return _M_insert_unique_(__pos, std::forward<_Arg>(__x), __an);
1069 }
1070
1071 template<typename _Arg, typename _NodeGen>
1072 iterator
1073 _M_insert_equal_(const_iterator __pos, _Arg&& __x, _NodeGen&);
1074
1075 template<typename _Arg>
1076 iterator
1077 _M_insert_equal_(const_iterator __pos, _Arg&& __x)
1078 {
1079 _Alloc_node __an(*this);
1080 return _M_insert_equal_(__pos, std::forward<_Arg>(__x), __an);
1081 }
1082
1083 template<typename... _Args>
1084 pair<iterator, bool>
1085 _M_emplace_unique(_Args&&... __args);
1086
1087 template<typename... _Args>
1088 iterator
1089 _M_emplace_equal(_Args&&... __args);
1090
1091 template<typename... _Args>
1092 iterator
1093 _M_emplace_hint_unique(const_iterator __pos, _Args&&... __args);
1094
1095 template<typename... _Args>
1096 iterator
1097 _M_emplace_hint_equal(const_iterator __pos, _Args&&... __args);
1098
1099 template<typename _Iter>
1100 using __same_value_type
1101 = is_same<value_type, typename iterator_traits<_Iter>::value_type>;
1102
1103 template<typename _InputIterator>
1104 __enable_if_t<__same_value_type<_InputIterator>::value>
1105 _M_insert_range_unique(_InputIterator __first, _InputIterator __last)
1106 {
1107 _Alloc_node __an(*this);
1108 for (; __first != __last; ++__first)
1109 _M_insert_unique_(end(), *__first, __an);
1110 }
1111
1112 template<typename _InputIterator>
1113 __enable_if_t<!__same_value_type<_InputIterator>::value>
1114 _M_insert_range_unique(_InputIterator __first, _InputIterator __last)
1115 {
1116 for (; __first != __last; ++__first)
1117 _M_emplace_unique(*__first);
1118 }
1119
1120 template<typename _InputIterator>
1121 __enable_if_t<__same_value_type<_InputIterator>::value>
1122 _M_insert_range_equal(_InputIterator __first, _InputIterator __last)
1123 {
1124 _Alloc_node __an(*this);
1125 for (; __first != __last; ++__first)
1126 _M_insert_equal_(end(), *__first, __an);
1127 }
1128
1129 template<typename _InputIterator>
1130 __enable_if_t<!__same_value_type<_InputIterator>::value>
1131 _M_insert_range_equal(_InputIterator __first, _InputIterator __last)
1132 {
1133 _Alloc_node __an(*this);
1134 for (; __first != __last; ++__first)
1135 _M_emplace_equal(*__first);
1136 }
1137#else
1138 pair<iterator, bool>
1139 _M_insert_unique(const value_type& __x);
1140
1141 iterator
1142 _M_insert_equal(const value_type& __x);
1143
1144 template<typename _NodeGen>
1145 iterator
1146 _M_insert_unique_(const_iterator __pos, const value_type& __x,
1147 _NodeGen&);
1148
1149 iterator
1150 _M_insert_unique_(const_iterator __pos, const value_type& __x)
1151 {
1152 _Alloc_node __an(*this);
1153 return _M_insert_unique_(__pos, __x, __an);
1154 }
1155
1156 template<typename _NodeGen>
1157 iterator
1158 _M_insert_equal_(const_iterator __pos, const value_type& __x,
1159 _NodeGen&);
1160 iterator
1161 _M_insert_equal_(const_iterator __pos, const value_type& __x)
1162 {
1163 _Alloc_node __an(*this);
1164 return _M_insert_equal_(__pos, __x, __an);
1165 }
1166
1167 template<typename _InputIterator>
1168 void
1169 _M_insert_range_unique(_InputIterator __first, _InputIterator __last)
1170 {
1171 _Alloc_node __an(*this);
1172 for (; __first != __last; ++__first)
1173 _M_insert_unique_(end(), *__first, __an);
1174 }
1175
1176 template<typename _InputIterator>
1177 void
1178 _M_insert_range_equal(_InputIterator __first, _InputIterator __last)
1179 {
1180 _Alloc_node __an(*this);
1181 for (; __first != __last; ++__first)
1182 _M_insert_equal_(end(), *__first, __an);
1183 }
1184#endif
1185
1186 private:
1187 void
1188 _M_erase_aux(const_iterator __position);
1189
1190 void
1191 _M_erase_aux(const_iterator __first, const_iterator __last);
1192
1193 public:
1194#if __cplusplus201402L >= 201103L
1195 // _GLIBCXX_RESOLVE_LIB_DEFECTS
1196 // DR 130. Associative erase should return an iterator.
1197 _GLIBCXX_ABI_TAG_CXX11__attribute ((__abi_tag__ ("cxx11")))
1198 iterator
1199 erase(const_iterator __position)
1200 {
1201 __glibcxx_assert(__position != end());
1202 const_iterator __result = __position;
1203 ++__result;
1204 _M_erase_aux(__position);
1205 return __result._M_const_cast();
1206 }
1207
1208 // LWG 2059.
1209 _GLIBCXX_ABI_TAG_CXX11__attribute ((__abi_tag__ ("cxx11")))
1210 iterator
1211 erase(iterator __position)
1212 {
1213 __glibcxx_assert(__position != end());
1214 iterator __result = __position;
1215 ++__result;
1216 _M_erase_aux(__position);
1217 return __result;
1218 }
1219#else
1220 void
1221 erase(iterator __position)
1222 {
1223 __glibcxx_assert(__position != end());
1224 _M_erase_aux(__position);
1225 }
1226
1227 void
1228 erase(const_iterator __position)
1229 {
1230 __glibcxx_assert(__position != end());
1231 _M_erase_aux(__position);
1232 }
1233#endif
1234
1235 size_type
1236 erase(const key_type& __x);
1237
1238#if __cplusplus201402L >= 201103L
1239 // _GLIBCXX_RESOLVE_LIB_DEFECTS
1240 // DR 130. Associative erase should return an iterator.
1241 _GLIBCXX_ABI_TAG_CXX11__attribute ((__abi_tag__ ("cxx11")))
1242 iterator
1243 erase(const_iterator __first, const_iterator __last)
1244 {
1245 _M_erase_aux(__first, __last);
1246 return __last._M_const_cast();
1247 }
1248#else
1249 void
1250 erase(iterator __first, iterator __last)
1251 { _M_erase_aux(__first, __last); }
1252
1253 void
1254 erase(const_iterator __first, const_iterator __last)
1255 { _M_erase_aux(__first, __last); }
1256#endif
1257
1258 void
1259 clear() _GLIBCXX_NOEXCEPTnoexcept
1260 {
1261 _M_erase(_M_begin());
1262 _M_impl._M_reset();
1263 }
1264
1265 // Set operations.
1266 iterator
1267 find(const key_type& __k);
1268
1269 const_iterator
1270 find(const key_type& __k) const;
1271
1272 size_type
1273 count(const key_type& __k) const;
1274
1275 iterator
1276 lower_bound(const key_type& __k)
1277 { return _M_lower_bound(_M_begin(), _M_end(), __k); }
1278
1279 const_iterator
1280 lower_bound(const key_type& __k) const
1281 { return _M_lower_bound(_M_begin(), _M_end(), __k); }
1282
1283 iterator
1284 upper_bound(const key_type& __k)
1285 { return _M_upper_bound(_M_begin(), _M_end(), __k); }
1286
1287 const_iterator
1288 upper_bound(const key_type& __k) const
1289 { return _M_upper_bound(_M_begin(), _M_end(), __k); }
1290
1291 pair<iterator, iterator>
1292 equal_range(const key_type& __k);
1293
1294 pair<const_iterator, const_iterator>
1295 equal_range(const key_type& __k) const;
1296
1297#if __cplusplus201402L >= 201402L
1298 template<typename _Kt,
1299 typename _Req = __has_is_transparent_t<_Compare, _Kt>>
1300 iterator
1301 _M_find_tr(const _Kt& __k)
1302 {
1303 const _Rb_tree* __const_this = this;
1304 return __const_this->_M_find_tr(__k)._M_const_cast();
1305 }
1306
1307 template<typename _Kt,
1308 typename _Req = __has_is_transparent_t<_Compare, _Kt>>
1309 const_iterator
1310 _M_find_tr(const _Kt& __k) const
1311 {
1312 auto __j = _M_lower_bound_tr(__k);
1313 if (__j != end() && _M_impl._M_key_compare(__k, _S_key(__j._M_node)))
1314 __j = end();
1315 return __j;
1316 }
1317
1318 template<typename _Kt,
1319 typename _Req = __has_is_transparent_t<_Compare, _Kt>>
1320 size_type
1321 _M_count_tr(const _Kt& __k) const
1322 {
1323 auto __p = _M_equal_range_tr(__k);
1324 return std::distance(__p.first, __p.second);
1325 }
1326
1327 template<typename _Kt,
1328 typename _Req = __has_is_transparent_t<_Compare, _Kt>>
1329 iterator
1330 _M_lower_bound_tr(const _Kt& __k)
1331 {
1332 const _Rb_tree* __const_this = this;
1333 return __const_this->_M_lower_bound_tr(__k)._M_const_cast();
1334 }
1335
1336 template<typename _Kt,
1337 typename _Req = __has_is_transparent_t<_Compare, _Kt>>
1338 const_iterator
1339 _M_lower_bound_tr(const _Kt& __k) const
1340 {
1341 auto __x = _M_begin();
1342 auto __y = _M_end();
1343 while (__x != 0)
1344 if (!_M_impl._M_key_compare(_S_key(__x), __k))
1345 {
1346 __y = __x;
1347 __x = _S_left(__x);
1348 }
1349 else
1350 __x = _S_right(__x);
1351 return const_iterator(__y);
1352 }
1353
1354 template<typename _Kt,
1355 typename _Req = __has_is_transparent_t<_Compare, _Kt>>
1356 iterator
1357 _M_upper_bound_tr(const _Kt& __k)
1358 {
1359 const _Rb_tree* __const_this = this;
1360 return __const_this->_M_upper_bound_tr(__k)._M_const_cast();
1361 }
1362
1363 template<typename _Kt,
1364 typename _Req = __has_is_transparent_t<_Compare, _Kt>>
1365 const_iterator
1366 _M_upper_bound_tr(const _Kt& __k) const
1367 {
1368 auto __x = _M_begin();
1369 auto __y = _M_end();
1370 while (__x != 0)
1371 if (_M_impl._M_key_compare(__k, _S_key(__x)))
1372 {
1373 __y = __x;
1374 __x = _S_left(__x);
1375 }
1376 else
1377 __x = _S_right(__x);
1378 return const_iterator(__y);
1379 }
1380
1381 template<typename _Kt,
1382 typename _Req = __has_is_transparent_t<_Compare, _Kt>>
1383 pair<iterator, iterator>
1384 _M_equal_range_tr(const _Kt& __k)
1385 {
1386 const _Rb_tree* __const_this = this;
1387 auto __ret = __const_this->_M_equal_range_tr(__k);
1388 return { __ret.first._M_const_cast(), __ret.second._M_const_cast() };
1389 }
1390
1391 template<typename _Kt,
1392 typename _Req = __has_is_transparent_t<_Compare, _Kt>>
1393 pair<const_iterator, const_iterator>
1394 _M_equal_range_tr(const _Kt& __k) const
1395 {
1396 auto __low = _M_lower_bound_tr(__k);
1397 auto __high = __low;
1398 auto& __cmp = _M_impl._M_key_compare;
1399 while (__high != end() && !__cmp(__k, _S_key(__high._M_node)))
1400 ++__high;
1401 return { __low, __high };
1402 }
1403#endif
1404
1405 // Debugging.
1406 bool
1407 __rb_verify() const;
1408
1409#if __cplusplus201402L >= 201103L
1410 _Rb_tree&
1411 operator=(_Rb_tree&&)
1412 noexcept(_Alloc_traits::_S_nothrow_move()
1413 && is_nothrow_move_assignable<_Compare>::value);
1414
1415 template<typename _Iterator>
1416 void
1417 _M_assign_unique(_Iterator, _Iterator);
1418
1419 template<typename _Iterator>
1420 void
1421 _M_assign_equal(_Iterator, _Iterator);
1422
1423 private:
1424 // Move elements from container with equal allocator.
1425 void
1426 _M_move_data(_Rb_tree& __x, true_type)
1427 { _M_impl._M_move_data(__x._M_impl); }
1428
1429 // Move elements from container with possibly non-equal allocator,
1430 // which might result in a copy not a move.
1431 void
1432 _M_move_data(_Rb_tree&, false_type);
1433
1434 // Move assignment from container with equal allocator.
1435 void
1436 _M_move_assign(_Rb_tree&, true_type);
1437
1438 // Move assignment from container with possibly non-equal allocator,
1439 // which might result in a copy not a move.
1440 void
1441 _M_move_assign(_Rb_tree&, false_type);
1442#endif
1443
1444#if __cplusplus201402L > 201402L
1445 public:
1446 /// Re-insert an extracted node.
1447 insert_return_type
1448 _M_reinsert_node_unique(node_type&& __nh)
1449 {
1450 insert_return_type __ret;
1451 if (__nh.empty())
1452 __ret.position = end();
1453 else
1454 {
1455 __glibcxx_assert(_M_get_Node_allocator() == *__nh._M_alloc);
1456
1457 auto __res = _M_get_insert_unique_pos(__nh._M_key());
1458 if (__res.second)
1459 {
1460 __ret.position
1461 = _M_insert_node(__res.first, __res.second, __nh._M_ptr);
1462 __nh._M_ptr = nullptr;
1463 __ret.inserted = true;
1464 }
1465 else
1466 {
1467 __ret.node = std::move(__nh);
1468 __ret.position = iterator(__res.first);
1469 __ret.inserted = false;
1470 }
1471 }
1472 return __ret;
1473 }
1474
1475 /// Re-insert an extracted node.
1476 iterator
1477 _M_reinsert_node_equal(node_type&& __nh)
1478 {
1479 iterator __ret;
1480 if (__nh.empty())
1481 __ret = end();
1482 else
1483 {
1484 __glibcxx_assert(_M_get_Node_allocator() == *__nh._M_alloc);
1485 auto __res = _M_get_insert_equal_pos(__nh._M_key());
1486 if (__res.second)
1487 __ret = _M_insert_node(__res.first, __res.second, __nh._M_ptr);
1488 else
1489 __ret = _M_insert_equal_lower_node(__nh._M_ptr);
1490 __nh._M_ptr = nullptr;
1491 }
1492 return __ret;
1493 }
1494
1495 /// Re-insert an extracted node.
1496 iterator
1497 _M_reinsert_node_hint_unique(const_iterator __hint, node_type&& __nh)
1498 {
1499 iterator __ret;
1500 if (__nh.empty())
1501 __ret = end();
1502 else
1503 {
1504 __glibcxx_assert(_M_get_Node_allocator() == *__nh._M_alloc);
1505 auto __res = _M_get_insert_hint_unique_pos(__hint, __nh._M_key());
1506 if (__res.second)
1507 {
1508 __ret = _M_insert_node(__res.first, __res.second, __nh._M_ptr);
1509 __nh._M_ptr = nullptr;
1510 }
1511 else
1512 __ret = iterator(__res.first);
1513 }
1514 return __ret;
1515 }
1516
1517 /// Re-insert an extracted node.
1518 iterator
1519 _M_reinsert_node_hint_equal(const_iterator __hint, node_type&& __nh)
1520 {
1521 iterator __ret;
1522 if (__nh.empty())
1523 __ret = end();
1524 else
1525 {
1526 __glibcxx_assert(_M_get_Node_allocator() == *__nh._M_alloc);
1527 auto __res = _M_get_insert_hint_equal_pos(__hint, __nh._M_key());
1528 if (__res.second)
1529 __ret = _M_insert_node(__res.first, __res.second, __nh._M_ptr);
1530 else
1531 __ret = _M_insert_equal_lower_node(__nh._M_ptr);
1532 __nh._M_ptr = nullptr;
1533 }
1534 return __ret;
1535 }
1536
1537 /// Extract a node.
1538 node_type
1539 extract(const_iterator __pos)
1540 {
1541 auto __ptr = _Rb_tree_rebalance_for_erase(
1542 __pos._M_const_cast()._M_node, _M_impl._M_header);
1543 --_M_impl._M_node_count;
1544 return { static_cast<_Link_type>(__ptr), _M_get_Node_allocator() };
1545 }
1546
1547 /// Extract a node.
1548 node_type
1549 extract(const key_type& __k)
1550 {
1551 node_type __nh;
1552 auto __pos = find(__k);
1553 if (__pos != end())
1554 __nh = extract(const_iterator(__pos));
1555 return __nh;
1556 }
1557
1558 template<typename _Compare2>
1559 using _Compatible_tree
1560 = _Rb_tree<_Key, _Val, _KeyOfValue, _Compare2, _Alloc>;
1561
1562 template<typename, typename>
1563 friend class _Rb_tree_merge_helper;
1564
1565 /// Merge from a compatible container into one with unique keys.
1566 template<typename _Compare2>
1567 void
1568 _M_merge_unique(_Compatible_tree<_Compare2>& __src) noexcept
1569 {
1570 using _Merge_helper = _Rb_tree_merge_helper<_Rb_tree, _Compare2>;
1571 for (auto __i = __src.begin(), __end = __src.end(); __i != __end;)
1572 {
1573 auto __pos = __i++;
1574 auto __res = _M_get_insert_unique_pos(_KeyOfValue()(*__pos));
1575 if (__res.second)
1576 {
1577 auto& __src_impl = _Merge_helper::_S_get_impl(__src);
1578 auto __ptr = _Rb_tree_rebalance_for_erase(
1579 __pos._M_node, __src_impl._M_header);
1580 --__src_impl._M_node_count;
1581 _M_insert_node(__res.first, __res.second,
1582 static_cast<_Link_type>(__ptr));
1583 }
1584 }
1585 }
1586
1587 /// Merge from a compatible container into one with equivalent keys.
1588 template<typename _Compare2>
1589 void
1590 _M_merge_equal(_Compatible_tree<_Compare2>& __src) noexcept
1591 {
1592 using _Merge_helper = _Rb_tree_merge_helper<_Rb_tree, _Compare2>;
1593 for (auto __i = __src.begin(), __end = __src.end(); __i != __end;)
1594 {
1595 auto __pos = __i++;
1596 auto __res = _M_get_insert_equal_pos(_KeyOfValue()(*__pos));
1597 if (__res.second)
1598 {
1599 auto& __src_impl = _Merge_helper::_S_get_impl(__src);
1600 auto __ptr = _Rb_tree_rebalance_for_erase(
1601 __pos._M_node, __src_impl._M_header);
1602 --__src_impl._M_node_count;
1603 _M_insert_node(__res.first, __res.second,
1604 static_cast<_Link_type>(__ptr));
1605 }
1606 }
1607 }
1608#endif // C++17
1609
1610 friend bool
1611 operator==(const _Rb_tree& __x, const _Rb_tree& __y)
1612 {
1613 return __x.size() == __y.size()
1614 && std::equal(__x.begin(), __x.end(), __y.begin());
1615 }
1616
1617#if __cpp_lib_three_way_comparison
1618 friend auto
1619 operator<=>(const _Rb_tree& __x, const _Rb_tree& __y)
1620 {
1621 if constexpr (requires { typename __detail::__synth3way_t<_Val>; })
1622 return std::lexicographical_compare_three_way(__x.begin(), __x.end(),
1623 __y.begin(), __y.end(),
1624 __detail::__synth3way);
1625 }
1626#else
1627 friend bool
1628 operator<(const _Rb_tree& __x, const _Rb_tree& __y)
1629 {
1630 return std::lexicographical_compare(__x.begin(), __x.end(),
1631 __y.begin(), __y.end());
1632 }
1633
1634 friend bool _GLIBCXX_DEPRECATED__attribute__ ((__deprecated__))
1635 operator!=(const _Rb_tree& __x, const _Rb_tree& __y)
1636 { return !(__x == __y); }
1637
1638 friend bool _GLIBCXX_DEPRECATED__attribute__ ((__deprecated__))
1639 operator>(const _Rb_tree& __x, const _Rb_tree& __y)
1640 { return __y < __x; }
1641
1642 friend bool _GLIBCXX_DEPRECATED__attribute__ ((__deprecated__))
1643 operator<=(const _Rb_tree& __x, const _Rb_tree& __y)
1644 { return !(__y < __x); }
1645
1646 friend bool _GLIBCXX_DEPRECATED__attribute__ ((__deprecated__))
1647 operator>=(const _Rb_tree& __x, const _Rb_tree& __y)
1648 { return !(__x < __y); }
1649#endif
1650 };
1651
1652 template<typename _Key, typename _Val, typename _KeyOfValue,
1653 typename _Compare, typename _Alloc>
1654 inline void
1655 swap(_Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>& __x,
1656 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>& __y)
1657 { __x.swap(__y); }
1658
1659#if __cplusplus201402L >= 201103L
1660 template<typename _Key, typename _Val, typename _KeyOfValue,
1661 typename _Compare, typename _Alloc>
1662 void
1663 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
1664 _M_move_data(_Rb_tree& __x, false_type)
1665 {
1666 if (_M_get_Node_allocator() == __x._M_get_Node_allocator())
1667 _M_move_data(__x, true_type());
1668 else
1669 {
1670 _Alloc_node __an(*this);
1671 auto __lbd =
1672 [&__an](const value_type& __cval)
1673 {
1674 auto& __val = const_cast<value_type&>(__cval);
1675 return __an(std::move_if_noexcept(__val));
1676 };
1677 _M_root() = _M_copy(__x, __lbd);
1678 }
1679 }
1680
1681 template<typename _Key, typename _Val, typename _KeyOfValue,
1682 typename _Compare, typename _Alloc>
1683 inline void
1684 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
1685 _M_move_assign(_Rb_tree& __x, true_type)
1686 {
1687 clear();
1688 if (__x._M_root() != nullptr)
1689 _M_move_data(__x, true_type());
1690 std::__alloc_on_move(_M_get_Node_allocator(),
1691 __x._M_get_Node_allocator());
1692 }
1693
1694 template<typename _Key, typename _Val, typename _KeyOfValue,
1695 typename _Compare, typename _Alloc>
1696 void
1697 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
1698 _M_move_assign(_Rb_tree& __x, false_type)
1699 {
1700 if (_M_get_Node_allocator() == __x._M_get_Node_allocator())
1701 return _M_move_assign(__x, true_type{});
1702
1703 // Try to move each node reusing existing nodes and copying __x nodes
1704 // structure.
1705 _Reuse_or_alloc_node __roan(*this);
1706 _M_impl._M_reset();
1707 if (__x._M_root() != nullptr)
1708 {
1709 auto __lbd =
1710 [&__roan](const value_type& __cval)
1711 {
1712 auto& __val = const_cast<value_type&>(__cval);
1713 return __roan(std::move(__val));
1714 };
1715 _M_root() = _M_copy(__x, __lbd);
1716 __x.clear();
1717 }
1718 }
1719
1720 template<typename _Key, typename _Val, typename _KeyOfValue,
1721 typename _Compare, typename _Alloc>
1722 inline _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>&
1723 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
1724 operator=(_Rb_tree&& __x)
1725 noexcept(_Alloc_traits::_S_nothrow_move()
1726 && is_nothrow_move_assignable<_Compare>::value)
1727 {
1728 _M_impl._M_key_compare = std::move(__x._M_impl._M_key_compare);
1729 _M_move_assign(__x, __bool_constant<_Alloc_traits::_S_nothrow_move()>());
1730 return *this;
1731 }
1732
1733 template<typename _Key, typename _Val, typename _KeyOfValue,
1734 typename _Compare, typename _Alloc>
1735 template<typename _Iterator>
1736 void
1737 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
1738 _M_assign_unique(_Iterator __first, _Iterator __last)
1739 {
1740 _Reuse_or_alloc_node __roan(*this);
1741 _M_impl._M_reset();
1742 for (; __first != __last; ++__first)
1743 _M_insert_unique_(end(), *__first, __roan);
1744 }
1745
1746 template<typename _Key, typename _Val, typename _KeyOfValue,
1747 typename _Compare, typename _Alloc>
1748 template<typename _Iterator>
1749 void
1750 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
1751 _M_assign_equal(_Iterator __first, _Iterator __last)
1752 {
1753 _Reuse_or_alloc_node __roan(*this);
1754 _M_impl._M_reset();
1755 for (; __first != __last; ++__first)
1756 _M_insert_equal_(end(), *__first, __roan);
1757 }
1758#endif
1759
1760 template<typename _Key, typename _Val, typename _KeyOfValue,
1761 typename _Compare, typename _Alloc>
1762 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>&
1763 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
1764 operator=(const _Rb_tree& __x)
1765 {
1766 if (this != &__x)
1767 {
1768 // Note that _Key may be a constant type.
1769#if __cplusplus201402L >= 201103L
1770 if (_Alloc_traits::_S_propagate_on_copy_assign())
1771 {
1772 auto& __this_alloc = this->_M_get_Node_allocator();
1773 auto& __that_alloc = __x._M_get_Node_allocator();
1774 if (!_Alloc_traits::_S_always_equal()
1775 && __this_alloc != __that_alloc)
1776 {
1777 // Replacement allocator cannot free existing storage, we need
1778 // to erase nodes first.
1779 clear();
1780 std::__alloc_on_copy(__this_alloc, __that_alloc);
1781 }
1782 }
1783#endif
1784
1785 _Reuse_or_alloc_node __roan(*this);
1786 _M_impl._M_reset();
1787 _M_impl._M_key_compare = __x._M_impl._M_key_compare;
1788 if (__x._M_root() != 0)
1789 _M_root() = _M_copy(__x, __roan);
1790 }
1791
1792 return *this;
1793 }
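
The copy assignment above clears the tree first when the incoming allocator propagates on copy assignment and compares unequal, because the replacement allocator could not free nodes obtained from the old one. A small sketch of querying those traits through the public std::allocator_traits interface; the helper name is invented for illustration.

#include <cstdio>
#include <memory>

// Sketch: decide, the way operator= above does, whether storage must be
// released *before* the allocator is replaced on copy assignment.
template <typename Alloc>
bool must_clear_before_copy_assign(const Alloc &lhs, const Alloc &rhs) {
  using Traits = std::allocator_traits<Alloc>;
  return Traits::propagate_on_container_copy_assignment::value &&
         !Traits::is_always_equal::value && lhs != rhs;
}

int main() {
  std::allocator<int> a, b;
  // std::allocator is always equal and does not propagate, so no pre-clear.
  std::printf("%d\n", must_clear_before_copy_assign(a, b)); // 0
}
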
1794
1795 template<typename _Key, typename _Val, typename _KeyOfValue,
1796 typename _Compare, typename _Alloc>
1797#if __cplusplus201402L >= 201103L
1798 template<typename _Arg, typename _NodeGen>
1799#else
1800 template<typename _NodeGen>
1801#endif
1802 typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::iterator
1803 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
1804 _M_insert_(_Base_ptr __x, _Base_ptr __p,
1805#if __cplusplus201402L >= 201103L
1806 _Arg&& __v,
1807#else
1808 const _Val& __v,
1809#endif
1810 _NodeGen& __node_gen)
1811 {
1812 bool __insert_left = (__x != 0 || __p == _M_end()
1813 || _M_impl._M_key_compare(_KeyOfValue()(__v),
1814 _S_key(__p)));
1815
1816 _Link_type __z = __node_gen(_GLIBCXX_FORWARD(_Arg, __v)std::forward<_Arg>(__v));
1817
1818 _Rb_tree_insert_and_rebalance(__insert_left, __z, __p,
1819 this->_M_impl._M_header);
1820 ++_M_impl._M_node_count;
1821 return iterator(__z);
1822 }
1823
1824 template<typename _Key, typename _Val, typename _KeyOfValue,
1825 typename _Compare, typename _Alloc>
1826#if __cplusplus201402L >= 201103L
1827 template<typename _Arg>
1828#endif
1829 typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::iterator
1830 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
1831#if __cplusplus201402L >= 201103L
1832 _M_insert_lower(_Base_ptr __p, _Arg&& __v)
1833#else
1834 _M_insert_lower(_Base_ptr __p, const _Val& __v)
1835#endif
1836 {
1837 bool __insert_left = (__p == _M_end()
1838 || !_M_impl._M_key_compare(_S_key(__p),
1839 _KeyOfValue()(__v)));
1840
1841 _Link_type __z = _M_create_node(_GLIBCXX_FORWARD(_Arg, __v)std::forward<_Arg>(__v));
1842
1843 _Rb_tree_insert_and_rebalance(__insert_left, __z, __p,
1844 this->_M_impl._M_header);
1845 ++_M_impl._M_node_count;
1846 return iterator(__z);
1847 }
1848
1849 template<typename _Key, typename _Val, typename _KeyOfValue,
1850 typename _Compare, typename _Alloc>
1851#if __cplusplus201402L >= 201103L
1852 template<typename _Arg>
1853#endif
1854 typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::iterator
1855 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
1856#if __cplusplus201402L >= 201103L
1857 _M_insert_equal_lower(_Arg&& __v)
1858#else
1859 _M_insert_equal_lower(const _Val& __v)
1860#endif
1861 {
1862 _Link_type __x = _M_begin();
1863 _Base_ptr __y = _M_end();
1864 while (__x != 0)
1865 {
1866 __y = __x;
1867 __x = !_M_impl._M_key_compare(_S_key(__x), _KeyOfValue()(__v)) ?
1868 _S_left(__x) : _S_right(__x);
1869 }
1870 return _M_insert_lower(__y, _GLIBCXX_FORWARD(_Arg, __v)std::forward<_Arg>(__v));
1871 }
1872
1873 template<typename _Key, typename _Val, typename _KoV,
1874 typename _Compare, typename _Alloc>
1875 template<typename _NodeGen>
1876 typename _Rb_tree<_Key, _Val, _KoV, _Compare, _Alloc>::_Link_type
1877 _Rb_tree<_Key, _Val, _KoV, _Compare, _Alloc>::
1878 _M_copy(_Const_Link_type __x, _Base_ptr __p, _NodeGen& __node_gen)
1879 {
1880 // Structural copy. __x and __p must be non-null.
1881 _Link_type __top = _M_clone_node(__x, __node_gen);
1882 __top->_M_parent = __p;
1883
1884 __try
1885 {
1886 if (__x->_M_right)
1887 __top->_M_right = _M_copy(_S_right(__x), __top, __node_gen);
1888 __p = __top;
1889 __x = _S_left(__x);
1890
1891 while (__x != 0)
1892 {
1893 _Link_type __y = _M_clone_node(__x, __node_gen);
1894 __p->_M_left = __y;
1895 __y->_M_parent = __p;
1896 if (__x->_M_right)
1897 __y->_M_right = _M_copy(_S_right(__x), __y, __node_gen);
1898 __p = __y;
1899 __x = _S_left(__x);
1900 }
1901 }
1902 __catch(...)
1903 {
1904 _M_erase(__top);
1905 __throw_exception_again;
1906 }
1907 return __top;
1908 }
1909
1910 template<typename _Key, typename _Val, typename _KeyOfValue,
1911 typename _Compare, typename _Alloc>
1912 void
1913 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
1914 _M_erase(_Link_type __x)
1915 {
1916 // Erase without rebalancing.
1917 while (__x != 0)
1918 {
1919 _M_erase(_S_right(__x));
1920 _Link_type __y = _S_left(__x);
1921 _M_drop_node(__x);
1922 __x = __y;
1923 }
1924 }
1925
1926 template<typename _Key, typename _Val, typename _KeyOfValue,
1927 typename _Compare, typename _Alloc>
1928 typename _Rb_tree<_Key, _Val, _KeyOfValue,
1929 _Compare, _Alloc>::iterator
1930 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
1931 _M_lower_bound(_Link_type __x, _Base_ptr __y,
1932 const _Key& __k)
1933 {
1934 while (__x != 0)
1935 if (!_M_impl._M_key_compare(_S_key(__x), __k))
1936 __y = __x, __x = _S_left(__x);
1937 else
1938 __x = _S_right(__x);
1939 return iterator(__y);
1940 }
1941
1942 template<typename _Key, typename _Val, typename _KeyOfValue,
1943 typename _Compare, typename _Alloc>
1944 typename _Rb_tree<_Key, _Val, _KeyOfValue,
1945 _Compare, _Alloc>::const_iterator
1946 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
1947 _M_lower_bound(_Const_Link_type __x, _Const_Base_ptr __y,
1948 const _Key& __k) const
1949 {
1950 while (__x != 0)
1951 if (!_M_impl._M_key_compare(_S_key(__x), __k))
1952 __y = __x, __x = _S_left(__x);
1953 else
1954 __x = _S_right(__x);
1955 return const_iterator(__y);
1956 }
1957
1958 template<typename _Key, typename _Val, typename _KeyOfValue,
1959 typename _Compare, typename _Alloc>
1960 typename _Rb_tree<_Key, _Val, _KeyOfValue,
1961 _Compare, _Alloc>::iterator
1962 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
1963 _M_upper_bound(_Link_type __x, _Base_ptr __y,
1964 const _Key& __k)
1965 {
1966 while (__x != 0)
1967 if (_M_impl._M_key_compare(__k, _S_key(__x)))
1968 __y = __x, __x = _S_left(__x);
1969 else
1970 __x = _S_right(__x);
1971 return iterator(__y);
1972 }
1973
1974 template<typename _Key, typename _Val, typename _KeyOfValue,
1975 typename _Compare, typename _Alloc>
1976 typename _Rb_tree<_Key, _Val, _KeyOfValue,
1977 _Compare, _Alloc>::const_iterator
1978 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
1979 _M_upper_bound(_Const_Link_type __x, _Const_Base_ptr __y,
1980 const _Key& __k) const
1981 {
1982 while (__x != 0)
1983 if (_M_impl._M_key_compare(__k, _S_key(__x)))
1984 __y = __x, __x = _S_left(__x);
1985 else
1986 __x = _S_right(__x);
1987 return const_iterator(__y);
1988 }
1989
1990 template<typename _Key, typename _Val, typename _KeyOfValue,
1991 typename _Compare, typename _Alloc>
1992 pair<typename _Rb_tree<_Key, _Val, _KeyOfValue,
1993 _Compare, _Alloc>::iterator,
1994 typename _Rb_tree<_Key, _Val, _KeyOfValue,
1995 _Compare, _Alloc>::iterator>
1996 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
1997 equal_range(const _Key& __k)
1998 {
1999 _Link_type __x = _M_begin();
2000 _Base_ptr __y = _M_end();
2001 while (__x != 0)
2002 {
2003 if (_M_impl._M_key_compare(_S_key(__x), __k))
2004 __x = _S_right(__x);
2005 else if (_M_impl._M_key_compare(__k, _S_key(__x)))
2006 __y = __x, __x = _S_left(__x);
2007 else
2008 {
2009 _Link_type __xu(__x);
2010 _Base_ptr __yu(__y);
2011 __y = __x, __x = _S_left(__x);
2012 __xu = _S_right(__xu);
2013 return pair<iterator,
2014 iterator>(_M_lower_bound(__x, __y, __k),
2015 _M_upper_bound(__xu, __yu, __k));
2016 }
2017 }
2018 return pair<iterator, iterator>(iterator(__y),
2019 iterator(__y));
2020 }
2021
2022 template<typename _Key, typename _Val, typename _KeyOfValue,
2023 typename _Compare, typename _Alloc>
2024 pair<typename _Rb_tree<_Key, _Val, _KeyOfValue,
2025 _Compare, _Alloc>::const_iterator,
2026 typename _Rb_tree<_Key, _Val, _KeyOfValue,
2027 _Compare, _Alloc>::const_iterator>
2028 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
2029 equal_range(const _Key& __k) const
2030 {
2031 _Const_Link_type __x = _M_begin();
2032 _Const_Base_ptr __y = _M_end();
2033 while (__x != 0)
2034 {
2035 if (_M_impl._M_key_compare(_S_key(__x), __k))
2036 __x = _S_right(__x);
2037 else if (_M_impl._M_key_compare(__k, _S_key(__x)))
2038 __y = __x, __x = _S_left(__x);
2039 else
2040 {
2041 _Const_Link_type __xu(__x);
2042 _Const_Base_ptr __yu(__y);
2043 __y = __x, __x = _S_left(__x);
2044 __xu = _S_right(__xu);
2045 return pair<const_iterator,
2046 const_iterator>(_M_lower_bound(__x, __y, __k),
2047 _M_upper_bound(__xu, __yu, __k));
2048 }
2049 }
2050 return pair<const_iterator, const_iterator>(const_iterator(__y),
2051 const_iterator(__y));
2052 }
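#if 0 // Editor's sketch, not part of <bits/stl_tree.h>: equal_range returns the
      // half-open range of elements equivalent to the key, shown here through
      // std::multimap, which is implemented on top of _Rb_tree:
      #include <cassert>
      #include <iterator>
      #include <map>
      int equal_range_demo()
      {
        std::multimap<int, char> m{{1, 'a'}, {2, 'b'}, {2, 'c'}, {3, 'd'}};
        auto r = m.equal_range(2);                      // [first, second)
        assert(std::distance(r.first, r.second) == 2);  // two entries with key 2
        return 0;
      }
#endif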
2053
2054 template<typename _Key, typename _Val, typename _KeyOfValue,
2055 typename _Compare, typename _Alloc>
2056 void
2057 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
2058 swap(_Rb_tree& __t)
2059 _GLIBCXX_NOEXCEPT_IF(__is_nothrow_swappable<_Compare>::value)
2060 {
2061 if (_M_root() == 0)
2062 {
2063 if (__t._M_root() != 0)
2064 _M_impl._M_move_data(__t._M_impl);
2065 }
2066 else if (__t._M_root() == 0)
2067 __t._M_impl._M_move_data(_M_impl);
2068 else
2069 {
2070 std::swap(_M_root(),__t._M_root());
2071 std::swap(_M_leftmost(),__t._M_leftmost());
2072 std::swap(_M_rightmost(),__t._M_rightmost());
2073
2074 _M_root()->_M_parent = _M_end();
2075 __t._M_root()->_M_parent = __t._M_end();
2076 std::swap(this->_M_impl._M_node_count, __t._M_impl._M_node_count);
2077 }
2078 // No need to swap header's color as it does not change.
2079 std::swap(this->_M_impl._M_key_compare, __t._M_impl._M_key_compare);
2080
2081 _Alloc_traits::_S_on_swap(_M_get_Node_allocator(),
2082 __t._M_get_Node_allocator());
2083 }
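#if 0 // Editor's sketch, not part of <bits/stl_tree.h>: swap exchanges the two
      // trees' roots, extreme pointers, node counts and comparators in O(1),
      // which is what std::map::swap relies on:
      #include <cassert>
      #include <map>
      int swap_demo()
      {
        std::map<int, int> a{{1, 10}};
        std::map<int, int> b{{2, 20}, {3, 30}};
        a.swap(b);                 // constant time; no elements are copied
        assert(a.size() == 2 && b.size() == 1);
        return 0;
      }
#endif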
2084
2085 template<typename _Key, typename _Val, typename _KeyOfValue,
2086 typename _Compare, typename _Alloc>
2087 pair<typename _Rb_tree<_Key, _Val, _KeyOfValue,
2088 _Compare, _Alloc>::_Base_ptr,
2089 typename _Rb_tree<_Key, _Val, _KeyOfValue,
2090 _Compare, _Alloc>::_Base_ptr>
2091 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
2092 _M_get_insert_unique_pos(const key_type& __k)
2093 {
2094 typedef pair<_Base_ptr, _Base_ptr> _Res;
2095 _Link_type __x = _M_begin();
2096 _Base_ptr __y = _M_end();
2097 bool __comp = true;
2098 while (__x != 0)
2099 {
2100 __y = __x;
2101 __comp = _M_impl._M_key_compare(__k, _S_key(__x));
2102 __x = __comp ? _S_left(__x) : _S_right(__x);
2103 }
2104 iterator __j = iterator(__y);
2105 if (__comp)
2106 {
2107 if (__j == begin())
2108 return _Res(__x, __y);
2109 else
2110 --__j;
2111 }
2112 if (_M_impl._M_key_compare(_S_key(__j._M_node), __k))
2113 return _Res(__x, __y);
2114 return _Res(__j._M_node, 0);
2115 }
2116
2117 template<typename _Key, typename _Val, typename _KeyOfValue,
2118 typename _Compare, typename _Alloc>
2119 pair<typename _Rb_tree<_Key, _Val, _KeyOfValue,
2120 _Compare, _Alloc>::_Base_ptr,
2121 typename _Rb_tree<_Key, _Val, _KeyOfValue,
2122 _Compare, _Alloc>::_Base_ptr>
2123 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
2124 _M_get_insert_equal_pos(const key_type& __k)
2125 {
2126 typedef pair<_Base_ptr, _Base_ptr> _Res;
2127 _Link_type __x = _M_begin();
2128 _Base_ptr __y = _M_end();
2129 while (__x != 0)
2130 {
2131 __y = __x;
2132 __x = _M_impl._M_key_compare(__k, _S_key(__x)) ?
2133 _S_left(__x) : _S_right(__x);
2134 }
2135 return _Res(__x, __y);
2136 }
2137
2138 template<typename _Key, typename _Val, typename _KeyOfValue,
2139 typename _Compare, typename _Alloc>
2140#if __cplusplus >= 201103L
2141 template<typename _Arg>
2142#endif
2143 pair<typename _Rb_tree<_Key, _Val, _KeyOfValue,
2144 _Compare, _Alloc>::iterator, bool>
2145 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
2146#if __cplusplus >= 201103L
2147 _M_insert_unique(_Arg&& __v)
2148#else
2149 _M_insert_unique(const _Val& __v)
2150#endif
2151 {
2152 typedef pair<iterator, bool> _Res;
2153 pair<_Base_ptr, _Base_ptr> __res
2154 = _M_get_insert_unique_pos(_KeyOfValue()(__v));
2155
2156 if (__res.second)
2157 {
2158 _Alloc_node __an(*this);
2159 return _Res(_M_insert_(__res.first, __res.second,
2160 _GLIBCXX_FORWARD(_Arg, __v), __an),
2161 true);
2162 }
2163
2164 return _Res(iterator(__res.first), false);
2165 }
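#if 0 // Editor's sketch, not part of <bits/stl_tree.h>: _M_insert_unique is the
      // engine behind std::map::insert, whose bool reports whether a new node
      // was created or an equivalent key already existed:
      #include <cassert>
      #include <map>
      int insert_unique_demo()
      {
        std::map<int, char> m;
        auto r1 = m.insert({1, 'a'});
        auto r2 = m.insert({1, 'b'});          // duplicate key: rejected
        assert(r1.second && !r2.second);
        assert(r2.first->second == 'a');       // existing element is untouched
        return 0;
      }
#endif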
2166
2167 template<typename _Key, typename _Val, typename _KeyOfValue,
2168 typename _Compare, typename _Alloc>
2169#if __cplusplus >= 201103L
2170 template<typename _Arg>
2171#endif
2172 typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::iterator
2173 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
2174#if __cplusplus >= 201103L
2175 _M_insert_equal(_Arg&& __v)
2176#else
2177 _M_insert_equal(const _Val& __v)
2178#endif
2179 {
2180 pair<_Base_ptr, _Base_ptr> __res
2181 = _M_get_insert_equal_pos(_KeyOfValue()(__v));
2182 _Alloc_node __an(*this);
2183 return _M_insert_(__res.first, __res.second,
2184 _GLIBCXX_FORWARD(_Arg, __v), __an);
2185 }
2186
2187 template<typename _Key, typename _Val, typename _KeyOfValue,
2188 typename _Compare, typename _Alloc>
2189 pair<typename _Rb_tree<_Key, _Val, _KeyOfValue,
2190 _Compare, _Alloc>::_Base_ptr,
2191 typename _Rb_tree<_Key, _Val, _KeyOfValue,
2192 _Compare, _Alloc>::_Base_ptr>
2193 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
2194 _M_get_insert_hint_unique_pos(const_iterator __position,
2195 const key_type& __k)
2196 {
2197 iterator __pos = __position._M_const_cast();
2198 typedef pair<_Base_ptr, _Base_ptr> _Res;
2199
2200 // end()
2201 if (__pos._M_node == _M_end())
2202 {
2203 if (size() > 0
2204 && _M_impl._M_key_compare(_S_key(_M_rightmost()), __k))
2205 return _Res(0, _M_rightmost());
2206 else
2207 return _M_get_insert_unique_pos(__k);
2208 }
2209 else if (_M_impl._M_key_compare(__k, _S_key(__pos._M_node)))
2210 {
2211 // First, try before...
2212 iterator __before = __pos;
2213 if (__pos._M_node == _M_leftmost()) // begin()
2214 return _Res(_M_leftmost(), _M_leftmost());
2215 else if (_M_impl._M_key_compare(_S_key((--__before)._M_node), __k))
2216 {
2217 if (_S_right(__before._M_node) == 0)
2218 return _Res(0, __before._M_node);
2219 else
2220 return _Res(__pos._M_node, __pos._M_node);
2221 }
2222 else
2223 return _M_get_insert_unique_pos(__k);
2224 }
2225 else if (_M_impl._M_key_compare(_S_key(__pos._M_node), __k))
2226 {
2227 // ... then try after.
2228 iterator __after = __pos;
2229 if (__pos._M_node == _M_rightmost())
2230 return _Res(0, _M_rightmost());
2231 else if (_M_impl._M_key_compare(__k, _S_key((++__after)._M_node)))
2232 {
2233 if (_S_right(__pos._M_node) == 0)
2234 return _Res(0, __pos._M_node);
2235 else
2236 return _Res(__after._M_node, __after._M_node);
2237 }
2238 else
2239 return _M_get_insert_unique_pos(__k);
2240 }
2241 else
2242 // Equivalent keys.
2243 return _Res(__pos._M_node, 0);
2244 }
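#if 0 // Editor's sketch, not part of <bits/stl_tree.h>: the hint logic above
      // first tries to place the new key just before or just after the hinted
      // position before falling back to a full descent. Observable effect:
      #include <cassert>
      #include <map>
      int hinted_insert_demo()
      {
        std::map<int, char> m{{1, 'a'}, {3, 'c'}};
        // Hint points at key 3; inserting 2 immediately before it is amortized
        // O(1) when the hint is correct, O(log n) otherwise.
        auto it = m.insert(m.find(3), {2, 'b'});
        assert(it->first == 2 && m.size() == 3);
        return 0;
      }
#endif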
2245
2246 template<typename _Key, typename _Val, typename _KeyOfValue,
2247 typename _Compare, typename _Alloc>
2248#if __cplusplus >= 201103L
2249 template<typename _Arg, typename _NodeGen>
2250#else
2251 template<typename _NodeGen>
2252#endif
2253 typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::iterator
2254 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
2255 _M_insert_unique_(const_iterator __position,
2256#if __cplusplus >= 201103L
2257 _Arg&& __v,
2258#else
2259 const _Val& __v,
2260#endif
2261 _NodeGen& __node_gen)
2262 {
2263 pair<_Base_ptr, _Base_ptr> __res
2264 = _M_get_insert_hint_unique_pos(__position, _KeyOfValue()(__v));
2265
2266 if (__res.second)
2267 return _M_insert_(__res.first, __res.second,
2268 _GLIBCXX_FORWARD(_Arg, __v),
2269 __node_gen);
2270 return iterator(__res.first);
2271 }
2272
2273 template<typename _Key, typename _Val, typename _KeyOfValue,
2274 typename _Compare, typename _Alloc>
2275 pair<typename _Rb_tree<_Key, _Val, _KeyOfValue,
2276 _Compare, _Alloc>::_Base_ptr,
2277 typename _Rb_tree<_Key, _Val, _KeyOfValue,
2278 _Compare, _Alloc>::_Base_ptr>
2279 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
2280 _M_get_insert_hint_equal_pos(const_iterator __position, const key_type& __k)
2281 {
2282 iterator __pos = __position._M_const_cast();
2283 typedef pair<_Base_ptr, _Base_ptr> _Res;
2284
2285 // end()
2286 if (__pos._M_node == _M_end())
2287 {
2288 if (size() > 0
2289 && !_M_impl._M_key_compare(__k, _S_key(_M_rightmost())))
2290 return _Res(0, _M_rightmost());
2291 else
2292 return _M_get_insert_equal_pos(__k);
2293 }
2294 else if (!_M_impl._M_key_compare(_S_key(__pos._M_node), __k))
2295 {
2296 // First, try before...
2297 iterator __before = __pos;
2298 if (__pos._M_node == _M_leftmost()) // begin()
2299 return _Res(_M_leftmost(), _M_leftmost());
2300 else if (!_M_impl._M_key_compare(__k, _S_key((--__before)._M_node)))
2301 {
2302 if (_S_right(__before._M_node) == 0)
2303 return _Res(0, __before._M_node);
2304 else
2305 return _Res(__pos._M_node, __pos._M_node);
2306 }
2307 else
2308 return _M_get_insert_equal_pos(__k);
2309 }
2310 else
2311 {
2312 // ... then try after.
2313 iterator __after = __pos;
2314 if (__pos._M_node == _M_rightmost())
2315 return _Res(0, _M_rightmost());
2316 else if (!_M_impl._M_key_compare(_S_key((++__after)._M_node), __k))
2317 {
2318 if (_S_right(__pos._M_node) == 0)
2319 return _Res(0, __pos._M_node);
2320 else
2321 return _Res(__after._M_node, __after._M_node);
2322 }
2323 else
2324 return _Res(0, 0);
2325 }
2326 }
2327
2328 template<typename _Key, typename _Val, typename _KeyOfValue,
2329 typename _Compare, typename _Alloc>
2330#if __cplusplus >= 201103L
2331 template<typename _Arg, typename _NodeGen>
2332#else
2333 template<typename _NodeGen>
2334#endif
2335 typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::iterator
2336 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
2337 _M_insert_equal_(const_iterator __position,
2338#if __cplusplus >= 201103L
2339 _Arg&& __v,
2340#else
2341 const _Val& __v,
2342#endif
2343 _NodeGen& __node_gen)
2344 {
2345 pair<_Base_ptr, _Base_ptr> __res
2346 = _M_get_insert_hint_equal_pos(__position, _KeyOfValue()(__v));
2347
2348 if (__res.second)
2349 return _M_insert_(__res.first, __res.second,
2350 _GLIBCXX_FORWARD(_Arg, __v),
2351 __node_gen);
2352
2353 return _M_insert_equal_lower(_GLIBCXX_FORWARD(_Arg, __v));
2354 }
2355
2356#if __cplusplus >= 201103L
2357 template<typename _Key, typename _Val, typename _KeyOfValue,
2358 typename _Compare, typename _Alloc>
2359 typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::iterator
2360 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
2361 _M_insert_node(_Base_ptr __x, _Base_ptr __p, _Link_type __z)
2362 {
2363 bool __insert_left = (__x != 0 || __p == _M_end()
2364 || _M_impl._M_key_compare(_S_key(__z),
2365 _S_key(__p)));
2366
2367 _Rb_tree_insert_and_rebalance(__insert_left, __z, __p,
2368 this->_M_impl._M_header);
2369 ++_M_impl._M_node_count;
2370 return iterator(__z);
2371 }
2372
2373 template<typename _Key, typename _Val, typename _KeyOfValue,
2374 typename _Compare, typename _Alloc>
2375 typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::iterator
2376 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
2377 _M_insert_lower_node(_Base_ptr __p, _Link_type __z)
2378 {
2379 bool __insert_left = (__p == _M_end()
2380 || !_M_impl._M_key_compare(_S_key(__p),
2381 _S_key(__z)));
2382
2383 _Rb_tree_insert_and_rebalance(__insert_left, __z, __p,
2384 this->_M_impl._M_header);
2385 ++_M_impl._M_node_count;
2386 return iterator(__z);
2387 }
2388
2389 template<typename _Key, typename _Val, typename _KeyOfValue,
2390 typename _Compare, typename _Alloc>
2391 typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::iterator
2392 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
2393 _M_insert_equal_lower_node(_Link_type __z)
2394 {
2395 _Link_type __x = _M_begin();
2396 _Base_ptr __y = _M_end();
2397 while (__x != 0)
2398 {
2399 __y = __x;
2400 __x = !_M_impl._M_key_compare(_S_key(__x), _S_key(__z)) ?
2401 _S_left(__x) : _S_right(__x);
2402 }
2403 return _M_insert_lower_node(__y, __z);
2404 }
2405
2406 template<typename _Key, typename _Val, typename _KeyOfValue,
2407 typename _Compare, typename _Alloc>
2408 template<typename... _Args>
2409 pair<typename _Rb_tree<_Key, _Val, _KeyOfValue,
2410 _Compare, _Alloc>::iterator, bool>
2411 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
2412 _M_emplace_unique(_Args&&... __args)
2413 {
2414 _Link_type __z = _M_create_node(std::forward<_Args>(__args)...);
2415
2416 __try
2417 {
2418 typedef pair<iterator, bool> _Res;
2419 auto __res = _M_get_insert_unique_pos(_S_key(__z));
2420 if (__res.second)
2421 return _Res(_M_insert_node(__res.first, __res.second, __z), true);
2422
2423 _M_drop_node(__z);
2424 return _Res(iterator(__res.first), false);
2425 }
2426 __catch(...)
2427 {
2428 _M_drop_node(__z);
2429 __throw_exception_again;
2430 }
2431 }
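#if 0 // Editor's sketch, not part of <bits/stl_tree.h>: _M_emplace_unique builds
      // the node first and drops it again if the key turns out to be a
      // duplicate, which is what std::map::emplace exposes:
      #include <cassert>
      #include <map>
      #include <string>
      int emplace_unique_demo()
      {
        std::map<int, std::string> m;
        auto r1 = m.emplace(1, "one");
        auto r2 = m.emplace(1, "uno");        // node constructed, then discarded
        assert(r1.second && !r2.second);
        assert(r2.first->second == "one");
        return 0;
      }
#endif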
2432
2433 template<typename _Key, typename _Val, typename _KeyOfValue,
2434 typename _Compare, typename _Alloc>
2435 template<typename... _Args>
2436 typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::iterator
2437 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
2438 _M_emplace_equal(_Args&&... __args)
2439 {
2440 _Link_type __z = _M_create_node(std::forward<_Args>(__args)...);
2441
2442 __try
2443 {
2444 auto __res = _M_get_insert_equal_pos(_S_key(__z));
2445 return _M_insert_node(__res.first, __res.second, __z);
2446 }
2447 __catch(...)
2448 {
2449 _M_drop_node(__z);
2450 __throw_exception_again;
2451 }
2452 }
2453
2454 template<typename _Key, typename _Val, typename _KeyOfValue,
2455 typename _Compare, typename _Alloc>
2456 template<typename... _Args>
2457 typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::iterator
2458 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
2459 _M_emplace_hint_unique(const_iterator __pos, _Args&&... __args)
2460 {
2461 _Link_type __z = _M_create_node(std::forward<_Args>(__args)...);
2462
2463 __try
2464 {
2465 auto __res = _M_get_insert_hint_unique_pos(__pos, _S_key(__z));
2466
2467 if (__res.second)
2468 return _M_insert_node(__res.first, __res.second, __z);
2469
2470 _M_drop_node(__z);
2471 return iterator(__res.first);
2472 }
2473 __catch(...)
2474 {
2475 _M_drop_node(__z);
2476 __throw_exception_again;
2477 }
2478 }
2479
2480 template<typename _Key, typename _Val, typename _KeyOfValue,
2481 typename _Compare, typename _Alloc>
2482 template<typename... _Args>
2483 typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::iterator
2484 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
2485 _M_emplace_hint_equal(const_iterator __pos, _Args&&... __args)
2486 {
2487 _Link_type __z = _M_create_node(std::forward<_Args>(__args)...);
2488
2489 __try
2490 {
2491 auto __res = _M_get_insert_hint_equal_pos(__pos, _S_key(__z));
2492
2493 if (__res.second)
2494 return _M_insert_node(__res.first, __res.second, __z);
2495
2496 return _M_insert_equal_lower_node(__z);
2497 }
2498 __catch(...)
2499 {
2500 _M_drop_node(__z);
2501 __throw_exception_again;
2502 }
2503 }
2504#endif
2505
2506
2507 template<typename _Key, typename _Val, typename _KeyOfValue,
2508 typename _Compare, typename _Alloc>
2509 void
2510 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
2511 _M_erase_aux(const_iterator __position)
2512 {
2513 _Link_type __y =
2514 static_cast<_Link_type>(_Rb_tree_rebalance_for_erase
2515 (const_cast<_Base_ptr>(__position._M_node),
2516 this->_M_impl._M_header));
2517 _M_drop_node(__y);
2518 --_M_impl._M_node_count;
2519 }
2520
2521 template<typename _Key, typename _Val, typename _KeyOfValue,
2522 typename _Compare, typename _Alloc>
2523 void
2524 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
2525 _M_erase_aux(const_iterator __first, const_iterator __last)
2526 {
2527 if (__first == begin() && __last == end())
2528 clear();
2529 else
2530 while (__first != __last)
2531 _M_erase_aux(__first++);
2532 }
2533
2534 template<typename _Key, typename _Val, typename _KeyOfValue,
2535 typename _Compare, typename _Alloc>
2536 typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::size_type
2537 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
2538 erase(const _Key& __x)
2539 {
2540 pair<iterator, iterator> __p = equal_range(__x);
2541 const size_type __old_size = size();
2542 _M_erase_aux(__p.first, __p.second);
2543 return __old_size - size();
2544 }
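#if 0 // Editor's sketch, not part of <bits/stl_tree.h>: erase(key) removes the
      // whole equal_range of the key and reports how many nodes went away, as
      // seen through std::multiset:
      #include <cassert>
      #include <cstddef>
      #include <set>
      int erase_by_key_demo()
      {
        std::multiset<int> s{1, 2, 2, 2, 3};
        std::size_t removed = s.erase(2);     // erases every element equal to 2
        assert(removed == 3 && s.size() == 2);
        return 0;
      }
#endif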
2545
2546 template<typename _Key, typename _Val, typename _KeyOfValue,
2547 typename _Compare, typename _Alloc>
2548 typename _Rb_tree<_Key, _Val, _KeyOfValue,
2549 _Compare, _Alloc>::iterator
2550 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
2551 find(const _Key& __k)
2552 {
2553 iterator __j = _M_lower_bound(_M_begin(), _M_end(), __k);
2554 return (__j == end()
2555 || _M_impl._M_key_compare(__k,
2556 _S_key(__j._M_node))) ? end() : __j;
2557 }
2558
2559 template<typename _Key, typename _Val, typename _KeyOfValue,
2560 typename _Compare, typename _Alloc>
2561 typename _Rb_tree<_Key, _Val, _KeyOfValue,
2562 _Compare, _Alloc>::const_iterator
2563 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
2564 find(const _Key& __k) const
2565 {
2566 const_iterator __j = _M_lower_bound(_M_begin(), _M_end(), __k);
2567 return (__j == end()
2568 || _M_impl._M_key_compare(__k,
2569 _S_key(__j._M_node))) ? end() : __j;
2570 }
2571
2572 template<typename _Key, typename _Val, typename _KeyOfValue,
2573 typename _Compare, typename _Alloc>
2574 typename _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::size_type
2575 _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
2576 count(const _Key& __k) const
2577 {
2578 pair<const_iterator, const_iterator> __p = equal_range(__k);
2579 const size_type __n = std::distance(__p.first, __p.second);
2580 return __n;
2581 }
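#if 0 // Editor's sketch, not part of <bits/stl_tree.h>: find does a single
      // lower_bound plus one comparison, while count measures the width of
      // equal_range; observable through the standard containers:
      #include <cassert>
      #include <set>
      int find_count_demo()
      {
        std::multiset<int> s{1, 2, 2, 3};
        assert(s.find(2) != s.end());         // some element equivalent to 2
        assert(s.find(4) == s.end());         // no such key
        assert(s.count(2) == 2);              // size of the equivalent range
        return 0;
      }
#endif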
2582
2583 _GLIBCXX_PURE unsigned int
2584 _Rb_tree_black_count(const _Rb_tree_node_base* __node,
2585 const _Rb_tree_node_base* __root) throw ();
2586
2587 template<typename _Key, typename _Val, typename _KeyOfValue,
2588 typename _Compare, typename _Alloc>
2589 bool
2590 _Rb_tree<_Key,_Val,_KeyOfValue,_Compare,_Alloc>::__rb_verify() const
2591 {
2592 if (_M_impl._M_node_count == 0 || begin() == end())
2593 return _M_impl._M_node_count == 0 && begin() == end()
2594 && this->_M_impl._M_header._M_left == _M_end()
2595 && this->_M_impl._M_header._M_right == _M_end();
2596
2597 unsigned int __len = _Rb_tree_black_count(_M_leftmost(), _M_root());
2598 for (const_iterator __it = begin(); __it != end(); ++__it)
2599 {
2600 _Const_Link_type __x = static_cast<_Const_Link_type>(__it._M_node);
2601 _Const_Link_type __L = _S_left(__x);
2602 _Const_Link_type __R = _S_right(__x);
2603
2604 if (__x->_M_color == _S_red)
2605 if ((__L && __L->_M_color == _S_red)
2606 || (__R && __R->_M_color == _S_red))
2607 return false;
2608
2609 if (__L && _M_impl._M_key_compare(_S_key(__x), _S_key(__L)))
2610 return false;
2611 if (__R && _M_impl._M_key_compare(_S_key(__R), _S_key(__x)))
2612 return false;
2613
2614 if (!__L && !__R && _Rb_tree_black_count(__x, _M_root()) != __len)
2615 return false;
2616 }
2617
2618 if (_M_leftmost() != _Rb_tree_node_base::_S_minimum(_M_root()))
2619 return false;
2620 if (_M_rightmost() != _Rb_tree_node_base::_S_maximum(_M_root()))
2621 return false;
2622 return true;
2623 }
2624
2625#if __cplusplus > 201402L
2626 // Allow access to internals of compatible _Rb_tree specializations.
2627 template<typename _Key, typename _Val, typename _Sel, typename _Cmp1,
2628 typename _Alloc, typename _Cmp2>
2629 struct _Rb_tree_merge_helper<_Rb_tree<_Key, _Val, _Sel, _Cmp1, _Alloc>,
2630 _Cmp2>
2631 {
2632 private:
2633 friend class _Rb_tree<_Key, _Val, _Sel, _Cmp1, _Alloc>;
2634
2635 static auto&
2636 _S_get_impl(_Rb_tree<_Key, _Val, _Sel, _Cmp2, _Alloc>& __tree)
2637 { return __tree._M_impl; }
2638 };
2639#endif // C++17
2640
2641_GLIBCXX_END_NAMESPACE_VERSION
2642} // namespace
2643
2644#endif