AMDGPUAsmParser.cpp
1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
14#include "SIDefines.h"
15#include "SIInstrInfo.h"
16#include "SIRegisterInfo.h"
21#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/StringSet.h"
24#include "llvm/ADT/Twine.h"
27#include "llvm/MC/MCAsmInfo.h"
28#include "llvm/MC/MCContext.h"
29#include "llvm/MC/MCExpr.h"
30#include "llvm/MC/MCInst.h"
31#include "llvm/MC/MCInstrDesc.h"
36#include "llvm/MC/MCSymbol.h"
43#include <optional>
44
45using namespace llvm;
46using namespace llvm::AMDGPU;
47using namespace llvm::amdhsa;
48
49namespace {
50
51class AMDGPUAsmParser;
52
53enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
54
55//===----------------------------------------------------------------------===//
56// Operand
57//===----------------------------------------------------------------------===//
58
59class AMDGPUOperand : public MCParsedAsmOperand {
60 enum KindTy {
61 Token,
62 Immediate,
63 Register,
64 Expression
65 } Kind;
66
67 SMLoc StartLoc, EndLoc;
68 const AMDGPUAsmParser *AsmParser;
69
70public:
71 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
72 : Kind(Kind_), AsmParser(AsmParser_) {}
73
74 using Ptr = std::unique_ptr<AMDGPUOperand>;
75
76 struct Modifiers {
77 bool Abs = false;
78 bool Neg = false;
79 bool Sext = false;
80 bool Lit = false;
81
82 bool hasFPModifiers() const { return Abs || Neg; }
83 bool hasIntModifiers() const { return Sext; }
84 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
85
86 int64_t getFPModifiersOperand() const {
87 int64_t Operand = 0;
88 Operand |= Abs ? SISrcMods::ABS : 0u;
89 Operand |= Neg ? SISrcMods::NEG : 0u;
90 return Operand;
91 }
92
93 int64_t getIntModifiersOperand() const {
94 int64_t Operand = 0;
95 Operand |= Sext ? SISrcMods::SEXT : 0u;
96 return Operand;
97 }
98
99 int64_t getModifiersOperand() const {
100 assert(!(hasFPModifiers() && hasIntModifiers())
101 && "fp and int modifiers should not be used simultaneously");
102 if (hasFPModifiers())
103 return getFPModifiersOperand();
104 if (hasIntModifiers())
105 return getIntModifiersOperand();
106 return 0;
107 }
108
109 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
110 };
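// For example (illustrative, based on the fields above): a VOP3 source written
// as "-|v0|" is parsed with Neg = true and Abs = true, so getModifiersOperand()
// yields SISrcMods::NEG | SISrcMods::ABS; this value is emitted as the
// src*_modifiers immediate that precedes the register operand (see
// addRegOrImmWithInputModsOperands below).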
111
112 enum ImmTy {
113 ImmTyNone,
114 ImmTyGDS,
115 ImmTyLDS,
116 ImmTyOffen,
117 ImmTyIdxen,
118 ImmTyAddr64,
119 ImmTyOffset,
120 ImmTyInstOffset,
121 ImmTyOffset0,
122 ImmTyOffset1,
123 ImmTySMEMOffsetMod,
124 ImmTyCPol,
125 ImmTyTFE,
126 ImmTyD16,
127 ImmTyClamp,
128 ImmTyOModSI,
129 ImmTySDWADstSel,
130 ImmTySDWASrc0Sel,
131 ImmTySDWASrc1Sel,
132 ImmTySDWADstUnused,
133 ImmTyDMask,
134 ImmTyDim,
135 ImmTyUNorm,
136 ImmTyDA,
137 ImmTyR128A16,
138 ImmTyA16,
139 ImmTyLWE,
140 ImmTyExpTgt,
141 ImmTyExpCompr,
142 ImmTyExpVM,
143 ImmTyFORMAT,
144 ImmTyHwreg,
145 ImmTyOff,
146 ImmTySendMsg,
147 ImmTyInterpSlot,
148 ImmTyInterpAttr,
149 ImmTyInterpAttrChan,
150 ImmTyOpSel,
151 ImmTyOpSelHi,
152 ImmTyNegLo,
153 ImmTyNegHi,
154 ImmTyIndexKey8bit,
155 ImmTyIndexKey16bit,
156 ImmTyDPP8,
157 ImmTyDppCtrl,
158 ImmTyDppRowMask,
159 ImmTyDppBankMask,
160 ImmTyDppBoundCtrl,
161 ImmTyDppFI,
162 ImmTySwizzle,
163 ImmTyGprIdxMode,
164 ImmTyHigh,
165 ImmTyBLGP,
166 ImmTyCBSZ,
167 ImmTyABID,
168 ImmTyEndpgm,
169 ImmTyWaitVDST,
170 ImmTyWaitEXP,
171 ImmTyWaitVAVDst,
172 ImmTyWaitVMVSrc,
173 ImmTyByteSel,
174 };
175
176 // Immediate operand kind.
177 // It helps to identify the location of an offending operand after an error.
178 // Note that regular literals and mandatory literals (KImm) must be handled
179 // differently. When looking for an offending operand, we should usually
180 // ignore mandatory literals because they are part of the instruction and
181 // cannot be changed. Report location of mandatory operands only for VOPD,
182 // when both OpX and OpY have a KImm and there are no other literals.
183 enum ImmKindTy {
184 ImmKindTyNone,
185 ImmKindTyLiteral,
186 ImmKindTyMandatoryLiteral,
187 ImmKindTyConst,
188 };
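// For example (illustrative): in "v_add_f32 v0, 0x3e22f983, v1" the literal is
// ImmKindTyLiteral and may be reported as the offending operand, whereas the
// trailing constant of "v_fmaak_f32 v0, v1, v2, 0x1234" is a KImm
// (ImmKindTyMandatoryLiteral) and is normally skipped when searching for the
// error location (see getLitLoc and getMandatoryLitLoc below).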
189
190private:
191 struct TokOp {
192 const char *Data;
193 unsigned Length;
194 };
195
196 struct ImmOp {
197 int64_t Val;
198 ImmTy Type;
199 bool IsFPImm;
200 mutable ImmKindTy Kind;
201 Modifiers Mods;
202 };
203
204 struct RegOp {
205 unsigned RegNo;
206 Modifiers Mods;
207 };
208
209 union {
210 TokOp Tok;
211 ImmOp Imm;
212 RegOp Reg;
213 const MCExpr *Expr;
214 };
215
216public:
217 bool isToken() const override { return Kind == Token; }
218
219 bool isSymbolRefExpr() const {
220 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
221 }
222
223 bool isImm() const override {
224 return Kind == Immediate;
225 }
226
227 void setImmKindNone() const {
228 assert(isImm());
229 Imm.Kind = ImmKindTyNone;
230 }
231
232 void setImmKindLiteral() const {
233 assert(isImm());
234 Imm.Kind = ImmKindTyLiteral;
235 }
236
237 void setImmKindMandatoryLiteral() const {
238 assert(isImm());
239 Imm.Kind = ImmKindTyMandatoryLiteral;
240 }
241
242 void setImmKindConst() const {
243 assert(isImm());
244 Imm.Kind = ImmKindTyConst;
245 }
246
247 bool IsImmKindLiteral() const {
248 return isImm() && Imm.Kind == ImmKindTyLiteral;
249 }
250
251 bool IsImmKindMandatoryLiteral() const {
252 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
253 }
254
255 bool isImmKindConst() const {
256 return isImm() && Imm.Kind == ImmKindTyConst;
257 }
258
259 bool isInlinableImm(MVT type) const;
260 bool isLiteralImm(MVT type) const;
261
262 bool isRegKind() const {
263 return Kind == Register;
264 }
265
266 bool isReg() const override {
267 return isRegKind() && !hasModifiers();
268 }
269
270 bool isRegOrInline(unsigned RCID, MVT type) const {
271 return isRegClass(RCID) || isInlinableImm(type);
272 }
273
274 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
275 return isRegOrInline(RCID, type) || isLiteralImm(type);
276 }
277
278 bool isRegOrImmWithInt16InputMods() const {
279 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
280 }
281
282 bool isRegOrImmWithIntT16InputMods() const {
283 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
284 }
285
286 bool isRegOrImmWithInt32InputMods() const {
287 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
288 }
289
290 bool isRegOrInlineImmWithInt16InputMods() const {
291 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
292 }
293
294 bool isRegOrInlineImmWithInt32InputMods() const {
295 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
296 }
297
298 bool isRegOrImmWithInt64InputMods() const {
299 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
300 }
301
302 bool isRegOrImmWithFP16InputMods() const {
303 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
304 }
305
306 bool isRegOrImmWithFPT16InputMods() const {
307 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
308 }
309
310 bool isRegOrImmWithFP32InputMods() const {
311 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
312 }
313
314 bool isRegOrImmWithFP64InputMods() const {
315 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
316 }
317
318 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
319 return isRegOrInline(
320 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
321 }
322
323 bool isRegOrInlineImmWithFP32InputMods() const {
324 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
325 }
326
327 bool isPackedFP16InputMods() const {
328 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
329 }
330
331 bool isVReg() const {
332 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
333 isRegClass(AMDGPU::VReg_64RegClassID) ||
334 isRegClass(AMDGPU::VReg_96RegClassID) ||
335 isRegClass(AMDGPU::VReg_128RegClassID) ||
336 isRegClass(AMDGPU::VReg_160RegClassID) ||
337 isRegClass(AMDGPU::VReg_192RegClassID) ||
338 isRegClass(AMDGPU::VReg_256RegClassID) ||
339 isRegClass(AMDGPU::VReg_512RegClassID) ||
340 isRegClass(AMDGPU::VReg_1024RegClassID);
341 }
342
343 bool isVReg32() const {
344 return isRegClass(AMDGPU::VGPR_32RegClassID);
345 }
346
347 bool isVReg32OrOff() const {
348 return isOff() || isVReg32();
349 }
350
351 bool isNull() const {
352 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
353 }
354
355 bool isVRegWithInputMods() const;
356 template <bool IsFake16> bool isT16VRegWithInputMods() const;
357
358 bool isSDWAOperand(MVT type) const;
359 bool isSDWAFP16Operand() const;
360 bool isSDWAFP32Operand() const;
361 bool isSDWAInt16Operand() const;
362 bool isSDWAInt32Operand() const;
363
364 bool isImmTy(ImmTy ImmT) const {
365 return isImm() && Imm.Type == ImmT;
366 }
367
368 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
369
370 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
371
372 bool isImmModifier() const {
373 return isImm() && Imm.Type != ImmTyNone;
374 }
375
376 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
377 bool isDim() const { return isImmTy(ImmTyDim); }
378 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
379 bool isOff() const { return isImmTy(ImmTyOff); }
380 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
381 bool isOffen() const { return isImmTy(ImmTyOffen); }
382 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
383 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
384 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
385 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
386 bool isGDS() const { return isImmTy(ImmTyGDS); }
387 bool isLDS() const { return isImmTy(ImmTyLDS); }
388 bool isCPol() const { return isImmTy(ImmTyCPol); }
389 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
390 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
391 bool isTFE() const { return isImmTy(ImmTyTFE); }
392 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
393 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
394 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
395 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
396 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
397 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
398 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
399 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
400 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
401 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
402 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
403 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
404 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
405
406 bool isRegOrImm() const {
407 return isReg() || isImm();
408 }
409
410 bool isRegClass(unsigned RCID) const;
411
412 bool isInlineValue() const;
413
414 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
415 return isRegOrInline(RCID, type) && !hasModifiers();
416 }
417
418 bool isSCSrcB16() const {
419 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
420 }
421
422 bool isSCSrcV2B16() const {
423 return isSCSrcB16();
424 }
425
426 bool isSCSrc_b32() const {
427 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
428 }
429
430 bool isSCSrc_b64() const {
431 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
432 }
433
434 bool isBoolReg() const;
435
436 bool isSCSrcF16() const {
437 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
438 }
439
440 bool isSCSrcV2F16() const {
441 return isSCSrcF16();
442 }
443
444 bool isSCSrcF32() const {
445 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
446 }
447
448 bool isSCSrcF64() const {
449 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
450 }
451
452 bool isSSrc_b32() const {
453 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
454 }
455
456 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
457
458 bool isSSrcV2B16() const {
459 llvm_unreachable("cannot happen");
460 return isSSrc_b16();
461 }
462
463 bool isSSrc_b64() const {
464 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
465 // See isVSrc64().
466 return isSCSrc_b64() || isLiteralImm(MVT::i64);
467 }
468
469 bool isSSrc_f32() const {
470 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
471 }
472
473 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
474
475 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
476
477 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
478
479 bool isSSrcV2F16() const {
480 llvm_unreachable("cannot happen");
481 return isSSrc_f16();
482 }
483
484 bool isSSrcV2FP32() const {
485 llvm_unreachable("cannot happen");
486 return isSSrc_f32();
487 }
488
489 bool isSCSrcV2FP32() const {
490 llvm_unreachable("cannot happen");
491 return isSCSrcF32();
492 }
493
494 bool isSSrcV2INT32() const {
495 llvm_unreachable("cannot happen");
496 return isSSrc_b32();
497 }
498
499 bool isSCSrcV2INT32() const {
500 llvm_unreachable("cannot happen");
501 return isSCSrc_b32();
502 }
503
504 bool isSSrcOrLds_b32() const {
505 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
506 isLiteralImm(MVT::i32) || isExpr();
507 }
508
509 bool isVCSrc_b32() const {
510 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
511 }
512
513 bool isVCSrcB64() const {
514 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
515 }
516
517 bool isVCSrcTB16() const {
518 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
519 }
520
521 bool isVCSrcTB16_Lo128() const {
522 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
523 }
524
525 bool isVCSrcFake16B16_Lo128() const {
526 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
527 }
528
529 bool isVCSrc_b16() const {
530 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
531 }
532
533 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
534
535 bool isVCSrc_f32() const {
536 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
537 }
538
539 bool isVCSrcF64() const {
540 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
541 }
542
543 bool isVCSrcTBF16() const {
544 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
545 }
546
547 bool isVCSrcTF16() const {
548 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
549 }
550
551 bool isVCSrcTBF16_Lo128() const {
552 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
553 }
554
555 bool isVCSrcTF16_Lo128() const {
556 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
557 }
558
559 bool isVCSrcFake16BF16_Lo128() const {
560 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
561 }
562
563 bool isVCSrcFake16F16_Lo128() const {
564 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
565 }
566
567 bool isVCSrc_bf16() const {
568 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
569 }
570
571 bool isVCSrc_f16() const {
572 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
573 }
574
575 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
576
577 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
578
579 bool isVSrc_b32() const {
580 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
581 }
582
583 bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
584
585 bool isVSrcT_b16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
586
587 bool isVSrcT_b16_Lo128() const {
588 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
589 }
590
591 bool isVSrcFake16_b16_Lo128() const {
592 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
593 }
594
595 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
596
597 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
598
599 bool isVCSrcV2FP32() const {
600 return isVCSrcF64();
601 }
602
603 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
604
605 bool isVCSrcV2INT32() const {
606 return isVCSrcB64();
607 }
608
609 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
610
611 bool isVSrc_f32() const {
612 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
613 }
614
615 bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }
616
617 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
618
619 bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
620
621 bool isVSrcT_bf16_Lo128() const {
622 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
623 }
624
625 bool isVSrcT_f16_Lo128() const {
626 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
627 }
628
629 bool isVSrcFake16_bf16_Lo128() const {
630 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
631 }
632
633 bool isVSrcFake16_f16_Lo128() const {
634 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
635 }
636
637 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
638
639 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
640
641 bool isVSrc_v2bf16() const {
642 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
643 }
644
645 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
646
647 bool isVISrcB32() const {
648 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
649 }
650
651 bool isVISrcB16() const {
652 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
653 }
654
655 bool isVISrcV2B16() const {
656 return isVISrcB16();
657 }
658
659 bool isVISrcF32() const {
660 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
661 }
662
663 bool isVISrcF16() const {
664 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
665 }
666
667 bool isVISrcV2F16() const {
668 return isVISrcF16() || isVISrcB32();
669 }
670
671 bool isVISrc_64_bf16() const {
672 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
673 }
674
675 bool isVISrc_64_f16() const {
676 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
677 }
678
679 bool isVISrc_64_b32() const {
680 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
681 }
682
683 bool isVISrc_64B64() const {
684 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
685 }
686
687 bool isVISrc_64_f64() const {
688 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
689 }
690
691 bool isVISrc_64V2FP32() const {
692 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
693 }
694
695 bool isVISrc_64V2INT32() const {
696 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
697 }
698
699 bool isVISrc_256_b32() const {
700 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
701 }
702
703 bool isVISrc_256_f32() const {
704 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
705 }
706
707 bool isVISrc_256B64() const {
708 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
709 }
710
711 bool isVISrc_256_f64() const {
712 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
713 }
714
715 bool isVISrc_128B16() const {
716 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
717 }
718
719 bool isVISrc_128V2B16() const {
720 return isVISrc_128B16();
721 }
722
723 bool isVISrc_128_b32() const {
724 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
725 }
726
727 bool isVISrc_128_f32() const {
728 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
729 }
730
731 bool isVISrc_256V2FP32() const {
732 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
733 }
734
735 bool isVISrc_256V2INT32() const {
736 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
737 }
738
739 bool isVISrc_512_b32() const {
740 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
741 }
742
743 bool isVISrc_512B16() const {
744 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
745 }
746
747 bool isVISrc_512V2B16() const {
748 return isVISrc_512B16();
749 }
750
751 bool isVISrc_512_f32() const {
752 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
753 }
754
755 bool isVISrc_512F16() const {
756 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
757 }
758
759 bool isVISrc_512V2F16() const {
760 return isVISrc_512F16() || isVISrc_512_b32();
761 }
762
763 bool isVISrc_1024_b32() const {
764 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
765 }
766
767 bool isVISrc_1024B16() const {
768 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
769 }
770
771 bool isVISrc_1024V2B16() const {
772 return isVISrc_1024B16();
773 }
774
775 bool isVISrc_1024_f32() const {
776 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
777 }
778
779 bool isVISrc_1024F16() const {
780 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
781 }
782
783 bool isVISrc_1024V2F16() const {
784 return isVISrc_1024F16() || isVISrc_1024_b32();
785 }
786
787 bool isAISrcB32() const {
788 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
789 }
790
791 bool isAISrcB16() const {
792 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
793 }
794
795 bool isAISrcV2B16() const {
796 return isAISrcB16();
797 }
798
799 bool isAISrcF32() const {
800 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
801 }
802
803 bool isAISrcF16() const {
804 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
805 }
806
807 bool isAISrcV2F16() const {
808 return isAISrcF16() || isAISrcB32();
809 }
810
811 bool isAISrc_64B64() const {
812 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
813 }
814
815 bool isAISrc_64_f64() const {
816 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
817 }
818
819 bool isAISrc_128_b32() const {
820 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
821 }
822
823 bool isAISrc_128B16() const {
824 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
825 }
826
827 bool isAISrc_128V2B16() const {
828 return isAISrc_128B16();
829 }
830
831 bool isAISrc_128_f32() const {
832 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
833 }
834
835 bool isAISrc_128F16() const {
836 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
837 }
838
839 bool isAISrc_128V2F16() const {
840 return isAISrc_128F16() || isAISrc_128_b32();
841 }
842
843 bool isVISrc_128_bf16() const {
844 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
845 }
846
847 bool isVISrc_128_f16() const {
848 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
849 }
850
851 bool isVISrc_128V2F16() const {
852 return isVISrc_128_f16() || isVISrc_128_b32();
853 }
854
855 bool isAISrc_256B64() const {
856 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
857 }
858
859 bool isAISrc_256_f64() const {
860 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
861 }
862
863 bool isAISrc_512_b32() const {
864 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
865 }
866
867 bool isAISrc_512B16() const {
868 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
869 }
870
871 bool isAISrc_512V2B16() const {
872 return isAISrc_512B16();
873 }
874
875 bool isAISrc_512_f32() const {
876 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
877 }
878
879 bool isAISrc_512F16() const {
880 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
881 }
882
883 bool isAISrc_512V2F16() const {
884 return isAISrc_512F16() || isAISrc_512_b32();
885 }
886
887 bool isAISrc_1024_b32() const {
888 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
889 }
890
891 bool isAISrc_1024B16() const {
892 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
893 }
894
895 bool isAISrc_1024V2B16() const {
896 return isAISrc_1024B16();
897 }
898
899 bool isAISrc_1024_f32() const {
900 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
901 }
902
903 bool isAISrc_1024F16() const {
904 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
905 }
906
907 bool isAISrc_1024V2F16() const {
908 return isAISrc_1024F16() || isAISrc_1024_b32();
909 }
910
911 bool isKImmFP32() const {
912 return isLiteralImm(MVT::f32);
913 }
914
915 bool isKImmFP16() const {
916 return isLiteralImm(MVT::f16);
917 }
918
919 bool isMem() const override {
920 return false;
921 }
922
923 bool isExpr() const {
924 return Kind == Expression;
925 }
926
927 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
928
929 bool isSWaitCnt() const;
930 bool isDepCtr() const;
931 bool isSDelayALU() const;
932 bool isHwreg() const;
933 bool isSendMsg() const;
934 bool isSplitBarrier() const;
935 bool isSwizzle() const;
936 bool isSMRDOffset8() const;
937 bool isSMEMOffset() const;
938 bool isSMRDLiteralOffset() const;
939 bool isDPP8() const;
940 bool isDPPCtrl() const;
941 bool isBLGP() const;
942 bool isGPRIdxMode() const;
943 bool isS16Imm() const;
944 bool isU16Imm() const;
945 bool isEndpgm() const;
946
947 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
948 return [=](){ return P(*this); };
949 }
950
951 StringRef getToken() const {
952 assert(isToken());
953 return StringRef(Tok.Data, Tok.Length);
954 }
955
956 int64_t getImm() const {
957 assert(isImm());
958 return Imm.Val;
959 }
960
961 void setImm(int64_t Val) {
962 assert(isImm());
963 Imm.Val = Val;
964 }
965
966 ImmTy getImmTy() const {
967 assert(isImm());
968 return Imm.Type;
969 }
970
971 MCRegister getReg() const override {
972 assert(isRegKind());
973 return Reg.RegNo;
974 }
975
976 SMLoc getStartLoc() const override {
977 return StartLoc;
978 }
979
980 SMLoc getEndLoc() const override {
981 return EndLoc;
982 }
983
984 SMRange getLocRange() const {
985 return SMRange(StartLoc, EndLoc);
986 }
987
988 Modifiers getModifiers() const {
989 assert(isRegKind() || isImmTy(ImmTyNone));
990 return isRegKind() ? Reg.Mods : Imm.Mods;
991 }
992
993 void setModifiers(Modifiers Mods) {
994 assert(isRegKind() || isImmTy(ImmTyNone));
995 if (isRegKind())
996 Reg.Mods = Mods;
997 else
998 Imm.Mods = Mods;
999 }
1000
1001 bool hasModifiers() const {
1002 return getModifiers().hasModifiers();
1003 }
1004
1005 bool hasFPModifiers() const {
1006 return getModifiers().hasFPModifiers();
1007 }
1008
1009 bool hasIntModifiers() const {
1010 return getModifiers().hasIntModifiers();
1011 }
1012
1013 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1014
1015 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1016
1017 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1018
1019 void addRegOperands(MCInst &Inst, unsigned N) const;
1020
1021 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1022 if (isRegKind())
1023 addRegOperands(Inst, N);
1024 else
1025 addImmOperands(Inst, N);
1026 }
1027
1028 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1029 Modifiers Mods = getModifiers();
1030 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1031 if (isRegKind()) {
1032 addRegOperands(Inst, N);
1033 } else {
1034 addImmOperands(Inst, N, false);
1035 }
1036 }
1037
1038 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1039 assert(!hasIntModifiers());
1040 addRegOrImmWithInputModsOperands(Inst, N);
1041 }
1042
1043 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1044 assert(!hasFPModifiers());
1045 addRegOrImmWithInputModsOperands(Inst, N);
1046 }
1047
1048 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1049 Modifiers Mods = getModifiers();
1050 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1051 assert(isRegKind());
1052 addRegOperands(Inst, N);
1053 }
1054
1055 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1056 assert(!hasIntModifiers());
1057 addRegWithInputModsOperands(Inst, N);
1058 }
1059
1060 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1061 assert(!hasFPModifiers());
1062 addRegWithInputModsOperands(Inst, N);
1063 }
1064
1065 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1066 // clang-format off
1067 switch (Type) {
1068 case ImmTyNone: OS << "None"; break;
1069 case ImmTyGDS: OS << "GDS"; break;
1070 case ImmTyLDS: OS << "LDS"; break;
1071 case ImmTyOffen: OS << "Offen"; break;
1072 case ImmTyIdxen: OS << "Idxen"; break;
1073 case ImmTyAddr64: OS << "Addr64"; break;
1074 case ImmTyOffset: OS << "Offset"; break;
1075 case ImmTyInstOffset: OS << "InstOffset"; break;
1076 case ImmTyOffset0: OS << "Offset0"; break;
1077 case ImmTyOffset1: OS << "Offset1"; break;
1078 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1079 case ImmTyCPol: OS << "CPol"; break;
1080 case ImmTyIndexKey8bit: OS << "index_key"; break;
1081 case ImmTyIndexKey16bit: OS << "index_key"; break;
1082 case ImmTyTFE: OS << "TFE"; break;
1083 case ImmTyD16: OS << "D16"; break;
1084 case ImmTyFORMAT: OS << "FORMAT"; break;
1085 case ImmTyClamp: OS << "Clamp"; break;
1086 case ImmTyOModSI: OS << "OModSI"; break;
1087 case ImmTyDPP8: OS << "DPP8"; break;
1088 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1089 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1090 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1091 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1092 case ImmTyDppFI: OS << "DppFI"; break;
1093 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1094 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1095 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1096 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1097 case ImmTyDMask: OS << "DMask"; break;
1098 case ImmTyDim: OS << "Dim"; break;
1099 case ImmTyUNorm: OS << "UNorm"; break;
1100 case ImmTyDA: OS << "DA"; break;
1101 case ImmTyR128A16: OS << "R128A16"; break;
1102 case ImmTyA16: OS << "A16"; break;
1103 case ImmTyLWE: OS << "LWE"; break;
1104 case ImmTyOff: OS << "Off"; break;
1105 case ImmTyExpTgt: OS << "ExpTgt"; break;
1106 case ImmTyExpCompr: OS << "ExpCompr"; break;
1107 case ImmTyExpVM: OS << "ExpVM"; break;
1108 case ImmTyHwreg: OS << "Hwreg"; break;
1109 case ImmTySendMsg: OS << "SendMsg"; break;
1110 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1111 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1112 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1113 case ImmTyOpSel: OS << "OpSel"; break;
1114 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1115 case ImmTyNegLo: OS << "NegLo"; break;
1116 case ImmTyNegHi: OS << "NegHi"; break;
1117 case ImmTySwizzle: OS << "Swizzle"; break;
1118 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1119 case ImmTyHigh: OS << "High"; break;
1120 case ImmTyBLGP: OS << "BLGP"; break;
1121 case ImmTyCBSZ: OS << "CBSZ"; break;
1122 case ImmTyABID: OS << "ABID"; break;
1123 case ImmTyEndpgm: OS << "Endpgm"; break;
1124 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1125 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1126 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1127 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1128 case ImmTyByteSel: OS << "ByteSel" ; break;
1129 }
1130 // clang-format on
1131 }
1132
1133 void print(raw_ostream &OS) const override {
1134 switch (Kind) {
1135 case Register:
1136 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1137 break;
1138 case Immediate:
1139 OS << '<' << getImm();
1140 if (getImmTy() != ImmTyNone) {
1141 OS << " type: "; printImmTy(OS, getImmTy());
1142 }
1143 OS << " mods: " << Imm.Mods << '>';
1144 break;
1145 case Token:
1146 OS << '\'' << getToken() << '\'';
1147 break;
1148 case Expression:
1149 OS << "<expr " << *Expr << '>';
1150 break;
1151 }
1152 }
1153
1154 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1155 int64_t Val, SMLoc Loc,
1156 ImmTy Type = ImmTyNone,
1157 bool IsFPImm = false) {
1158 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1159 Op->Imm.Val = Val;
1160 Op->Imm.IsFPImm = IsFPImm;
1161 Op->Imm.Kind = ImmKindTyNone;
1162 Op->Imm.Type = Type;
1163 Op->Imm.Mods = Modifiers();
1164 Op->StartLoc = Loc;
1165 Op->EndLoc = Loc;
1166 return Op;
1167 }
1168
1169 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1170 StringRef Str, SMLoc Loc,
1171 bool HasExplicitEncodingSize = true) {
1172 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1173 Res->Tok.Data = Str.data();
1174 Res->Tok.Length = Str.size();
1175 Res->StartLoc = Loc;
1176 Res->EndLoc = Loc;
1177 return Res;
1178 }
1179
1180 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1181 unsigned RegNo, SMLoc S,
1182 SMLoc E) {
1183 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1184 Op->Reg.RegNo = RegNo;
1185 Op->Reg.Mods = Modifiers();
1186 Op->StartLoc = S;
1187 Op->EndLoc = E;
1188 return Op;
1189 }
1190
1191 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1192 const class MCExpr *Expr, SMLoc S) {
1193 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1194 Op->Expr = Expr;
1195 Op->StartLoc = S;
1196 Op->EndLoc = S;
1197 return Op;
1198 }
1199};
1200
1201raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1202 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1203 return OS;
1204}
1205
1206//===----------------------------------------------------------------------===//
1207// AsmParser
1208//===----------------------------------------------------------------------===//
1209
1210// Holds info related to the current kernel, e.g. count of SGPRs used.
1211// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1212// .amdgpu_hsa_kernel or at EOF.
1213class KernelScopeInfo {
1214 int SgprIndexUnusedMin = -1;
1215 int VgprIndexUnusedMin = -1;
1216 int AgprIndexUnusedMin = -1;
1217 MCContext *Ctx = nullptr;
1218 MCSubtargetInfo const *MSTI = nullptr;
1219
1220 void usesSgprAt(int i) {
1221 if (i >= SgprIndexUnusedMin) {
1222 SgprIndexUnusedMin = ++i;
1223 if (Ctx) {
1224 MCSymbol* const Sym =
1225 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1226 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1227 }
1228 }
1229 }
1230
1231 void usesVgprAt(int i) {
1232 if (i >= VgprIndexUnusedMin) {
1233 VgprIndexUnusedMin = ++i;
1234 if (Ctx) {
1235 MCSymbol* const Sym =
1236 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1237 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1238 VgprIndexUnusedMin);
1239 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1240 }
1241 }
1242 }
1243
1244 void usesAgprAt(int i) {
1245 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1246 if (!hasMAIInsts(*MSTI))
1247 return;
1248
1249 if (i >= AgprIndexUnusedMin) {
1250 AgprIndexUnusedMin = ++i;
1251 if (Ctx) {
1252 MCSymbol* const Sym =
1253 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1254 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1255
1256 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1257 MCSymbol* const vSym =
1258 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1259 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1260 VgprIndexUnusedMin);
1261 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1262 }
1263 }
1264 }
1265
1266public:
1267 KernelScopeInfo() = default;
1268
1269 void initialize(MCContext &Context) {
1270 Ctx = &Context;
1271 MSTI = Ctx->getSubtargetInfo();
1272
1273 usesSgprAt(SgprIndexUnusedMin = -1);
1274 usesVgprAt(VgprIndexUnusedMin = -1);
1275 if (hasMAIInsts(*MSTI)) {
1276 usesAgprAt(AgprIndexUnusedMin = -1);
1277 }
1278 }
1279
1280 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1281 unsigned RegWidth) {
1282 switch (RegKind) {
1283 case IS_SGPR:
1284 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1285 break;
1286 case IS_AGPR:
1287 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1288 break;
1289 case IS_VGPR:
1290 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1291 break;
1292 default:
1293 break;
1294 }
1295 }
1296};
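// Illustrative example: after "s_load_dwordx4 s[8:11], s[0:1], 0x0" is parsed
// inside a kernel scope, usesRegister(IS_SGPR, 8, 128) calls usesSgprAt(11),
// so SgprIndexUnusedMin becomes 12 and the ".kernel.sgpr_count" symbol is set
// to 12 (assuming no higher SGPR index has been seen in this kernel).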
1297
1298class AMDGPUAsmParser : public MCTargetAsmParser {
1299 MCAsmParser &Parser;
1300
1301 unsigned ForcedEncodingSize = 0;
1302 bool ForcedDPP = false;
1303 bool ForcedSDWA = false;
1304 KernelScopeInfo KernelScope;
1305
1306 /// @name Auto-generated Match Functions
1307 /// {
1308
1309#define GET_ASSEMBLER_HEADER
1310#include "AMDGPUGenAsmMatcher.inc"
1311
1312 /// }
1313
1314private:
1315 void createConstantSymbol(StringRef Id, int64_t Val);
1316
1317 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1318 bool OutOfRangeError(SMRange Range);
1319 /// Calculate VGPR/SGPR blocks required for given target, reserved
1320 /// registers, and user-specified NextFreeXGPR values.
1321 ///
1322 /// \param Features [in] Target features, used for bug corrections.
1323 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1324 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1325 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1326 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1327 /// descriptor field, if valid.
1328 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1329 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1330 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1331 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1332 /// \param VGPRBlocks [out] Result VGPR block count.
1333 /// \param SGPRBlocks [out] Result SGPR block count.
1334 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1335 const MCExpr *FlatScrUsed, bool XNACKUsed,
1336 std::optional<bool> EnableWavefrontSize32,
1337 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1338 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1339 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
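// Roughly (illustrative only; granule sizes vary by target and wavefront
// size): the block count is the register usage rounded up to the allocation
// granule, encoded zero-based. E.g. with a 4-VGPR granule, NextFreeVGPR == 10
// occupies ceil(10/4) == 3 granules, so VGPRBlocks evaluates to 2.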
1340 bool ParseDirectiveAMDGCNTarget();
1341 bool ParseDirectiveAMDHSACodeObjectVersion();
1342 bool ParseDirectiveAMDHSAKernel();
1343 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1344 bool ParseDirectiveAMDKernelCodeT();
1345 // TODO: Possibly make subtargetHasRegister const.
1346 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1347 bool ParseDirectiveAMDGPUHsaKernel();
1348
1349 bool ParseDirectiveISAVersion();
1350 bool ParseDirectiveHSAMetadata();
1351 bool ParseDirectivePALMetadataBegin();
1352 bool ParseDirectivePALMetadata();
1353 bool ParseDirectiveAMDGPULDS();
1354
1355 /// Common code to parse out a block of text (typically YAML) between start and
1356 /// end directives.
1357 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1358 const char *AssemblerDirectiveEnd,
1359 std::string &CollectString);
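// For example, ParseDirectiveHSAMetadata uses this helper to collect the YAML
// between the HSA metadata begin/end directives (e.g. ".amdgpu_metadata" ...
// ".end_amdgpu_metadata"; the exact directive strings depend on the ABI).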
1360
1361 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1362 RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1363 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1364 unsigned &RegNum, unsigned &RegWidth,
1365 bool RestoreOnFailure = false);
1366 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1367 unsigned &RegNum, unsigned &RegWidth,
1368 SmallVectorImpl<AsmToken> &Tokens);
1369 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1370 unsigned &RegWidth,
1371 SmallVectorImpl<AsmToken> &Tokens);
1372 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1373 unsigned &RegWidth,
1374 SmallVectorImpl<AsmToken> &Tokens);
1375 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1376 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1377 bool ParseRegRange(unsigned& Num, unsigned& Width);
1378 unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg,
1379 unsigned RegWidth, SMLoc Loc);
1380
1381 bool isRegister();
1382 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1383 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1384 void initializeGprCountSymbol(RegisterKind RegKind);
1385 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1386 unsigned RegWidth);
1387 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1388 bool IsAtomic);
1389
1390public:
1391 enum OperandMode {
1392 OperandMode_Default,
1393 OperandMode_NSA,
1394 };
1395
1396 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1397
1398 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1399 const MCInstrInfo &MII,
1400 const MCTargetOptions &Options)
1401 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1403
1404 if (getFeatureBits().none()) {
1405 // Set default features.
1406 copySTI().ToggleFeature("southern-islands");
1407 }
1408
1409 FeatureBitset FB = getFeatureBits();
1410 if (!FB[AMDGPU::FeatureWavefrontSize64] &&
1411 !FB[AMDGPU::FeatureWavefrontSize32]) {
1412 // If there is no default wave size it must be a generation before gfx10,
1413 // these have FeatureWavefrontSize64 in their definition already. For
1414 // gfx10+ set wave32 as a default.
1415 copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
1416 }
1417
1418 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1419
1420 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1421 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1422 createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1423 createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1424 createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1425 } else {
1426 createConstantSymbol(".option.machine_version_major", ISA.Major);
1427 createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1428 createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1429 }
1430 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1431 initializeGprCountSymbol(IS_VGPR);
1432 initializeGprCountSymbol(IS_SGPR);
1433 } else
1434 KernelScope.initialize(getContext());
1435
1436 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1437 createConstantSymbol(Symbol, Code);
1438
1439 createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1440 createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1441 createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1442 }
1443
1444 bool hasMIMG_R128() const {
1445 return AMDGPU::hasMIMG_R128(getSTI());
1446 }
1447
1448 bool hasPackedD16() const {
1449 return AMDGPU::hasPackedD16(getSTI());
1450 }
1451
1452 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1453
1454 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1455
1456 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1457
1458 bool isSI() const {
1459 return AMDGPU::isSI(getSTI());
1460 }
1461
1462 bool isCI() const {
1463 return AMDGPU::isCI(getSTI());
1464 }
1465
1466 bool isVI() const {
1467 return AMDGPU::isVI(getSTI());
1468 }
1469
1470 bool isGFX9() const {
1471 return AMDGPU::isGFX9(getSTI());
1472 }
1473
1474 // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1475 bool isGFX90A() const {
1476 return AMDGPU::isGFX90A(getSTI());
1477 }
1478
1479 bool isGFX940() const {
1480 return AMDGPU::isGFX940(getSTI());
1481 }
1482
1483 bool isGFX9Plus() const {
1484 return AMDGPU::isGFX9Plus(getSTI());
1485 }
1486
1487 bool isGFX10() const {
1488 return AMDGPU::isGFX10(getSTI());
1489 }
1490
1491 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1492
1493 bool isGFX11() const {
1494 return AMDGPU::isGFX11(getSTI());
1495 }
1496
1497 bool isGFX11Plus() const {
1498 return AMDGPU::isGFX11Plus(getSTI());
1499 }
1500
1501 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1502
1503 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1504
1505 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1506
1507 bool isGFX10_BEncoding() const {
1508 return AMDGPU::isGFX10_BEncoding(getSTI());
1509 }
1510
1511 bool hasInv2PiInlineImm() const {
1512 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1513 }
1514
1515 bool hasFlatOffsets() const {
1516 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1517 }
1518
1519 bool hasArchitectedFlatScratch() const {
1520 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1521 }
1522
1523 bool hasSGPR102_SGPR103() const {
1524 return !isVI() && !isGFX9();
1525 }
1526
1527 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1528
1529 bool hasIntClamp() const {
1530 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1531 }
1532
1533 bool hasPartialNSAEncoding() const {
1534 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1535 }
1536
1537 unsigned getNSAMaxSize(bool HasSampler = false) const {
1538 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1539 }
1540
1541 unsigned getMaxNumUserSGPRs() const {
1542 return AMDGPU::getMaxNumUserSGPRs(getSTI());
1543 }
1544
1545 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1546
1547 AMDGPUTargetStreamer &getTargetStreamer() {
1548 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1549 return static_cast<AMDGPUTargetStreamer &>(TS);
1550 }
1551
1552 const MCRegisterInfo *getMRI() const {
1553 // We need this const_cast because for some reason getContext() is not const
1554 // in MCAsmParser.
1555 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1556 }
1557
1558 const MCInstrInfo *getMII() const {
1559 return &MII;
1560 }
1561
1562 const FeatureBitset &getFeatureBits() const {
1563 return getSTI().getFeatureBits();
1564 }
1565
1566 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1567 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1568 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1569
1570 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1571 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1572 bool isForcedDPP() const { return ForcedDPP; }
1573 bool isForcedSDWA() const { return ForcedSDWA; }
1574 ArrayRef<unsigned> getMatchedVariants() const;
1575 StringRef getMatchedVariantName() const;
1576
1577 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1578 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1579 bool RestoreOnFailure);
1580 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1581 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1582 SMLoc &EndLoc) override;
1583 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1584 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1585 unsigned Kind) override;
1586 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1587 OperandVector &Operands, MCStreamer &Out,
1588 uint64_t &ErrorInfo,
1589 bool MatchingInlineAsm) override;
1590 bool ParseDirective(AsmToken DirectiveID) override;
1591 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1592 OperandMode Mode = OperandMode_Default);
1593 StringRef parseMnemonicSuffix(StringRef Name);
1594 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1595 SMLoc NameLoc, OperandVector &Operands) override;
1596 //bool ProcessInstruction(MCInst &Inst);
1597
1599
1600 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1601
1602 ParseStatus
1603 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1604 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1605 std::function<bool(int64_t &)> ConvertResult = nullptr);
1606
1607 ParseStatus parseOperandArrayWithPrefix(
1608 const char *Prefix, OperandVector &Operands,
1609 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1610 bool (*ConvertResult)(int64_t &) = nullptr);
1611
1612 ParseStatus
1613 parseNamedBit(StringRef Name, OperandVector &Operands,
1614 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1615 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1616 ParseStatus parseCPol(OperandVector &Operands);
1617 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1618 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1619 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1620 SMLoc &StringLoc);
1621
1622 bool isModifier();
1623 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1624 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1625 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1626 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1627 bool parseSP3NegModifier();
1628 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1629 bool HasLit = false);
1630 ParseStatus parseReg(OperandVector &Operands);
1631 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1632 bool HasLit = false);
1633 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1634 bool AllowImm = true);
1635 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1636 bool AllowImm = true);
1637 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1638 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1639 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1640 ParseStatus tryParseIndexKey(OperandVector &Operands,
1641 AMDGPUOperand::ImmTy ImmTy);
1642 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1643 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1644
1645 ParseStatus parseDfmtNfmt(int64_t &Format);
1646 ParseStatus parseUfmt(int64_t &Format);
1647 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1648 int64_t &Format);
1649 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1650 int64_t &Format);
1651 ParseStatus parseFORMAT(OperandVector &Operands);
1652 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1653 ParseStatus parseNumericFormat(int64_t &Format);
1654 ParseStatus parseFlatOffset(OperandVector &Operands);
1655 ParseStatus parseR128A16(OperandVector &Operands);
1657 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1658 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1659
1660 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1661
1662 bool parseCnt(int64_t &IntVal);
1663 ParseStatus parseSWaitCnt(OperandVector &Operands);
1664
1665 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1666 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1667 ParseStatus parseDepCtr(OperandVector &Operands);
1668
1669 bool parseDelay(int64_t &Delay);
1670 ParseStatus parseSDelayALU(OperandVector &Operands);
1671
1672 ParseStatus parseHwreg(OperandVector &Operands);
1673
1674private:
1675 struct OperandInfoTy {
1676 SMLoc Loc;
1677 int64_t Val;
1678 bool IsSymbolic = false;
1679 bool IsDefined = false;
1680
1681 OperandInfoTy(int64_t Val) : Val(Val) {}
1682 };
1683
1684 struct StructuredOpField : OperandInfoTy {
1685 StringLiteral Id;
1686 StringLiteral Desc;
1687 unsigned Width;
1688 bool IsDefined = false;
1689
1690 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1691 int64_t Default)
1692 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1693 virtual ~StructuredOpField() = default;
1694
1695 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1696 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1697 return false;
1698 }
1699
1700 virtual bool validate(AMDGPUAsmParser &Parser) const {
1701 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1702 return Error(Parser, "not supported on this GPU");
1703 if (!isUIntN(Width, Val))
1704 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1705 return true;
1706 }
1707 };
1708
1709 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1710 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1711
1712 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1713 bool validateSendMsg(const OperandInfoTy &Msg,
1714 const OperandInfoTy &Op,
1715 const OperandInfoTy &Stream);
1716
1717 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1718 OperandInfoTy &Width);
1719
1720 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1721 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1722 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1723
1724 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1725 const OperandVector &Operands) const;
1726 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1727 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1728 SMLoc getLitLoc(const OperandVector &Operands,
1729 bool SearchMandatoryLiterals = false) const;
1730 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1731 SMLoc getConstLoc(const OperandVector &Operands) const;
1732 SMLoc getInstLoc(const OperandVector &Operands) const;
1733
1734 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1735 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1736 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1737 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1738 bool validateSOPLiteral(const MCInst &Inst) const;
1739 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1740 bool validateVOPDRegBankConstraints(const MCInst &Inst,
1741 const OperandVector &Operands);
1742 bool validateIntClampSupported(const MCInst &Inst);
1743 bool validateMIMGAtomicDMask(const MCInst &Inst);
1744 bool validateMIMGGatherDMask(const MCInst &Inst);
1745 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1746 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1747 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1748 bool validateMIMGD16(const MCInst &Inst);
1749 bool validateMIMGMSAA(const MCInst &Inst);
1750 bool validateOpSel(const MCInst &Inst);
1751 bool validateNeg(const MCInst &Inst, int OpName);
1752 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1753 bool validateVccOperand(unsigned Reg) const;
1754 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1755 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1756 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1757 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1758 bool validateAGPRLdSt(const MCInst &Inst) const;
1759 bool validateVGPRAlign(const MCInst &Inst) const;
1760 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1761 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1762 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1763 bool validateDivScale(const MCInst &Inst);
1764 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1765 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1766 const SMLoc &IDLoc);
1767 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1768 const unsigned CPol);
1769 bool validateExeczVcczOperands(const OperandVector &Operands);
1770 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1771 std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1772 unsigned getConstantBusLimit(unsigned Opcode) const;
1773 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1774 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1775 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1776
1777 bool isSupportedMnemo(StringRef Mnemo,
1778 const FeatureBitset &FBS);
1779 bool isSupportedMnemo(StringRef Mnemo,
1780 const FeatureBitset &FBS,
1781 ArrayRef<unsigned> Variants);
1782 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1783
1784 bool isId(const StringRef Id) const;
1785 bool isId(const AsmToken &Token, const StringRef Id) const;
1786 bool isToken(const AsmToken::TokenKind Kind) const;
1787 StringRef getId() const;
1788 bool trySkipId(const StringRef Id);
1789 bool trySkipId(const StringRef Pref, const StringRef Id);
1790 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1791 bool trySkipToken(const AsmToken::TokenKind Kind);
1792 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1793 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1794 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1795
1796 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1797 AsmToken::TokenKind getTokenKind() const;
1798 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1800 StringRef getTokenStr() const;
1801 AsmToken peekToken(bool ShouldSkipSpace = true);
1802 AsmToken getToken() const;
1803 SMLoc getLoc() const;
1804 void lex();
1805
1806public:
1807 void onBeginOfFile() override;
1808 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1809
1810 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1811
1812 ParseStatus parseExpTgt(OperandVector &Operands);
1813 ParseStatus parseSendMsg(OperandVector &Operands);
1814 ParseStatus parseInterpSlot(OperandVector &Operands);
1815 ParseStatus parseInterpAttr(OperandVector &Operands);
1816 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1817 ParseStatus parseBoolReg(OperandVector &Operands);
1818
1819 bool parseSwizzleOperand(int64_t &Op,
1820 const unsigned MinVal,
1821 const unsigned MaxVal,
1822 const StringRef ErrMsg,
1823 SMLoc &Loc);
1824 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1825 const unsigned MinVal,
1826 const unsigned MaxVal,
1827 const StringRef ErrMsg);
1828 ParseStatus parseSwizzle(OperandVector &Operands);
1829 bool parseSwizzleOffset(int64_t &Imm);
1830 bool parseSwizzleMacro(int64_t &Imm);
1831 bool parseSwizzleQuadPerm(int64_t &Imm);
1832 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1833 bool parseSwizzleBroadcast(int64_t &Imm);
1834 bool parseSwizzleSwap(int64_t &Imm);
1835 bool parseSwizzleReverse(int64_t &Imm);
1836
1837 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1838 int64_t parseGPRIdxMacro();
1839
1840 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1841 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1842
1843 ParseStatus parseOModSI(OperandVector &Operands);
1844
1845 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1846 OptionalImmIndexMap &OptionalIdx);
1847 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1848 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1849 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1850 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1851
1852 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1853 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1854 OptionalImmIndexMap &OptionalIdx);
1855 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1856 OptionalImmIndexMap &OptionalIdx);
1857
1858 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1859 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1860
1861 bool parseDimId(unsigned &Encoding);
1863 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1865 ParseStatus parseDPPCtrl(OperandVector &Operands);
1866 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1867 int64_t parseDPPCtrlSel(StringRef Ctrl);
1868 int64_t parseDPPCtrlPerm();
1869 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1870 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1871 cvtDPP(Inst, Operands, true);
1872 }
1873 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1874 bool IsDPP8 = false);
1875 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1876 cvtVOP3DPP(Inst, Operands, true);
1877 }
1878
1879 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1880 AMDGPUOperand::ImmTy Type);
1881 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1882 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1883 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1884 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1885 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1886 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1887 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1888 uint64_t BasicInstType,
1889 bool SkipDstVcc = false,
1890 bool SkipSrcVcc = false);
1891
1892 ParseStatus parseEndpgm(OperandVector &Operands);
1893
1895};
1896
1897} // end anonymous namespace
1898
1899// May be called with an integer type of equivalent bitwidth.
1900static const fltSemantics *getFltSemantics(unsigned Size) {
1901 switch (Size) {
1902 case 4:
1903 return &APFloat::IEEEsingle();
1904 case 8:
1905 return &APFloat::IEEEdouble();
1906 case 2:
1907 return &APFloat::IEEEhalf();
1908 default:
1909 llvm_unreachable("unsupported fp type");
1910 }
1911}
1912
1913static const fltSemantics *getFltSemantics(MVT VT) {
1914 return getFltSemantics(VT.getSizeInBits() / 8);
1915}
1916
1917static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1918 switch (OperandType) {
1919 // When a floating-point immediate is used as an operand of type i16, the
1920 // 32-bit representation of the constant truncated to the 16 LSBs should be used.
1940 return &APFloat::IEEEsingle();
1946 return &APFloat::IEEEdouble();
1955 return &APFloat::IEEEhalf();
1963 return &APFloat::BFloat();
1964 default:
1965 llvm_unreachable("unsupported fp type");
1966 }
1967}
1968
1969//===----------------------------------------------------------------------===//
1970// Operand
1971//===----------------------------------------------------------------------===//
1972
1973static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1974 bool Lost;
1975
1976 // Convert literal to single precision
1977 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1978 APFloat::rmNearestTiesToEven,
1979 &Lost);
1980 // We allow precision loss but not overflow or underflow
1981 if (Status != APFloat::opOK &&
1982 Lost &&
1983 ((Status & APFloat::opOverflow) != 0 ||
1984 (Status & APFloat::opUnderflow) != 0)) {
1985 return false;
1986 }
1987
1988 return true;
1989}
1990
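// A truncation to Size bits is safe if the value is representable either as
// an unsigned or as a signed Size-bit integer, i.e. no significant bits are
// dropped.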
1991static bool isSafeTruncation(int64_t Val, unsigned Size) {
1992 return isUIntN(Size, Val) || isIntN(Size, Val);
1993}
1994
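// Check whether a 16-bit value can be encoded as an inline constant,
// dispatching on the scalar element type (i16, f16 or bf16) expected by the
// operand.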
1995static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1996 if (VT.getScalarType() == MVT::i16)
1997 return isInlinableLiteral32(Val, HasInv2Pi);
1998
1999 if (VT.getScalarType() == MVT::f16)
2000 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2001
2002 assert(VT.getScalarType() == MVT::bf16);
2003
2004 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2005}
2006
2007bool AMDGPUOperand::isInlinableImm(MVT type) const {
2008
2009 // This is a hack to enable named inline values like
2010 // shared_base with both 32-bit and 64-bit operands.
2011 // Note that these values are defined as
2012 // 32-bit operands only.
2013 if (isInlineValue()) {
2014 return true;
2015 }
2016
2017 if (!isImmTy(ImmTyNone)) {
2018 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2019 return false;
2020 }
2021 // TODO: We should avoid using host float here. It would be better to
2022 // check the float bit values which is what a few other places do.
2023 // We've had bot failures before due to weird NaN support on mips hosts.
2024
2025 APInt Literal(64, Imm.Val);
2026
2027 if (Imm.IsFPImm) { // We got fp literal token
2028 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2030 AsmParser->hasInv2PiInlineImm());
2031 }
2032
2033 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2034 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2035 return false;
2036
2037 if (type.getScalarSizeInBits() == 16) {
2038 bool Lost = false;
2039 switch (type.getScalarType().SimpleTy) {
2040 default:
2041 llvm_unreachable("unknown 16-bit type");
2042 case MVT::bf16:
2043 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2044 &Lost);
2045 break;
2046 case MVT::f16:
2047 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2048 &Lost);
2049 break;
2050 case MVT::i16:
2051 FPLiteral.convert(APFloatBase::IEEEsingle(),
2052 APFloat::rmNearestTiesToEven, &Lost);
2053 break;
2054 }
2055 // We need to use the 32-bit representation here because when a floating-point
2056 // inline constant is used as an i16 operand, its 32-bit representation
2057 // will be used. We will need the 32-bit value to check if
2058 // it is an FP inline constant.
2059 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2060 return isInlineableLiteralOp16(ImmVal, type,
2061 AsmParser->hasInv2PiInlineImm());
2062 }
2063
2064 // Check if single precision literal is inlinable
2066 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2067 AsmParser->hasInv2PiInlineImm());
2068 }
2069
2070 // We got int literal token.
2071 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2073 AsmParser->hasInv2PiInlineImm());
2074 }
2075
2076 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2077 return false;
2078 }
2079
2080 if (type.getScalarSizeInBits() == 16) {
2082 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2083 type, AsmParser->hasInv2PiInlineImm());
2084 }
2085
2087 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2088 AsmParser->hasInv2PiInlineImm());
2089}
2090
2091bool AMDGPUOperand::isLiteralImm(MVT type) const {
2092 // Check that this immediate can be added as literal
2093 if (!isImmTy(ImmTyNone)) {
2094 return false;
2095 }
2096
2097 if (!Imm.IsFPImm) {
2098 // We got int literal token.
2099
2100 if (type == MVT::f64 && hasFPModifiers()) {
2101 // FP modifiers cannot be applied to int literals while preserving the same
2102 // semantics for VOP1/2/C and VOP3, because of integer truncation. To avoid
2103 // ambiguity, these cases are disabled.
2104 return false;
2105 }
2106
2107 unsigned Size = type.getSizeInBits();
2108 if (Size == 64)
2109 Size = 32;
2110
2111 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2112 // types.
2113 return isSafeTruncation(Imm.Val, Size);
2114 }
2115
2116 // We got fp literal token
2117 if (type == MVT::f64) { // Expected 64-bit fp operand
2118 // We would set the low 32 bits of the literal to zeroes, but we accept such literals.
2119 return true;
2120 }
2121
2122 if (type == MVT::i64) { // Expected 64-bit int operand
2123 // We don't allow fp literals in 64-bit integer instructions. It is
2124 // unclear how we should encode them.
2125 return false;
2126 }
2127
2128 // We allow fp literals with f16x2 operands assuming that the specified
2129 // literal goes into the lower half and the upper half is zero. We also
2130 // require that the literal may be losslessly converted to f16.
2131 //
2132 // For i16x2 operands, we assume that the specified literal is encoded as a
2133 // single-precision float. This is pretty odd, but it matches SP3 and what
2134 // happens in hardware.
2135 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2136 : (type == MVT::v2i16) ? MVT::f32
2137 : (type == MVT::v2f32) ? MVT::f32
2138 : type;
2139
2140 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2141 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2142}
2143
2144bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2145 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2146}
2147
2148bool AMDGPUOperand::isVRegWithInputMods() const {
2149 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2150 // GFX90A allows DPP on 64-bit operands.
2151 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2152 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2153}
2154
2155template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2156 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2157 : AMDGPU::VGPR_16_Lo128RegClassID);
2158}
2159
2160bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2161 if (AsmParser->isVI())
2162 return isVReg32();
2163 if (AsmParser->isGFX9Plus())
2164 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2165 return false;
2166}
2167
2168bool AMDGPUOperand::isSDWAFP16Operand() const {
2169 return isSDWAOperand(MVT::f16);
2170}
2171
2172bool AMDGPUOperand::isSDWAFP32Operand() const {
2173 return isSDWAOperand(MVT::f32);
2174}
2175
2176bool AMDGPUOperand::isSDWAInt16Operand() const {
2177 return isSDWAOperand(MVT::i16);
2178}
2179
2180bool AMDGPUOperand::isSDWAInt32Operand() const {
2181 return isSDWAOperand(MVT::i32);
2182}
2183
2184bool AMDGPUOperand::isBoolReg() const {
2185 auto FB = AsmParser->getFeatureBits();
2186 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2187 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2188}
2189
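// Apply parsed abs/neg modifiers to the raw bits of an FP literal of Size
// bytes: abs clears the sign bit, neg flips it.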
2190uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2191{
2192 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2193 assert(Size == 2 || Size == 4 || Size == 8);
2194
2195 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2196
2197 if (Imm.Mods.Abs) {
2198 Val &= ~FpSignMask;
2199 }
2200 if (Imm.Mods.Neg) {
2201 Val ^= FpSignMask;
2202 }
2203
2204 return Val;
2205}
2206
2207void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2208 if (isExpr()) {
2210 return;
2211 }
2212
2213 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2214 Inst.getNumOperands())) {
2215 addLiteralImmOperand(Inst, Imm.Val,
2216 ApplyModifiers &
2217 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2218 } else {
2219 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2221 setImmKindNone();
2222 }
2223}
2224
2225void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2226 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2227 auto OpNum = Inst.getNumOperands();
2228 // Check that this operand accepts literals
2229 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2230
2231 if (ApplyModifiers) {
2232 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2233 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2234 Val = applyInputFPModifiers(Val, Size);
2235 }
2236
2237 APInt Literal(64, Val);
2238 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2239
2240 if (Imm.IsFPImm) { // We got fp literal token
2241 switch (OpTy) {
2247 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2248 AsmParser->hasInv2PiInlineImm())) {
2249 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2250 setImmKindConst();
2251 return;
2252 }
2253
2254 // Non-inlineable
2255 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2256 // For fp operands we check if low 32 bits are zeros
2257 if (Literal.getLoBits(32) != 0) {
2258 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2259 "Can't encode literal as exact 64-bit floating-point operand. "
2260 "Low 32-bits will be set to zero");
2261 Val &= 0xffffffff00000000u;
2262 }
2263
2265 setImmKindLiteral();
2266 return;
2267 }
2268
2269 // We don't allow fp literals in 64-bit integer instructions. It is
2270 // unclear how we should encode them. This case should be checked earlier
2271 // in predicate methods (isLiteralImm())
2272 llvm_unreachable("fp literal in 64-bit integer instruction.");
2273
2281 if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
2282 // This is 1/(2*pi), which is going to be truncated to bf16 with a
2283 // loss of precision. The constant represents the idiomatic fp32 value of
2284 // 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16 bits
2285 // cleared. Prevent the rounding below.
2286 Inst.addOperand(MCOperand::createImm(0x3e22));
2287 setImmKindLiteral();
2288 return;
2289 }
2290 [[fallthrough]];
2291
2319 bool lost;
2320 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2321 // Convert literal to single precision
2322 FPLiteral.convert(*getOpFltSemantics(OpTy),
2323 APFloat::rmNearestTiesToEven, &lost);
2324 // We allow precision loss but not overflow or underflow. This should be
2325 // checked earlier in isLiteralImm().
2326
2327 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2328 Inst.addOperand(MCOperand::createImm(ImmVal));
2329 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2330 setImmKindMandatoryLiteral();
2331 } else {
2332 setImmKindLiteral();
2333 }
2334 return;
2335 }
2336 default:
2337 llvm_unreachable("invalid operand size");
2338 }
2339
2340 return;
2341 }
2342
2343 // We got int literal token.
2344 // Only sign extend inline immediates.
2345 switch (OpTy) {
2361 if (isSafeTruncation(Val, 32) &&
2362 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2363 AsmParser->hasInv2PiInlineImm())) {
2365 setImmKindConst();
2366 return;
2367 }
2368
2369 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2370 setImmKindLiteral();
2371 return;
2372
2378 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2380 setImmKindConst();
2381 return;
2382 }
2383
2384 Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
2385 : Lo_32(Val);
2386
2388 setImmKindLiteral();
2389 return;
2390
2394 if (isSafeTruncation(Val, 16) &&
2395 AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2396 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2397 setImmKindConst();
2398 return;
2399 }
2400
2401 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2402 setImmKindLiteral();
2403 return;
2404
2409 if (isSafeTruncation(Val, 16) &&
2410 AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2411 AsmParser->hasInv2PiInlineImm())) {
2413 setImmKindConst();
2414 return;
2415 }
2416
2417 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2418 setImmKindLiteral();
2419 return;
2420
2425 if (isSafeTruncation(Val, 16) &&
2426 AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2427 AsmParser->hasInv2PiInlineImm())) {
2429 setImmKindConst();
2430 return;
2431 }
2432
2433 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2434 setImmKindLiteral();
2435 return;
2436
2439 assert(isSafeTruncation(Val, 16));
2440 assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2442 return;
2443 }
2446 assert(isSafeTruncation(Val, 16));
2447 assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2448 AsmParser->hasInv2PiInlineImm()));
2449
2451 return;
2452 }
2453
2456 assert(isSafeTruncation(Val, 16));
2457 assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2458 AsmParser->hasInv2PiInlineImm()));
2459
2461 return;
2462 }
2463
2465 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2466 setImmKindMandatoryLiteral();
2467 return;
2469 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2470 setImmKindMandatoryLiteral();
2471 return;
2472 default:
2473 llvm_unreachable("invalid operand size");
2474 }
2475}
2476
2477void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2478 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2479}
2480
2481bool AMDGPUOperand::isInlineValue() const {
2482 return isRegKind() && ::isInlineValue(getReg());
2483}
2484
2485//===----------------------------------------------------------------------===//
2486// AsmParser
2487//===----------------------------------------------------------------------===//
2488
2489void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2490 // TODO: make these pre-defined variables read-only.
2491 // Currently there is no suitable machinery in the core llvm-mc for this.
2492 // MCSymbol::isRedefinable is intended for another purpose, and
2493 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
2494 MCContext &Ctx = getContext();
2495 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2496 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2497}
2498
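// Map a register kind and width (in bits) to the corresponding register class
// ID, or -1 if no class of that width exists for the kind.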
2499static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2500 if (Is == IS_VGPR) {
2501 switch (RegWidth) {
2502 default: return -1;
2503 case 32:
2504 return AMDGPU::VGPR_32RegClassID;
2505 case 64:
2506 return AMDGPU::VReg_64RegClassID;
2507 case 96:
2508 return AMDGPU::VReg_96RegClassID;
2509 case 128:
2510 return AMDGPU::VReg_128RegClassID;
2511 case 160:
2512 return AMDGPU::VReg_160RegClassID;
2513 case 192:
2514 return AMDGPU::VReg_192RegClassID;
2515 case 224:
2516 return AMDGPU::VReg_224RegClassID;
2517 case 256:
2518 return AMDGPU::VReg_256RegClassID;
2519 case 288:
2520 return AMDGPU::VReg_288RegClassID;
2521 case 320:
2522 return AMDGPU::VReg_320RegClassID;
2523 case 352:
2524 return AMDGPU::VReg_352RegClassID;
2525 case 384:
2526 return AMDGPU::VReg_384RegClassID;
2527 case 512:
2528 return AMDGPU::VReg_512RegClassID;
2529 case 1024:
2530 return AMDGPU::VReg_1024RegClassID;
2531 }
2532 } else if (Is == IS_TTMP) {
2533 switch (RegWidth) {
2534 default: return -1;
2535 case 32:
2536 return AMDGPU::TTMP_32RegClassID;
2537 case 64:
2538 return AMDGPU::TTMP_64RegClassID;
2539 case 128:
2540 return AMDGPU::TTMP_128RegClassID;
2541 case 256:
2542 return AMDGPU::TTMP_256RegClassID;
2543 case 512:
2544 return AMDGPU::TTMP_512RegClassID;
2545 }
2546 } else if (Is == IS_SGPR) {
2547 switch (RegWidth) {
2548 default: return -1;
2549 case 32:
2550 return AMDGPU::SGPR_32RegClassID;
2551 case 64:
2552 return AMDGPU::SGPR_64RegClassID;
2553 case 96:
2554 return AMDGPU::SGPR_96RegClassID;
2555 case 128:
2556 return AMDGPU::SGPR_128RegClassID;
2557 case 160:
2558 return AMDGPU::SGPR_160RegClassID;
2559 case 192:
2560 return AMDGPU::SGPR_192RegClassID;
2561 case 224:
2562 return AMDGPU::SGPR_224RegClassID;
2563 case 256:
2564 return AMDGPU::SGPR_256RegClassID;
2565 case 288:
2566 return AMDGPU::SGPR_288RegClassID;
2567 case 320:
2568 return AMDGPU::SGPR_320RegClassID;
2569 case 352:
2570 return AMDGPU::SGPR_352RegClassID;
2571 case 384:
2572 return AMDGPU::SGPR_384RegClassID;
2573 case 512:
2574 return AMDGPU::SGPR_512RegClassID;
2575 }
2576 } else if (Is == IS_AGPR) {
2577 switch (RegWidth) {
2578 default: return -1;
2579 case 32:
2580 return AMDGPU::AGPR_32RegClassID;
2581 case 64:
2582 return AMDGPU::AReg_64RegClassID;
2583 case 96:
2584 return AMDGPU::AReg_96RegClassID;
2585 case 128:
2586 return AMDGPU::AReg_128RegClassID;
2587 case 160:
2588 return AMDGPU::AReg_160RegClassID;
2589 case 192:
2590 return AMDGPU::AReg_192RegClassID;
2591 case 224:
2592 return AMDGPU::AReg_224RegClassID;
2593 case 256:
2594 return AMDGPU::AReg_256RegClassID;
2595 case 288:
2596 return AMDGPU::AReg_288RegClassID;
2597 case 320:
2598 return AMDGPU::AReg_320RegClassID;
2599 case 352:
2600 return AMDGPU::AReg_352RegClassID;
2601 case 384:
2602 return AMDGPU::AReg_384RegClassID;
2603 case 512:
2604 return AMDGPU::AReg_512RegClassID;
2605 case 1024:
2606 return AMDGPU::AReg_1024RegClassID;
2607 }
2608 }
2609 return -1;
2610}
2611
2614 .Case("exec", AMDGPU::EXEC)
2615 .Case("vcc", AMDGPU::VCC)
2616 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2617 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2618 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2619 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2620 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2621 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2622 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2623 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2624 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2625 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2626 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2627 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2628 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2629 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2630 .Case("m0", AMDGPU::M0)
2631 .Case("vccz", AMDGPU::SRC_VCCZ)
2632 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2633 .Case("execz", AMDGPU::SRC_EXECZ)
2634 .Case("src_execz", AMDGPU::SRC_EXECZ)
2635 .Case("scc", AMDGPU::SRC_SCC)
2636 .Case("src_scc", AMDGPU::SRC_SCC)
2637 .Case("tba", AMDGPU::TBA)
2638 .Case("tma", AMDGPU::TMA)
2639 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2640 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2641 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2642 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2643 .Case("vcc_lo", AMDGPU::VCC_LO)
2644 .Case("vcc_hi", AMDGPU::VCC_HI)
2645 .Case("exec_lo", AMDGPU::EXEC_LO)
2646 .Case("exec_hi", AMDGPU::EXEC_HI)
2647 .Case("tma_lo", AMDGPU::TMA_LO)
2648 .Case("tma_hi", AMDGPU::TMA_HI)
2649 .Case("tba_lo", AMDGPU::TBA_LO)
2650 .Case("tba_hi", AMDGPU::TBA_HI)
2651 .Case("pc", AMDGPU::PC_REG)
2652 .Case("null", AMDGPU::SGPR_NULL)
2653 .Default(AMDGPU::NoRegister);
2654}
2655
2656bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2657 SMLoc &EndLoc, bool RestoreOnFailure) {
2658 auto R = parseRegister();
2659 if (!R) return true;
2660 assert(R->isReg());
2661 RegNo = R->getReg();
2662 StartLoc = R->getStartLoc();
2663 EndLoc = R->getEndLoc();
2664 return false;
2665}
2666
2667bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2668 SMLoc &EndLoc) {
2669 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2670}
2671
2672ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2673 SMLoc &EndLoc) {
2674 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2675 bool PendingErrors = getParser().hasPendingError();
2676 getParser().clearPendingErrors();
2677 if (PendingErrors)
2678 return ParseStatus::Failure;
2679 if (Result)
2680 return ParseStatus::NoMatch;
2681 return ParseStatus::Success;
2682}
2683
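// Fold the next register of a bracketed register list into the accumulated
// (Reg, RegWidth) pair. Special registers may only be combined into their
// known LO/HI pairs; regular registers must have consecutive indices.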
2684bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2685 RegisterKind RegKind, unsigned Reg1,
2686 SMLoc Loc) {
2687 switch (RegKind) {
2688 case IS_SPECIAL:
2689 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2690 Reg = AMDGPU::EXEC;
2691 RegWidth = 64;
2692 return true;
2693 }
2694 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2695 Reg = AMDGPU::FLAT_SCR;
2696 RegWidth = 64;
2697 return true;
2698 }
2699 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2700 Reg = AMDGPU::XNACK_MASK;
2701 RegWidth = 64;
2702 return true;
2703 }
2704 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2705 Reg = AMDGPU::VCC;
2706 RegWidth = 64;
2707 return true;
2708 }
2709 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2710 Reg = AMDGPU::TBA;
2711 RegWidth = 64;
2712 return true;
2713 }
2714 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2715 Reg = AMDGPU::TMA;
2716 RegWidth = 64;
2717 return true;
2718 }
2719 Error(Loc, "register does not fit in the list");
2720 return false;
2721 case IS_VGPR:
2722 case IS_SGPR:
2723 case IS_AGPR:
2724 case IS_TTMP:
2725 if (Reg1 != Reg + RegWidth / 32) {
2726 Error(Loc, "registers in a list must have consecutive indices");
2727 return false;
2728 }
2729 RegWidth += 32;
2730 return true;
2731 default:
2732 llvm_unreachable("unexpected register kind");
2733 }
2734}
2735
2736struct RegInfo {
2737 StringLiteral Name;
2738 RegisterKind Kind;
2739};
2740
2741static constexpr RegInfo RegularRegisters[] = {
2742 {{"v"}, IS_VGPR},
2743 {{"s"}, IS_SGPR},
2744 {{"ttmp"}, IS_TTMP},
2745 {{"acc"}, IS_AGPR},
2746 {{"a"}, IS_AGPR},
2747};
2748
2749static bool isRegularReg(RegisterKind Kind) {
2750 return Kind == IS_VGPR ||
2751 Kind == IS_SGPR ||
2752 Kind == IS_TTMP ||
2753 Kind == IS_AGPR;
2754}
2755
2756static const RegInfo* getRegularRegInfo(StringRef Str) {
2757 for (const RegInfo &Reg : RegularRegisters)
2758 if (Str.starts_with(Reg.Name))
2759 return &Reg;
2760 return nullptr;
2761}
2762
2763static bool getRegNum(StringRef Str, unsigned& Num) {
2764 return !Str.getAsInteger(10, Num);
2765}
2766
2767bool
2768AMDGPUAsmParser::isRegister(const AsmToken &Token,
2769 const AsmToken &NextToken) const {
2770
2771 // A list of consecutive registers: [s0,s1,s2,s3]
2772 if (Token.is(AsmToken::LBrac))
2773 return true;
2774
2775 if (!Token.is(AsmToken::Identifier))
2776 return false;
2777
2778 // A single register like s0 or a range of registers like s[0:1]
2779
2780 StringRef Str = Token.getString();
2781 const RegInfo *Reg = getRegularRegInfo(Str);
2782 if (Reg) {
2783 StringRef RegName = Reg->Name;
2784 StringRef RegSuffix = Str.substr(RegName.size());
2785 if (!RegSuffix.empty()) {
2786 RegSuffix.consume_back(".l");
2787 RegSuffix.consume_back(".h");
2788 unsigned Num;
2789 // A single register with an index: rXX
2790 if (getRegNum(RegSuffix, Num))
2791 return true;
2792 } else {
2793 // A range of registers: r[XX:YY].
2794 if (NextToken.is(AsmToken::LBrac))
2795 return true;
2796 }
2797 }
2798
2799 return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2800}
2801
2802bool
2803AMDGPUAsmParser::isRegister()
2804{
2805 return isRegister(getToken(), peekToken());
2806}
2807
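// Translate a parsed register kind, index and width (plus an optional .l/.h
// subregister) into an MCRegister, diagnosing alignment and range errors.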
2808unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2809 unsigned SubReg, unsigned RegWidth,
2810 SMLoc Loc) {
2811 assert(isRegularReg(RegKind));
2812
2813 unsigned AlignSize = 1;
2814 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2815 // SGPR and TTMP registers must be aligned.
2816 // Max required alignment is 4 dwords.
2817 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2818 }
2819
2820 if (RegNum % AlignSize != 0) {
2821 Error(Loc, "invalid register alignment");
2822 return AMDGPU::NoRegister;
2823 }
2824
2825 unsigned RegIdx = RegNum / AlignSize;
2826 int RCID = getRegClass(RegKind, RegWidth);
2827 if (RCID == -1) {
2828 Error(Loc, "invalid or unsupported register size");
2829 return AMDGPU::NoRegister;
2830 }
2831
2832 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2833 const MCRegisterClass RC = TRI->getRegClass(RCID);
2834 if (RegIdx >= RC.getNumRegs()) {
2835 Error(Loc, "register index is out of range");
2836 return AMDGPU::NoRegister;
2837 }
2838
2839 unsigned Reg = RC.getRegister(RegIdx);
2840
2841 if (SubReg) {
2842 Reg = TRI->getSubReg(Reg, SubReg);
2843
2844 // Currently all regular registers have their .l and .h subregisters, so
2845 // we should never need to generate an error here.
2846 assert(Reg && "Invalid subregister!");
2847 }
2848
2849 return Reg;
2850}
2851
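// Parse a register index range of the form "[<lo>]" or "[<lo>:<hi>]",
// returning the first index and the width of the range in bits.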
2852bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2853 int64_t RegLo, RegHi;
2854 if (!skipToken(AsmToken::LBrac, "missing register index"))
2855 return false;
2856
2857 SMLoc FirstIdxLoc = getLoc();
2858 SMLoc SecondIdxLoc;
2859
2860 if (!parseExpr(RegLo))
2861 return false;
2862
2863 if (trySkipToken(AsmToken::Colon)) {
2864 SecondIdxLoc = getLoc();
2865 if (!parseExpr(RegHi))
2866 return false;
2867 } else {
2868 RegHi = RegLo;
2869 }
2870
2871 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2872 return false;
2873
2874 if (!isUInt<32>(RegLo)) {
2875 Error(FirstIdxLoc, "invalid register index");
2876 return false;
2877 }
2878
2879 if (!isUInt<32>(RegHi)) {
2880 Error(SecondIdxLoc, "invalid register index");
2881 return false;
2882 }
2883
2884 if (RegLo > RegHi) {
2885 Error(FirstIdxLoc, "first register index should not exceed second index");
2886 return false;
2887 }
2888
2889 Num = static_cast<unsigned>(RegLo);
2890 RegWidth = 32 * ((RegHi - RegLo) + 1);
2891 return true;
2892}
2893
2894unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2895 unsigned &RegNum, unsigned &RegWidth,
2896 SmallVectorImpl<AsmToken> &Tokens) {
2897 assert(isToken(AsmToken::Identifier));
2898 unsigned Reg = getSpecialRegForName(getTokenStr());
2899 if (Reg) {
2900 RegNum = 0;
2901 RegWidth = 32;
2902 RegKind = IS_SPECIAL;
2903 Tokens.push_back(getToken());
2904 lex(); // skip register name
2905 }
2906 return Reg;
2907}
2908
2909unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2910 unsigned &RegNum, unsigned &RegWidth,
2911 SmallVectorImpl<AsmToken> &Tokens) {
2912 assert(isToken(AsmToken::Identifier));
2913 StringRef RegName = getTokenStr();
2914 auto Loc = getLoc();
2915
2916 const RegInfo *RI = getRegularRegInfo(RegName);
2917 if (!RI) {
2918 Error(Loc, "invalid register name");
2919 return AMDGPU::NoRegister;
2920 }
2921
2922 Tokens.push_back(getToken());
2923 lex(); // skip register name
2924
2925 RegKind = RI->Kind;
2926 StringRef RegSuffix = RegName.substr(RI->Name.size());
2927 unsigned SubReg = NoSubRegister;
2928 if (!RegSuffix.empty()) {
2929 // We don't know the opcode until we are done parsing, so we don't know if
2930 // registers should be 16 or 32 bit. It is therefore mandatory to put .l or
2931 // .h to correctly specify 16-bit registers. We also can't determine the class,
2932 // VGPR_16_Lo128 or VGPR_16, so always parse them as VGPR_16.
2933 if (RegSuffix.consume_back(".l"))
2934 SubReg = AMDGPU::lo16;
2935 else if (RegSuffix.consume_back(".h"))
2936 SubReg = AMDGPU::hi16;
2937
2938 // Single 32-bit register: vXX.
2939 if (!getRegNum(RegSuffix, RegNum)) {
2940 Error(Loc, "invalid register index");
2941 return AMDGPU::NoRegister;
2942 }
2943 RegWidth = 32;
2944 } else {
2945 // Range of registers: v[XX:YY]. ":YY" is optional.
2946 if (!ParseRegRange(RegNum, RegWidth))
2947 return AMDGPU::NoRegister;
2948 }
2949
2950 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
2951}
2952
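// Parse a bracketed list of 32-bit registers of the same kind, e.g.
// [s0,s1,s2,s3], and fold it into a single wide register.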
2953unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2954 unsigned &RegWidth,
2955 SmallVectorImpl<AsmToken> &Tokens) {
2956 unsigned Reg = AMDGPU::NoRegister;
2957 auto ListLoc = getLoc();
2958
2959 if (!skipToken(AsmToken::LBrac,
2960 "expected a register or a list of registers")) {
2961 return AMDGPU::NoRegister;
2962 }
2963
2964 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2965
2966 auto Loc = getLoc();
2967 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2968 return AMDGPU::NoRegister;
2969 if (RegWidth != 32) {
2970 Error(Loc, "expected a single 32-bit register");
2971 return AMDGPU::NoRegister;
2972 }
2973
2974 for (; trySkipToken(AsmToken::Comma); ) {
2975 RegisterKind NextRegKind;
2976 unsigned NextReg, NextRegNum, NextRegWidth;
2977 Loc = getLoc();
2978
2979 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2980 NextRegNum, NextRegWidth,
2981 Tokens)) {
2982 return AMDGPU::NoRegister;
2983 }
2984 if (NextRegWidth != 32) {
2985 Error(Loc, "expected a single 32-bit register");
2986 return AMDGPU::NoRegister;
2987 }
2988 if (NextRegKind != RegKind) {
2989 Error(Loc, "registers in a list must be of the same kind");
2990 return AMDGPU::NoRegister;
2991 }
2992 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2993 return AMDGPU::NoRegister;
2994 }
2995
2996 if (!skipToken(AsmToken::RBrac,
2997 "expected a comma or a closing square bracket")) {
2998 return AMDGPU::NoRegister;
2999 }
3000
3001 if (isRegularReg(RegKind))
3002 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3003
3004 return Reg;
3005}
3006
3007bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3008 unsigned &RegNum, unsigned &RegWidth,
3009 SmallVectorImpl<AsmToken> &Tokens) {
3010 auto Loc = getLoc();
3011 Reg = AMDGPU::NoRegister;
3012
3013 if (isToken(AsmToken::Identifier)) {
3014 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3015 if (Reg == AMDGPU::NoRegister)
3016 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3017 } else {
3018 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3019 }
3020
3021 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3022 if (Reg == AMDGPU::NoRegister) {
3023 assert(Parser.hasPendingError());
3024 return false;
3025 }
3026
3027 if (!subtargetHasRegister(*TRI, Reg)) {
3028 if (Reg == AMDGPU::SGPR_NULL) {
3029 Error(Loc, "'null' operand is not supported on this GPU");
3030 } else {
3031 Error(Loc, "register not available on this GPU");
3032 }
3033 return false;
3034 }
3035
3036 return true;
3037}
3038
3039bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3040 unsigned &RegNum, unsigned &RegWidth,
3041 bool RestoreOnFailure /*=false*/) {
3042 Reg = AMDGPU::NoRegister;
3043
3045 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3046 if (RestoreOnFailure) {
3047 while (!Tokens.empty()) {
3048 getLexer().UnLex(Tokens.pop_back_val());
3049 }
3050 }
3051 return true;
3052 }
3053 return false;
3054}
3055
3056std::optional<StringRef>
3057AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3058 switch (RegKind) {
3059 case IS_VGPR:
3060 return StringRef(".amdgcn.next_free_vgpr");
3061 case IS_SGPR:
3062 return StringRef(".amdgcn.next_free_sgpr");
3063 default:
3064 return std::nullopt;
3065 }
3066}
3067
3068void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3069 auto SymbolName = getGprCountSymbolName(RegKind);
3070 assert(SymbolName && "initializing invalid register kind");
3071 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3072 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
3073}
3074
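// Advance the .amdgcn.next_free_{v,s}gpr symbol so that it stays one past the
// highest register index used so far.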
3075bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3076 unsigned DwordRegIndex,
3077 unsigned RegWidth) {
3078 // Symbols are only defined for GCN targets
3079 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3080 return true;
3081
3082 auto SymbolName = getGprCountSymbolName(RegKind);
3083 if (!SymbolName)
3084 return true;
3085 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3086
3087 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3088 int64_t OldCount;
3089
3090 if (!Sym->isVariable())
3091 return !Error(getLoc(),
3092 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3093 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
3094 return !Error(
3095 getLoc(),
3096 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3097
3098 if (OldCount <= NewMax)
3099 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
3100
3101 return true;
3102}
3103
3104std::unique_ptr<AMDGPUOperand>
3105AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3106 const auto &Tok = getToken();
3107 SMLoc StartLoc = Tok.getLoc();
3108 SMLoc EndLoc = Tok.getEndLoc();
3109 RegisterKind RegKind;
3110 unsigned Reg, RegNum, RegWidth;
3111
3112 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3113 return nullptr;
3114 }
3115 if (isHsaAbi(getSTI())) {
3116 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3117 return nullptr;
3118 } else
3119 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3120 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3121}
3122
3123ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3124 bool HasSP3AbsModifier, bool HasLit) {
3125 // TODO: add syntactic sugar for 1/(2*PI)
3126
3127 if (isRegister())
3128 return ParseStatus::NoMatch;
3129 assert(!isModifier());
3130
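// An explicit lit(...) wrapper marks the operand so that it is encoded as a
// literal even if the value would otherwise qualify as an inline constant.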
3131 if (!HasLit) {
3132 HasLit = trySkipId("lit");
3133 if (HasLit) {
3134 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3135 return ParseStatus::Failure;
3136 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
3137 if (S.isSuccess() &&
3138 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3139 return ParseStatus::Failure;
3140 return S;
3141 }
3142 }
3143
3144 const auto& Tok = getToken();
3145 const auto& NextTok = peekToken();
3146 bool IsReal = Tok.is(AsmToken::Real);
3147 SMLoc S = getLoc();
3148 bool Negate = false;
3149
3150 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3151 lex();
3152 IsReal = true;
3153 Negate = true;
3154 }
3155
3156 AMDGPUOperand::Modifiers Mods;
3157 Mods.Lit = HasLit;
3158
3159 if (IsReal) {
3160 // Floating-point expressions are not supported.
3161 // We can only allow floating-point literals with an
3162 // optional sign.
3163
3164 StringRef Num = getTokenStr();
3165 lex();
3166
3167 APFloat RealVal(APFloat::IEEEdouble());
3168 auto roundMode = APFloat::rmNearestTiesToEven;
3169 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3170 return ParseStatus::Failure;
3171 if (Negate)
3172 RealVal.changeSign();
3173
3174 Operands.push_back(
3175 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3176 AMDGPUOperand::ImmTyNone, true));
3177 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3178 Op.setModifiers(Mods);
3179
3180 return ParseStatus::Success;
3181
3182 } else {
3183 int64_t IntVal;
3184 const MCExpr *Expr;
3185 SMLoc S = getLoc();
3186
3187 if (HasSP3AbsModifier) {
3188 // This is a workaround for handling expressions
3189 // as arguments of SP3 'abs' modifier, for example:
3190 // |1.0|
3191 // |-1|
3192 // |1+x|
3193 // This syntax is not compatible with the syntax of standard
3194 // MC expressions (due to the trailing '|').
3195 SMLoc EndLoc;
3196 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3197 return ParseStatus::Failure;
3198 } else {
3199 if (Parser.parseExpression(Expr))
3200 return ParseStatus::Failure;
3201 }
3202
3203 if (Expr->evaluateAsAbsolute(IntVal)) {
3204 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3205 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3206 Op.setModifiers(Mods);
3207 } else {
3208 if (HasLit)
3209 return ParseStatus::NoMatch;
3210 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3211 }
3212
3213 return ParseStatus::Success;
3214 }
3215
3216 return ParseStatus::NoMatch;
3217}
3218
3219ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3220 if (!isRegister())
3221 return ParseStatus::NoMatch;
3222
3223 if (auto R = parseRegister()) {
3224 assert(R->isReg());
3225 Operands.push_back(std::move(R));
3226 return ParseStatus::Success;
3227 }
3228 return ParseStatus::Failure;
3229}
3230
3231ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3232 bool HasSP3AbsMod, bool HasLit) {
3233 ParseStatus Res = parseReg(Operands);
3234 if (!Res.isNoMatch())
3235 return Res;
3236 if (isModifier())
3237 return ParseStatus::NoMatch;
3238 return parseImm(Operands, HasSP3AbsMod, HasLit);
3239}
3240
3241bool
3242AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3243 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3244 const auto &str = Token.getString();
3245 return str == "abs" || str == "neg" || str == "sext";
3246 }
3247 return false;
3248}
3249
3250bool
3251AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3252 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3253}
3254
3255bool
3256AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3257 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3258}
3259
3260bool
3261AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3262 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3263}
3264
3265// Check if this is an operand modifier or an opcode modifier
3266 // which may look like an expression but is not. We should
3267// avoid parsing these modifiers as expressions. Currently
3268// recognized sequences are:
3269// |...|
3270// abs(...)
3271// neg(...)
3272// sext(...)
3273// -reg
3274// -|...|
3275// -abs(...)
3276// name:...
3277//
3278bool
3279AMDGPUAsmParser::isModifier() {
3280
3281 AsmToken Tok = getToken();
3282 AsmToken NextToken[2];
3283 peekTokens(NextToken);
3284
3285 return isOperandModifier(Tok, NextToken[0]) ||
3286 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3287 isOpcodeModifierWithVal(Tok, NextToken[0]);
3288}
3289
3290// Check if the current token is an SP3 'neg' modifier.
3291// Currently this modifier is allowed in the following context:
3292//
3293// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3294// 2. Before an 'abs' modifier: -abs(...)
3295// 3. Before an SP3 'abs' modifier: -|...|
3296//
3297// In all other cases "-" is handled as a part
3298// of an expression that follows the sign.
3299//
3300 // Note: When "-" is followed by an integer literal N,
3301 // this is interpreted as integer negation rather
3302 // than a floating-point NEG modifier applied to N.
3303 // Besides being counter-intuitive, such use of a floating-point
3304 // NEG modifier would have resulted in a different meaning
3305 // of integer literals used with VOP1/2/C and VOP3,
3306// for example:
3307// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3308// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3309// Negative fp literals with preceding "-" are
3310// handled likewise for uniformity
3311//
3312bool
3313AMDGPUAsmParser::parseSP3NegModifier() {
3314
3315 AsmToken NextToken[2];
3316 peekTokens(NextToken);
3317
3318 if (isToken(AsmToken::Minus) &&
3319 (isRegister(NextToken[0], NextToken[1]) ||
3320 NextToken[0].is(AsmToken::Pipe) ||
3321 isId(NextToken[0], "abs"))) {
3322 lex();
3323 return true;
3324 }
3325
3326 return false;
3327}
3328
3329ParseStatus
3330AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3331 bool AllowImm) {
3332 bool Neg, SP3Neg;
3333 bool Abs, SP3Abs;
3334 bool Lit;
3335 SMLoc Loc;
3336
3337 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3338 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3339 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3340
3341 SP3Neg = parseSP3NegModifier();
3342
3343 Loc = getLoc();
3344 Neg = trySkipId("neg");
3345 if (Neg && SP3Neg)
3346 return Error(Loc, "expected register or immediate");
3347 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3348 return ParseStatus::Failure;
3349
3350 Abs = trySkipId("abs");
3351 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3352 return ParseStatus::Failure;
3353
3354 Lit = trySkipId("lit");
3355 if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3356 return ParseStatus::Failure;
3357
3358 Loc = getLoc();
3359 SP3Abs = trySkipToken(AsmToken::Pipe);
3360 if (Abs && SP3Abs)
3361 return Error(Loc, "expected register or immediate");
3362
3363 ParseStatus Res;
3364 if (AllowImm) {
3365 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3366 } else {
3367 Res = parseReg(Operands);
3368 }
3369 if (!Res.isSuccess())
3370 return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
3371
3372 if (Lit && !Operands.back()->isImm())
3373 Error(Loc, "expected immediate with lit modifier");
3374
3375 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3376 return ParseStatus::Failure;
3377 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3378 return ParseStatus::Failure;
3379 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3380 return ParseStatus::Failure;
3381 if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3382 return ParseStatus::Failure;
3383
3384 AMDGPUOperand::Modifiers Mods;
3385 Mods.Abs = Abs || SP3Abs;
3386 Mods.Neg = Neg || SP3Neg;
3387 Mods.Lit = Lit;
3388
3389 if (Mods.hasFPModifiers() || Lit) {
3390 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3391 if (Op.isExpr())
3392 return Error(Op.getStartLoc(), "expected an absolute expression");
3393 Op.setModifiers(Mods);
3394 }
3395 return ParseStatus::Success;
3396}
3397
3398ParseStatus
3399AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3400 bool AllowImm) {
3401 bool Sext = trySkipId("sext");
3402 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3403 return ParseStatus::Failure;
3404
3405 ParseStatus Res;
3406 if (AllowImm) {
3407 Res = parseRegOrImm(Operands);
3408 } else {
3409 Res = parseReg(Operands);
3410 }
3411 if (!Res.isSuccess())
3412 return Sext ? ParseStatus::Failure : Res;
3413
3414 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3415 return ParseStatus::Failure;
3416
3417 AMDGPUOperand::Modifiers Mods;
3418 Mods.Sext = Sext;
3419
3420 if (Mods.hasIntModifiers()) {
3421 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3422 if (Op.isExpr())
3423 return Error(Op.getStartLoc(), "expected an absolute expression");
3424 Op.setModifiers(Mods);
3425 }
3426
3427 return ParseStatus::Success;
3428}
3429
3430ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3431 return parseRegOrImmWithFPInputMods(Operands, false);
3432}
3433
3434ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3435 return parseRegOrImmWithIntInputMods(Operands, false);
3436}
3437
3438ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3439 auto Loc = getLoc();
3440 if (trySkipId("off")) {
3441 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3442 AMDGPUOperand::ImmTyOff, false));
3443 return ParseStatus::Success;
3444 }
3445
3446 if (!isRegister())
3447 return ParseStatus::NoMatch;
3448
3449 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3450 if (Reg) {
3451 Operands.push_back(std::move(Reg));
3452 return ParseStatus::Success;
3453 }
3454
3455 return ParseStatus::Failure;
3456}
3457
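// Reject matches that contradict an encoding forced via a mnemonic suffix
// (e.g. _e32, _e64, _dpp or _sdwa), and SDWA v_mac matches whose dst_sel is
// not DWORD.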
3458unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3459 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3460
3461 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3462 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3463 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3464 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3465 return Match_InvalidOperand;
3466
3467 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3468 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3469 // v_mac_f32/16 allow only dst_sel == DWORD;
3470 auto OpNum =
3471 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3472 const auto &Op = Inst.getOperand(OpNum);
3473 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3474 return Match_InvalidOperand;
3475 }
3476 }
3477
3478 return Match_Success;
3479}
3480
3482 static const unsigned Variants[] = {
3486 };
3487
3488 return ArrayRef(Variants);
3489}
3490
3491// What asm variants we should check
3492ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3493 if (isForcedDPP() && isForcedVOP3()) {
3494 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3495 return ArrayRef(Variants);
3496 }
3497 if (getForcedEncodingSize() == 32) {
3498 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3499 return ArrayRef(Variants);
3500 }
3501
3502 if (isForcedVOP3()) {
3503 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3504 return ArrayRef(Variants);
3505 }
3506
3507 if (isForcedSDWA()) {
3508 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3510 return ArrayRef(Variants);
3511 }
3512
3513 if (isForcedDPP()) {
3514 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3515 return ArrayRef(Variants);
3516 }
3517
3518 return getAllVariants();
3519}
3520
3521StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3522 if (isForcedDPP() && isForcedVOP3())
3523 return "e64_dpp";
3524
3525 if (getForcedEncodingSize() == 32)
3526 return "e32";
3527
3528 if (isForcedVOP3())
3529 return "e64";
3530
3531 if (isForcedSDWA())
3532 return "sdwa";
3533
3534 if (isForcedDPP())
3535 return "dpp";
3536
3537 return "";
3538}
3539
3540unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3541 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3542 for (MCPhysReg Reg : Desc.implicit_uses()) {
3543 switch (Reg) {
3544 case AMDGPU::FLAT_SCR:
3545 case AMDGPU::VCC:
3546 case AMDGPU::VCC_LO:
3547 case AMDGPU::VCC_HI:
3548 case AMDGPU::M0:
3549 return Reg;
3550 default:
3551 break;
3552 }
3553 }
3554 return AMDGPU::NoRegister;
3555}
3556
3557// NB: This code is correct only when used to check constant
3558 // bus limitations, because GFX7 supports no f16 inline constants.
3559// Note that there are no cases when a GFX7 opcode violates
3560// constant bus limitations due to the use of an f16 constant.
3561bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3562 unsigned OpIdx) const {
3563 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3564
3565 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3566 AMDGPU::isKImmOperand(Desc, OpIdx)) {
3567 return false;
3568 }
3569
3570 const MCOperand &MO = Inst.getOperand(OpIdx);
3571
3572 int64_t Val = MO.getImm();
3573 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3574
3575 switch (OpSize) { // expected operand size
3576 case 8:
3577 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3578 case 4:
3579 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3580 case 2: {
3581 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3585 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3586
3591
3596
3601
3606 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3607
3612 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3613
3614 llvm_unreachable("invalid operand type");
3615 }
3616 default:
3617 llvm_unreachable("invalid operand size");
3618 }
3619}
3620
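// Targets before GFX10 allow a single constant bus read per VALU instruction;
// GFX10+ allows two, except for 64-bit shifts, which are still limited to one.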
3621unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3622 if (!isGFX10Plus())
3623 return 1;
3624
3625 switch (Opcode) {
3626 // 64-bit shift instructions can use only one scalar value input
3627 case AMDGPU::V_LSHLREV_B64_e64:
3628 case AMDGPU::V_LSHLREV_B64_gfx10:
3629 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3630 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3631 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3632 case AMDGPU::V_LSHRREV_B64_e64:
3633 case AMDGPU::V_LSHRREV_B64_gfx10:
3634 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3635 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3636 case AMDGPU::V_ASHRREV_I64_e64:
3637 case AMDGPU::V_ASHRREV_I64_gfx10:
3638 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3639 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3640 case AMDGPU::V_LSHL_B64_e64:
3641 case AMDGPU::V_LSHR_B64_e64:
3642 case AMDGPU::V_ASHR_I64_e64:
3643 return 1;
3644 default:
3645 return 2;
3646 }
3647}
3648
3649constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3651
3652// Get regular operand indices in the same order as specified
3653// in the instruction (but append mandatory literals to the end).
3654static OperandIndices getSrcOperandIndices(unsigned Opcode,
3655 bool AddMandatoryLiterals = false) {
3656
3657 int16_t ImmIdx =
3658 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3659
3660 if (isVOPD(Opcode)) {
3661 int16_t ImmDeferredIdx =
3662 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3663 : -1;
3664
3665 return {getNamedOperandIdx(Opcode, OpName::src0X),
3666 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3667 getNamedOperandIdx(Opcode, OpName::src0Y),
3668 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3669 ImmDeferredIdx,
3670 ImmIdx};
3671 }
3672
3673 return {getNamedOperandIdx(Opcode, OpName::src0),
3674 getNamedOperandIdx(Opcode, OpName::src1),
3675 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3676}
3677
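// An operand occupies the constant bus if it is a literal (a non-inlinable
// immediate), an expression, or an SGPR other than null; VGPRs and inline
// constants do not.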
3678bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3679 const MCOperand &MO = Inst.getOperand(OpIdx);
3680 if (MO.isImm())
3681 return !isInlineConstant(Inst, OpIdx);
3682 if (MO.isReg()) {
3683 auto Reg = MO.getReg();
3684 if (!Reg)
3685 return false;
3686 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3687 auto PReg = mc2PseudoReg(Reg);
3688 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3689 }
3690 return true;
3691}
3692
3693// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3694// Writelane is special in that it can use SGPR and M0 (which would normally
3695// count as using the constant bus twice - but in this case it is allowed since
3696// the lane selector doesn't count as a use of the constant bus). However, it is
3697// still required to abide by the 1 SGPR rule.
3698static bool checkWriteLane(const MCInst &Inst) {
3699 const unsigned Opcode = Inst.getOpcode();
3700 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3701 return false;
3702 const MCOperand &LaneSelOp = Inst.getOperand(2);
3703 if (!LaneSelOp.isReg())
3704 return false;
3705 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3706 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3707}
3708
3709bool AMDGPUAsmParser::validateConstantBusLimitations(
3710 const MCInst &Inst, const OperandVector &Operands) {
3711 const unsigned Opcode = Inst.getOpcode();
3712 const MCInstrDesc &Desc = MII.get(Opcode);
3713 unsigned LastSGPR = AMDGPU::NoRegister;
3714 unsigned ConstantBusUseCount = 0;
3715 unsigned NumLiterals = 0;
3716 unsigned LiteralSize;
3717
3718 if (!(Desc.TSFlags &
3721 !isVOPD(Opcode))
3722 return true;
3723
3724 if (checkWriteLane(Inst))
3725 return true;
3726
3727 // Check special imm operands (used by madmk, etc)
3728 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3729 ++NumLiterals;
3730 LiteralSize = 4;
3731 }
3732
3733 SmallDenseSet<unsigned> SGPRsUsed;
3734 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3735 if (SGPRUsed != AMDGPU::NoRegister) {
3736 SGPRsUsed.insert(SGPRUsed);
3737 ++ConstantBusUseCount;
3738 }
3739
3740 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3741
3742 for (int OpIdx : OpIndices) {
3743 if (OpIdx == -1)
3744 continue;
3745
3746 const MCOperand &MO = Inst.getOperand(OpIdx);
3747 if (usesConstantBus(Inst, OpIdx)) {
3748 if (MO.isReg()) {
3749 LastSGPR = mc2PseudoReg(MO.getReg());
3750 // Pairs of registers with a partial intersection like these
3751 // s0, s[0:1]
3752 // flat_scratch_lo, flat_scratch
3753 // flat_scratch_lo, flat_scratch_hi
3754 // are theoretically valid but they are disabled anyway.
3755 // Note that this code mimics SIInstrInfo::verifyInstruction
3756 if (SGPRsUsed.insert(LastSGPR).second) {
3757 ++ConstantBusUseCount;
3758 }
3759 } else { // Expression or a literal
3760
3761 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3762 continue; // special operand like VINTERP attr_chan
3763
3764 // An instruction may use only one literal.
3765 // This has been validated in a previous step.
3766 // See validateVOPLiteral.
3767 // This literal may be used by more than one operand.
3768 // If all of these operands have the same size,
3769 // the literal counts as one scalar value.
3770 // Otherwise it counts as 2 scalar values.
3771 // See "GFX10 Shader Programming", section 3.6.2.3.
3772
3773 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3774 if (Size < 4)
3775 Size = 4;
3776
3777 if (NumLiterals == 0) {
3778 NumLiterals = 1;
3779 LiteralSize = Size;
3780 } else if (LiteralSize != Size) {
3781 NumLiterals = 2;
3782 }
3783 }
3784 }
3785 }
3786 ConstantBusUseCount += NumLiterals;
3787
3788 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3789 return true;
3790
3791 SMLoc LitLoc = getLitLoc(Operands);
3792 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3793 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3794 Error(Loc, "invalid operand (violates constant bus restrictions)");
3795 return false;
3796}
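// Illustrative example (not from the original source): on targets where
// getConstantBusLimit() is 1 (e.g. most pre-GFX10 encodings), an instruction
// such as "v_add_f32_e64 v0, s0, s1" reads two different SGPRs over the
// constant bus and is rejected above with "invalid operand (violates constant
// bus restrictions)"; the same operands are accepted on GFX10+, where the
// limit is 2 for most VALU opcodes.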
3797
3798bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3799 const MCInst &Inst, const OperandVector &Operands) {
3800
3801 const unsigned Opcode = Inst.getOpcode();
3802 if (!isVOPD(Opcode))
3803 return true;
3804
3805 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3806
3807 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3808 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3809 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3810 ? Opr.getReg()
3811 : MCRegister();
3812 };
3813
3814 // On GFX12, if both OpX and OpY are V_MOV_B32, then OpY uses the SRC2 source cache.
3815 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3816
3817 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3818 auto InvalidCompOprIdx =
3819 InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3820 if (!InvalidCompOprIdx)
3821 return true;
3822
3823 auto CompOprIdx = *InvalidCompOprIdx;
3824 auto ParsedIdx =
3825 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3826 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3827 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3828
3829 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3830 if (CompOprIdx == VOPD::Component::DST) {
3831 Error(Loc, "one dst register must be even and the other odd");
3832 } else {
3833 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3834 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3835 " operands must use different VGPR banks");
3836 }
3837
3838 return false;
3839}
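// Illustrative note (not from the original source): for VOPD the two
// component destinations must use one even- and one odd-numbered VGPR, and the
// corresponding sources of the X and Y halves must come from different VGPR
// banks; otherwise the errors emitted above ("one dst register must be even
// and the other odd" / "srcN operands must use different VGPR banks") are
// reported at the offending parsed operand.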
3840
3841bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3842
3843 const unsigned Opc = Inst.getOpcode();
3844 const MCInstrDesc &Desc = MII.get(Opc);
3845
3846 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3847 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3848 assert(ClampIdx != -1);
3849 return Inst.getOperand(ClampIdx).getImm() == 0;
3850 }
3851
3852 return true;
3853}
3854
3855constexpr uint64_t MIMGFlags =
3856 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
3857
3858bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3859 const SMLoc &IDLoc) {
3860
3861 const unsigned Opc = Inst.getOpcode();
3862 const MCInstrDesc &Desc = MII.get(Opc);
3863
3864 if ((Desc.TSFlags & MIMGFlags) == 0)
3865 return true;
3866
3867 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3868 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3869 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3870
3871 assert(VDataIdx != -1);
3872
3873 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3874 return true;
3875
3876 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3877 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3878 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3879 if (DMask == 0)
3880 DMask = 1;
3881
3882 bool IsPackedD16 = false;
3883 unsigned DataSize =
3884 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3885 if (hasPackedD16()) {
3886 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3887 IsPackedD16 = D16Idx >= 0;
3888 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3889 DataSize = (DataSize + 1) / 2;
3890 }
3891
3892 if ((VDataSize / 4) == DataSize + TFESize)
3893 return true;
3894
3895 StringRef Modifiers;
3896 if (isGFX90A())
3897 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3898 else
3899 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3900
3901 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3902 return false;
3903}
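// Illustrative example (not from the original source): the vdata register
// count must match popcount(dmask) (or 4 for gather4), halved and rounded up
// when packed d16 is in effect, plus one extra register when tfe is set. For
// example, dmask:0x7 needs a 3-register vdata such as v[0:2], or a 2-register
// tuple with packed d16.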
3904
3905bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3906 const SMLoc &IDLoc) {
3907 const unsigned Opc = Inst.getOpcode();
3908 const MCInstrDesc &Desc = MII.get(Opc);
3909
3910 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
3911 return true;
3912
3913 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3914
3915 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3916 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3917 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3918 int RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc
3919 : AMDGPU::OpName::rsrc;
3920 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
3921 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3922 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3923
3924 assert(VAddr0Idx != -1);
3925 assert(SrsrcIdx != -1);
3926 assert(SrsrcIdx > VAddr0Idx);
3927
3928 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3929 if (BaseOpcode->BVH) {
3930 if (IsA16 == BaseOpcode->A16)
3931 return true;
3932 Error(IDLoc, "image address size does not match a16");
3933 return false;
3934 }
3935
3936 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3937 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3938 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3939 unsigned ActualAddrSize =
3940 IsNSA ? SrsrcIdx - VAddr0Idx
3941 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3942
3943 unsigned ExpectedAddrSize =
3944 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3945
3946 if (IsNSA) {
3947 if (hasPartialNSAEncoding() &&
3948 ExpectedAddrSize >
3949 getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
3950 int VAddrLastIdx = SrsrcIdx - 1;
3951 unsigned VAddrLastSize =
3952 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3953
3954 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3955 }
3956 } else {
3957 if (ExpectedAddrSize > 12)
3958 ExpectedAddrSize = 16;
3959
3960 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3961 // This provides backward compatibility for assembly created
3962 // before 160b/192b/224b types were directly supported.
3963 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3964 return true;
3965 }
3966
3967 if (ActualAddrSize == ExpectedAddrSize)
3968 return true;
3969
3970 Error(IDLoc, "image address size does not match dim and a16");
3971 return false;
3972}
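// Illustrative note (not from the original source): the expected address size
// is derived from the dim operand plus the a16/g16 settings (e.g. a 2D sample
// needs at least 2 address dwords), while the actual size is either the number
// of NSA address registers or the dword size of the vaddr0 tuple; oversized
// 8-VGPR tuples are tolerated when 5-7 dwords are expected, for backward
// compatibility with older assembly.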
3973
3974bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3975
3976 const unsigned Opc = Inst.getOpcode();
3977 const MCInstrDesc &Desc = MII.get(Opc);
3978
3979 if ((Desc.TSFlags & MIMGFlags) == 0)
3980 return true;
3981 if (!Desc.mayLoad() || !Desc.mayStore())
3982 return true; // Not atomic
3983
3984 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3985 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3986
3987 // This is an incomplete check because image_atomic_cmpswap
3988 // may only use 0x3 and 0xf while other atomic operations
3989 // may use 0x1 and 0x3. However these limitations are
3990 // verified when we check that dmask matches dst size.
3991 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3992}
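// Illustrative example (not from the original source): a 32-bit image atomic
// normally uses dmask:0x1, a 64-bit one dmask:0x3, and image_atomic_cmpswap
// may use up to dmask:0xf; any other dmask on an atomic image instruction is
// rejected as "invalid atomic image dmask".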
3993
3994bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3995
3996 const unsigned Opc = Inst.getOpcode();
3997 const MCInstrDesc &Desc = MII.get(Opc);
3998
3999 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4000 return true;
4001
4002 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4003 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4004
4005 // GATHER4 instructions use dmask in a different fashion compared to
4006 // other MIMG instructions. The only useful DMASK values are
4007 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4008 // (red,red,red,red) etc.) The ISA document doesn't mention
4009 // this.
4010 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4011}
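// Illustrative example (not from the original source):
// "image_gather4 ... dmask:0x1" (gather the red channel) is accepted, while a
// dmask with more than one bit set, such as dmask:0x3, is rejected with
// "invalid image_gather dmask: only one bit must be set".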
4012
4013bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4014 const unsigned Opc = Inst.getOpcode();
4015 const MCInstrDesc &Desc = MII.get(Opc);
4016
4017 if ((Desc.TSFlags & MIMGFlags) == 0)
4018 return true;
4019
4020 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4021 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4022 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4023
4024 if (!BaseOpcode->MSAA)
4025 return true;
4026
4027 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4028 assert(DimIdx != -1);
4029
4030 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4031 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4032
4033 return DimInfo->MSAA;
4034}
4035
4036static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4037{
4038 switch (Opcode) {
4039 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4040 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4041 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4042 return true;
4043 default:
4044 return false;
4045 }
4046}
4047
4048// movrels* opcodes should only allow VGPRS as src0.
4049// This is specified in .td description for vop1/vop3,
4050// but sdwa is handled differently. See isSDWAOperand.
4051bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4052 const OperandVector &Operands) {
4053
4054 const unsigned Opc = Inst.getOpcode();
4055 const MCInstrDesc &Desc = MII.get(Opc);
4056
4057 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4058 return true;
4059
4060 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4061 assert(Src0Idx != -1);
4062
4063 SMLoc ErrLoc;
4064 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4065 if (Src0.isReg()) {
4066 auto Reg = mc2PseudoReg(Src0.getReg());
4067 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4068 if (!isSGPR(Reg, TRI))
4069 return true;
4070 ErrLoc = getRegLoc(Reg, Operands);
4071 } else {
4072 ErrLoc = getConstLoc(Operands);
4073 }
4074
4075 Error(ErrLoc, "source operand must be a VGPR");
4076 return false;
4077}
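// Illustrative example (not from the original source):
// "v_movrels_b32_sdwa v0, s0" is rejected with "source operand must be a
// VGPR", while "v_movrels_b32_sdwa v0, v1" is accepted; the VOP1/VOP3 forms
// already enforce this restriction through their operand definitions.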
4078
4079bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4080 const OperandVector &Operands) {
4081
4082 const unsigned Opc = Inst.getOpcode();
4083
4084 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4085 return true;
4086
4087 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4088 assert(Src0Idx != -1);
4089
4090 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4091 if (!Src0.isReg())
4092 return true;
4093
4094 auto Reg = mc2PseudoReg(Src0.getReg());
4095 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4096 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4097 Error(getRegLoc(Reg, Operands),
4098 "source operand must be either a VGPR or an inline constant");
4099 return false;
4100 }
4101
4102 return true;
4103}
4104
4105bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4106 const OperandVector &Operands) {
4107 unsigned Opcode = Inst.getOpcode();
4108 const MCInstrDesc &Desc = MII.get(Opcode);
4109
4110 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4111 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4112 return true;
4113
4114 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4115 if (Src2Idx == -1)
4116 return true;
4117
4118 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4119 Error(getConstLoc(Operands),
4120 "inline constants are not allowed for this operand");
4121 return false;
4122 }
4123
4124 return true;
4125}
4126
4127bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4128 const OperandVector &Operands) {
4129 const unsigned Opc = Inst.getOpcode();
4130 const MCInstrDesc &Desc = MII.get(Opc);
4131
4132 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4133 return true;
4134
4135 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4136 if (Src2Idx == -1)
4137 return true;
4138
4139 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4140 if (!Src2.isReg())
4141 return true;
4142
4143 MCRegister Src2Reg = Src2.getReg();
4144 MCRegister DstReg = Inst.getOperand(0).getReg();
4145 if (Src2Reg == DstReg)
4146 return true;
4147
4148 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4149 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4150 return true;
4151
4152 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4153 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
4154 "source 2 operand must not partially overlap with dst");
4155 return false;
4156 }
4157
4158 return true;
4159}
4160
4161bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4162 switch (Inst.getOpcode()) {
4163 default:
4164 return true;
4165 case V_DIV_SCALE_F32_gfx6_gfx7:
4166 case V_DIV_SCALE_F32_vi:
4167 case V_DIV_SCALE_F32_gfx10:
4168 case V_DIV_SCALE_F64_gfx6_gfx7:
4169 case V_DIV_SCALE_F64_vi:
4170 case V_DIV_SCALE_F64_gfx10:
4171 break;
4172 }
4173
4174 // TODO: Check that src0 = src1 or src2.
4175
4176 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4177 AMDGPU::OpName::src2_modifiers,
4178 AMDGPU::OpName::src2_modifiers}) {
4179 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4180 .getImm() &
4181 SISrcMods::ABS) {
4182 return false;
4183 }
4184 }
4185
4186 return true;
4187}
4188
4189bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4190
4191 const unsigned Opc = Inst.getOpcode();
4192 const MCInstrDesc &Desc = MII.get(Opc);
4193
4194 if ((Desc.TSFlags & MIMGFlags) == 0)
4195 return true;
4196
4197 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4198 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4199 if (isCI() || isSI())
4200 return false;
4201 }
4202
4203 return true;
4204}
4205
4206static bool IsRevOpcode(const unsigned Opcode)
4207{
4208 switch (Opcode) {
4209 case AMDGPU::V_SUBREV_F32_e32:
4210 case AMDGPU::V_SUBREV_F32_e64:
4211 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4212 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4213 case AMDGPU::V_SUBREV_F32_e32_vi:
4214 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4215 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4216 case AMDGPU::V_SUBREV_F32_e64_vi:
4217
4218 case AMDGPU::V_SUBREV_CO_U32_e32:
4219 case AMDGPU::V_SUBREV_CO_U32_e64:
4220 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4221 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4222
4223 case AMDGPU::V_SUBBREV_U32_e32:
4224 case AMDGPU::V_SUBBREV_U32_e64:
4225 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4226 case AMDGPU::V_SUBBREV_U32_e32_vi:
4227 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4228 case AMDGPU::V_SUBBREV_U32_e64_vi:
4229
4230 case AMDGPU::V_SUBREV_U32_e32:
4231 case AMDGPU::V_SUBREV_U32_e64:
4232 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4233 case AMDGPU::V_SUBREV_U32_e32_vi:
4234 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4235 case AMDGPU::V_SUBREV_U32_e64_vi:
4236
4237 case AMDGPU::V_SUBREV_F16_e32:
4238 case AMDGPU::V_SUBREV_F16_e64:
4239 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4240 case AMDGPU::V_SUBREV_F16_e32_vi:
4241 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4242 case AMDGPU::V_SUBREV_F16_e64_vi:
4243
4244 case AMDGPU::V_SUBREV_U16_e32:
4245 case AMDGPU::V_SUBREV_U16_e64:
4246 case AMDGPU::V_SUBREV_U16_e32_vi:
4247 case AMDGPU::V_SUBREV_U16_e64_vi:
4248
4249 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4250 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4251 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4252
4253 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4254 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4255
4256 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4257 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4258
4259 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4260 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4261
4262 case AMDGPU::V_LSHRREV_B32_e32:
4263 case AMDGPU::V_LSHRREV_B32_e64:
4264 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4265 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4266 case AMDGPU::V_LSHRREV_B32_e32_vi:
4267 case AMDGPU::V_LSHRREV_B32_e64_vi:
4268 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4269 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4270
4271 case AMDGPU::V_ASHRREV_I32_e32:
4272 case AMDGPU::V_ASHRREV_I32_e64:
4273 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4274 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4275 case AMDGPU::V_ASHRREV_I32_e32_vi:
4276 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4277 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4278 case AMDGPU::V_ASHRREV_I32_e64_vi:
4279
4280 case AMDGPU::V_LSHLREV_B32_e32:
4281 case AMDGPU::V_LSHLREV_B32_e64:
4282 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4283 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4284 case AMDGPU::V_LSHLREV_B32_e32_vi:
4285 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4286 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4287 case AMDGPU::V_LSHLREV_B32_e64_vi:
4288
4289 case AMDGPU::V_LSHLREV_B16_e32:
4290 case AMDGPU::V_LSHLREV_B16_e64:
4291 case AMDGPU::V_LSHLREV_B16_e32_vi:
4292 case AMDGPU::V_LSHLREV_B16_e64_vi:
4293 case AMDGPU::V_LSHLREV_B16_gfx10:
4294
4295 case AMDGPU::V_LSHRREV_B16_e32:
4296 case AMDGPU::V_LSHRREV_B16_e64:
4297 case AMDGPU::V_LSHRREV_B16_e32_vi:
4298 case AMDGPU::V_LSHRREV_B16_e64_vi:
4299 case AMDGPU::V_LSHRREV_B16_gfx10:
4300
4301 case AMDGPU::V_ASHRREV_I16_e32:
4302 case AMDGPU::V_ASHRREV_I16_e64:
4303 case AMDGPU::V_ASHRREV_I16_e32_vi:
4304 case AMDGPU::V_ASHRREV_I16_e64_vi:
4305 case AMDGPU::V_ASHRREV_I16_gfx10:
4306
4307 case AMDGPU::V_LSHLREV_B64_e64:
4308 case AMDGPU::V_LSHLREV_B64_gfx10:
4309 case AMDGPU::V_LSHLREV_B64_vi:
4310
4311 case AMDGPU::V_LSHRREV_B64_e64:
4312 case AMDGPU::V_LSHRREV_B64_gfx10:
4313 case AMDGPU::V_LSHRREV_B64_vi:
4314
4315 case AMDGPU::V_ASHRREV_I64_e64:
4316 case AMDGPU::V_ASHRREV_I64_gfx10:
4317 case AMDGPU::V_ASHRREV_I64_vi:
4318
4319 case AMDGPU::V_PK_LSHLREV_B16:
4320 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4321 case AMDGPU::V_PK_LSHLREV_B16_vi:
4322
4323 case AMDGPU::V_PK_LSHRREV_B16:
4324 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4325 case AMDGPU::V_PK_LSHRREV_B16_vi:
4326 case AMDGPU::V_PK_ASHRREV_I16:
4327 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4328 case AMDGPU::V_PK_ASHRREV_I16_vi:
4329 return true;
4330 default:
4331 return false;
4332 }
4333}
4334
4335std::optional<StringRef>
4336AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4337
4338 using namespace SIInstrFlags;
4339 const unsigned Opcode = Inst.getOpcode();
4340 const MCInstrDesc &Desc = MII.get(Opcode);
4341
4342 // The lds_direct register is defined so that it can be used
4343 // with 9-bit operands only. Ignore encodings that do not accept these.
4344 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4345 if ((Desc.TSFlags & Enc) == 0)
4346 return std::nullopt;
4347
4348 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4349 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4350 if (SrcIdx == -1)
4351 break;
4352 const auto &Src = Inst.getOperand(SrcIdx);
4353 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4354
4355 if (isGFX90A() || isGFX11Plus())
4356 return StringRef("lds_direct is not supported on this GPU");
4357
4358 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4359 return StringRef("lds_direct cannot be used with this instruction");
4360
4361 if (SrcName != OpName::src0)
4362 return StringRef("lds_direct may be used as src0 only");
4363 }
4364 }
4365
4366 return std::nullopt;
4367}
4368
4369SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4370 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4371 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4372 if (Op.isFlatOffset())
4373 return Op.getStartLoc();
4374 }
4375 return getLoc();
4376}
4377
4378bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4379 const OperandVector &Operands) {
4380 auto Opcode = Inst.getOpcode();
4381 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4382 if (OpNum == -1)
4383 return true;
4384
4385 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4386 if ((TSFlags & SIInstrFlags::FLAT))
4387 return validateFlatOffset(Inst, Operands);
4388
4389 if ((TSFlags & SIInstrFlags::SMRD))
4390 return validateSMEMOffset(Inst, Operands);
4391
4392 const auto &Op = Inst.getOperand(OpNum);
4393 if (isGFX12Plus() &&
4394 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4395 const unsigned OffsetSize = 24;
4396 if (!isIntN(OffsetSize, Op.getImm())) {
4397 Error(getFlatOffsetLoc(Operands),
4398 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4399 return false;
4400 }
4401 } else {
4402 const unsigned OffsetSize = 16;
4403 if (!isUIntN(OffsetSize, Op.getImm())) {
4404 Error(getFlatOffsetLoc(Operands),
4405 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4406 return false;
4407 }
4408 }
4409 return true;
4410}
4411
4412bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4413 const OperandVector &Operands) {
4414 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4415 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4416 return true;
4417
4418 auto Opcode = Inst.getOpcode();
4419 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4420 assert(OpNum != -1);
4421
4422 const auto &Op = Inst.getOperand(OpNum);
4423 if (!hasFlatOffsets() && Op.getImm() != 0) {
4424 Error(getFlatOffsetLoc(Operands),
4425 "flat offset modifier is not supported on this GPU");
4426 return false;
4427 }
4428
4429 // For pre-GFX12 FLAT instructions the offset must be positive;
4430 // MSB is ignored and forced to zero.
4431 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4432 bool AllowNegative =
4433 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4434 isGFX12Plus();
4435 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4436 Error(getFlatOffsetLoc(Operands),
4437 Twine("expected a ") +
4438 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4439 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4440 return false;
4441 }
4442
4443 return true;
4444}
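// Illustrative note (not from the original source): the accepted offset width
// comes from AMDGPU::getNumFlatOffsetBits(), and negative offsets are only
// allowed for global/scratch variants or on GFX12+; so, for example, a plain
// "flat_load_dword v0, v[1:2] offset:-8" is diagnosed as needing an unsigned
// offset on targets where plain FLAT offsets must be non-negative.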
4445
4446SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4447 // Start with second operand because SMEM Offset cannot be dst or src0.
4448 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4449 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4450 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4451 return Op.getStartLoc();
4452 }
4453 return getLoc();
4454}
4455
4456bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4457 const OperandVector &Operands) {
4458 if (isCI() || isSI())
4459 return true;
4460
4461 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4462 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4463 return true;
4464
4465 auto Opcode = Inst.getOpcode();
4466 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4467 if (OpNum == -1)
4468 return true;
4469
4470 const auto &Op = Inst.getOperand(OpNum);
4471 if (!Op.isImm())
4472 return true;
4473
4474 uint64_t Offset = Op.getImm();
4475 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4476 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4477 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4478 return true;
4479
4480 Error(getSMEMOffsetLoc(Operands),
4481 isGFX12Plus() ? "expected a 24-bit signed offset"
4482 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4483 : "expected a 21-bit signed offset");
4484
4485 return false;
4486}
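// Illustrative note (not from the original source): the diagnostic reflects
// the encoding in use: a 20-bit unsigned offset for VI and buffer forms, a
// 21-bit signed offset otherwise, and a 24-bit signed offset on GFX12+,
// matching the legality helpers called above.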
4487
4488bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4489 unsigned Opcode = Inst.getOpcode();
4490 const MCInstrDesc &Desc = MII.get(Opcode);
4491 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4492 return true;
4493
4494 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4495 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4496
4497 const int OpIndices[] = { Src0Idx, Src1Idx };
4498
4499 unsigned NumExprs = 0;
4500 unsigned NumLiterals = 0;
4501 uint32_t LiteralValue;
4502
4503 for (int OpIdx : OpIndices) {
4504 if (OpIdx == -1) break;
4505
4506 const MCOperand &MO = Inst.getOperand(OpIdx);
4507 // Exclude special imm operands (like the one used by s_set_gpr_idx_on)
4508 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4509 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4510 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4511 if (NumLiterals == 0 || LiteralValue != Value) {
4512 LiteralValue = Value;
4513 ++NumLiterals;
4514 }
4515 } else if (MO.isExpr()) {
4516 ++NumExprs;
4517 }
4518 }
4519 }
4520
4521 return NumLiterals + NumExprs <= 1;
4522}
4523
4524bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4525 const unsigned Opc = Inst.getOpcode();
4526 if (isPermlane16(Opc)) {
4527 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4528 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4529
4530 if (OpSel & ~3)
4531 return false;
4532 }
4533
4534 uint64_t TSFlags = MII.get(Opc).TSFlags;
4535
4536 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4537 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4538 if (OpSelIdx != -1) {
4539 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4540 return false;
4541 }
4542 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4543 if (OpSelHiIdx != -1) {
4544 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4545 return false;
4546 }
4547 }
4548
4549 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4550 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4551 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4552 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4553 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4554 if (OpSel & 3)
4555 return false;
4556 }
4557
4558 return true;
4559}
4560
4561bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) {
4562 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4563
4564 const unsigned Opc = Inst.getOpcode();
4565 uint64_t TSFlags = MII.get(Opc).TSFlags;
4566
4567 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4568 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4569 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4570 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4571 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4572 !(TSFlags & SIInstrFlags::IsSWMMAC))
4573 return true;
4574
4575 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4576 if (NegIdx == -1)
4577 return true;
4578
4579 unsigned Neg = Inst.getOperand(NegIdx).getImm();
4580
4581 // Some instructions have a neg_lo or neg_hi operand, but the neg modifier is
4582 // allowed on some of their src operands and not on others.
4583 // Conveniently, such instructions have no src_modifiers operand for the src
4584 // operands that do not allow neg, because those operands do not allow opsel either.
4585
4586 int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4587 AMDGPU::OpName::src1_modifiers,
4588 AMDGPU::OpName::src2_modifiers};
4589
4590 for (unsigned i = 0; i < 3; ++i) {
4591 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4592 if (Neg & (1 << i))
4593 return false;
4594 }
4595 }
4596
4597 return true;
4598}
4599
4600bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4601 const OperandVector &Operands) {
4602 const unsigned Opc = Inst.getOpcode();
4603 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4604 if (DppCtrlIdx >= 0) {
4605 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4606
4607 if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
4608 AMDGPU::isDPALU_DPP(MII.get(Opc))) {
4609 // DP ALU DPP is supported for row_newbcast only on GFX9*
4610 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4611 Error(S, "DP ALU dpp only supports row_newbcast");
4612 return false;
4613 }
4614 }
4615
4616 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
4617 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4618
4619 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
4620 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4621 if (Src1Idx >= 0) {
4622 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4623 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4624 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
4625 auto Reg = mc2PseudoReg(Inst.getOperand(Src1Idx).getReg());
4626 SMLoc S = getRegLoc(Reg, Operands);
4627 Error(S, "invalid operand for instruction");
4628 return false;
4629 }
4630 if (Src1.isImm()) {
4631 Error(getInstLoc(Operands),
4632 "src1 immediate operand invalid for instruction");
4633 return false;
4634 }
4635 }
4636 }
4637
4638 return true;
4639}
4640
4641// Check if VCC register matches wavefront size
4642bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4643 auto FB = getFeatureBits();
4644 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4645 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4646}
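// Illustrative note (not from the original source): in wave64 mode a
// carry/condition operand is written as "vcc", while wave32 code must use
// "vcc_lo"; this helper ties that spelling to the selected wavefront-size
// feature.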
4647
4648// Only one unique literal may be used. A VOP3 literal is only allowed on GFX10+.
4649bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4650 const OperandVector &Operands) {
4651 unsigned Opcode = Inst.getOpcode();
4652 const MCInstrDesc &Desc = MII.get(Opcode);
4653 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4654 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4655 !HasMandatoryLiteral && !isVOPD(Opcode))
4656 return true;
4657
4658 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4659
4660 unsigned NumExprs = 0;
4661 unsigned NumLiterals = 0;
4662 uint64_t LiteralValue;
4663
4664 for (int OpIdx : OpIndices) {
4665 if (OpIdx == -1)
4666 continue;
4667
4668 const MCOperand &MO = Inst.getOperand(OpIdx);
4669 if (!MO.isImm() && !MO.isExpr())
4670 continue;
4671 if (!isSISrcOperand(Desc, OpIdx))
4672 continue;
4673
4674 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4675 uint64_t Value = static_cast<uint64_t>(MO.getImm());
4676 bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
4677 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
4678 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
4679
4680 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
4681 Error(getLitLoc(Operands), "invalid operand for instruction");
4682 return false;
4683 }
4684
4685 if (IsFP64 && IsValid32Op)
4686 Value = Hi_32(Value);
4687
4688 if (NumLiterals == 0 || LiteralValue != Value) {
4689 LiteralValue = Value;
4690 ++NumLiterals;
4691 }
4692 } else if (MO.isExpr()) {
4693 ++NumExprs;
4694 }
4695 }
4696 NumLiterals += NumExprs;
4697
4698 if (!NumLiterals)
4699 return true;
4700
4701 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4702 Error(getLitLoc(Operands), "literal operands are not supported");
4703 return false;
4704 }
4705
4706 if (NumLiterals > 1) {
4707 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4708 return false;
4709 }
4710
4711 return true;
4712}
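// Illustrative example (not from the original source):
// "v_fma_f32 v0, 0x1234, v1, 0x5678" carries two different literals and is
// rejected with "only one unique literal operand is allowed", whereas reusing
// the same literal in both positions is fine; without FeatureVOP3Literal, any
// non-mandatory literal in these encodings is rejected as "literal operands
// are not supported".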
4713
4714// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4715static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4716 const MCRegisterInfo *MRI) {
4717 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4718 if (OpIdx < 0)
4719 return -1;
4720
4721 const MCOperand &Op = Inst.getOperand(OpIdx);
4722 if (!Op.isReg())
4723 return -1;
4724
4725 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4726 auto Reg = Sub ? Sub : Op.getReg();
4727 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4728 return AGPR32.contains(Reg) ? 1 : 0;
4729}
4730
4731bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4732 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4733 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4734 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4735 SIInstrFlags::DS)) == 0)
4736 return true;
4737
4738 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4739 : AMDGPU::OpName::vdata;
4740
4741 const MCRegisterInfo *MRI = getMRI();
4742 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4743 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4744
4745 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4746 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4747 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4748 return false;
4749 }
4750
4751 auto FB = getFeatureBits();
4752 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4753 if (DataAreg < 0 || DstAreg < 0)
4754 return true;
4755 return DstAreg == DataAreg;
4756 }
4757
4758 return DstAreg < 1 && DataAreg < 1;
4759}
4760
4761bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4762 auto FB = getFeatureBits();
4763 if (!FB[AMDGPU::FeatureGFX90AInsts])
4764 return true;
4765
4766 const MCRegisterInfo *MRI = getMRI();
4767 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4768 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4769 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4770 const MCOperand &Op = Inst.getOperand(I);
4771 if (!Op.isReg())
4772 continue;
4773
4774 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4775 if (!Sub)
4776 continue;
4777
4778 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4779 return false;
4780 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4781 return false;
4782 }
4783
4784 return true;
4785}
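// Illustrative example (not from the original source): on gfx90a,
// multi-register VGPR/AGPR operands must start at an even register, so a tuple
// like v[1:2] is rejected ("vgpr tuples must be 64 bit aligned") while v[2:3]
// is accepted.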
4786
4787SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4788 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4789 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4790 if (Op.isBLGP())
4791 return Op.getStartLoc();
4792 }
4793 return SMLoc();
4794}
4795
4796bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4797 const OperandVector &Operands) {
4798 unsigned Opc = Inst.getOpcode();
4799 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4800 if (BlgpIdx == -1)
4801 return true;
4802 SMLoc BLGPLoc = getBLGPLoc(Operands);
4803 if (!BLGPLoc.isValid())
4804 return true;
4805 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
4806 auto FB = getFeatureBits();
4807 bool UsesNeg = false;
4808 if (FB[AMDGPU::FeatureGFX940Insts]) {
4809 switch (Opc) {
4810 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4811 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4812 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4813 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4814 UsesNeg = true;
4815 }
4816 }
4817
4818 if (IsNeg == UsesNeg)
4819 return true;
4820
4821 Error(BLGPLoc,
4822 UsesNeg ? "invalid modifier: blgp is not supported"
4823 : "invalid modifier: neg is not supported");
4824
4825 return false;
4826}
4827
4828bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4829 const OperandVector &Operands) {
4830 if (!isGFX11Plus())
4831 return true;
4832
4833 unsigned Opc = Inst.getOpcode();
4834 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4835 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4836 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4837 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4838 return true;
4839
4840 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4841 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4842 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4843 if (Reg == AMDGPU::SGPR_NULL)
4844 return true;
4845
4846 SMLoc RegLoc = getRegLoc(Reg, Operands);
4847 Error(RegLoc, "src0 must be null");
4848 return false;
4849}
4850
4851bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
4852 const OperandVector &Operands) {
4853 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4854 if ((TSFlags & SIInstrFlags::DS) == 0)
4855 return true;
4856 if (TSFlags & SIInstrFlags::GWS)
4857 return validateGWS(Inst, Operands);
4858 // Only validate GDS for non-GWS instructions.
4859 if (hasGDS())
4860 return true;
4861 int GDSIdx =
4862 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
4863 if (GDSIdx < 0)
4864 return true;
4865 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
4866 if (GDS) {
4867 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
4868 Error(S, "gds modifier is not supported on this GPU");
4869 return false;
4870 }
4871 return true;
4872}
4873
4874// gfx90a has an undocumented limitation:
4875// DS_GWS opcodes must use even aligned registers.
4876bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4877 const OperandVector &Operands) {
4878 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4879 return true;
4880
4881 int Opc = Inst.getOpcode();
4882 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4883 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4884 return true;
4885
4886 const MCRegisterInfo *MRI = getMRI();
4887 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4888 int Data0Pos =
4889 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4890 assert(Data0Pos != -1);
4891 auto Reg = Inst.getOperand(Data0Pos).getReg();
4892 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4893 if (RegIdx & 1) {
4894 SMLoc RegLoc = getRegLoc(Reg, Operands);
4895 Error(RegLoc, "vgpr must be even aligned");
4896 return false;
4897 }
4898
4899 return true;
4900}
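// Illustrative example (not from the original source): on gfx90a,
// "ds_gws_init v1 gds" is rejected with "vgpr must be even aligned", while the
// same instruction with v0 (or any other even-numbered data register) is
// accepted.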
4901
4902bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4903 const OperandVector &Operands,
4904 const SMLoc &IDLoc) {
4905 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4906 AMDGPU::OpName::cpol);
4907 if (CPolPos == -1)
4908 return true;
4909
4910 unsigned CPol = Inst.getOperand(CPolPos).getImm();
4911
4912 if (isGFX12Plus())
4913 return validateTHAndScopeBits(Inst, Operands, CPol);
4914
4915 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4916 if (TSFlags & SIInstrFlags::SMRD) {
4917 if (CPol && (isSI() || isCI())) {
4918 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4919 Error(S, "cache policy is not supported for SMRD instructions");
4920 return false;
4921 }
4922 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4923 Error(IDLoc, "invalid cache policy for SMEM instruction");
4924 return false;
4925 }
4926 }
4927
4928 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4929 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
4930 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4931 SIInstrFlags::FLAT;
4932 if (!(TSFlags & AllowSCCModifier)) {
4933 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4934 StringRef CStr(S.getPointer());
4935 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4936 Error(S,
4937 "scc modifier is not supported for this instruction on this GPU");
4938 return false;
4939 }
4940 }
4941
4943 return true;
4944
4945 if (TSFlags & SIInstrFlags::IsAtomicRet) {
4946 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4947 Error(IDLoc, isGFX940() ? "instruction must use sc0"
4948 : "instruction must use glc");
4949 return false;
4950 }
4951 } else {
4952 if (CPol & CPol::GLC) {
4953 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4954 StringRef CStr(S.getPointer());
4955 S = SMLoc::getFromPointer(
4956 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4957 Error(S, isGFX940() ? "instruction must not use sc0"
4958 : "instruction must not use glc");
4959 return false;
4960 }
4961 }
4962
4963 return true;
4964}
4965
4966bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
4967 const OperandVector &Operands,
4968 const unsigned CPol) {
4969 const unsigned TH = CPol & AMDGPU::CPol::TH;
4970 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
4971
4972 const unsigned Opcode = Inst.getOpcode();
4973 const MCInstrDesc &TID = MII.get(Opcode);
4974
4975 auto PrintError = [&](StringRef Msg) {
4976 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4977 Error(S, Msg);
4978 return false;
4979 };
4980
4981 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
4984 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
4985
4986 if (TH == 0)
4987 return true;
4988
4989 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
4990 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
4991 (TH == AMDGPU::CPol::TH_NT_HT)))
4992 return PrintError("invalid th value for SMEM instruction");
4993
4994 if (TH == AMDGPU::CPol::TH_BYPASS) {
4995 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
4996 CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
4997 (Scope == AMDGPU::CPol::SCOPE_SYS &&
4998 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
4999 return PrintError("scope and th combination is not valid");
5000 }
5001
5002 bool IsStore = TID.mayStore();
5003 bool IsAtomic =
5004 TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet);
5005
5006 if (IsAtomic) {
5007 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5008 return PrintError("invalid th value for atomic instructions");
5009 } else if (IsStore) {
5010 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5011 return PrintError("invalid th value for store instructions");
5012 } else {
5013 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5014 return PrintError("invalid th value for load instructions");
5015 }
5016
5017 return true;
5018}
5019
5020bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
5021 if (!isGFX11Plus())
5022 return true;
5023 for (auto &Operand : Operands) {
5024 if (!Operand->isReg())
5025 continue;
5026 unsigned Reg = Operand->getReg();
5027 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
5028 Error(getRegLoc(Reg, Operands),
5029 "execz and vccz are not supported on this GPU");
5030 return false;
5031 }
5032 }
5033 return true;
5034}
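// Illustrative note (not from the original source): the execz and vccz status
// bits are no longer supported starting with GFX11, so any explicit use of
// them as an instruction operand (for example as a source of s_mov_b32) is
// rejected on GFX11+ targets.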
5035
5036bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5037 const OperandVector &Operands) {
5038 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5039 if (Desc.mayStore() &&
5040 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5041 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5042 if (Loc != getInstLoc(Operands)) {
5043 Error(Loc, "TFE modifier has no meaning for store instructions");
5044 return false;
5045 }
5046 }
5047
5048 return true;
5049}
5050
5051bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
5052 const SMLoc &IDLoc,
5053 const OperandVector &Operands) {
5054 if (auto ErrMsg = validateLdsDirect(Inst)) {
5055 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
5056 return false;
5057 }
5058 if (!validateSOPLiteral(Inst)) {
5059 Error(getLitLoc(Operands),
5060 "only one unique literal operand is allowed");
5061 return false;
5062 }
5063 if (!validateVOPLiteral(Inst, Operands)) {
5064 return false;
5065 }
5066 if (!validateConstantBusLimitations(Inst, Operands)) {
5067 return false;
5068 }
5069 if (!validateVOPDRegBankConstraints(Inst, Operands)) {
5070 return false;
5071 }
5072 if (!validateIntClampSupported(Inst)) {
5073 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5074 "integer clamping is not supported on this GPU");
5075 return false;
5076 }
5077 if (!validateOpSel(Inst)) {
5078 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5079 "invalid op_sel operand");
5080 return false;
5081 }
5082 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5083 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5084 "invalid neg_lo operand");
5085 return false;
5086 }
5087 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5088 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5089 "invalid neg_hi operand");
5090 return false;
5091 }
5092 if (!validateDPP(Inst, Operands)) {
5093 return false;
5094 }
5095 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5096 if (!validateMIMGD16(Inst)) {
5097 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5098 "d16 modifier is not supported on this GPU");
5099 return false;
5100 }
5101 if (!validateMIMGMSAA(Inst)) {
5102 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5103 "invalid dim; must be MSAA type");
5104 return false;
5105 }
5106 if (!validateMIMGDataSize(Inst, IDLoc)) {
5107 return false;
5108 }
5109 if (!validateMIMGAddrSize(Inst, IDLoc))
5110 return false;
5111 if (!validateMIMGAtomicDMask(Inst)) {
5112 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5113 "invalid atomic image dmask");
5114 return false;
5115 }
5116 if (!validateMIMGGatherDMask(Inst)) {
5117 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5118 "invalid image_gather dmask: only one bit must be set");
5119 return false;
5120 }
5121 if (!validateMovrels(Inst, Operands)) {
5122 return false;
5123 }
5124 if (!validateOffset(Inst, Operands)) {
5125 return false;
5126 }
5127 if (!validateMAIAccWrite(Inst, Operands)) {
5128 return false;
5129 }
5130 if (!validateMAISrc2(Inst, Operands)) {
5131 return false;
5132 }
5133 if (!validateMFMA(Inst, Operands)) {
5134 return false;
5135 }
5136 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5137 return false;
5138 }
5139
5140 if (!validateAGPRLdSt(Inst)) {
5141 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5142 ? "invalid register class: data and dst should be all VGPR or AGPR"
5143 : "invalid register class: agpr loads and stores not supported on this GPU"
5144 );
5145 return false;
5146 }
5147 if (!validateVGPRAlign(Inst)) {
5148 Error(IDLoc,
5149 "invalid register class: vgpr tuples must be 64 bit aligned");
5150 return false;
5151 }
5152 if (!validateDS(Inst, Operands)) {
5153 return false;
5154 }
5155
5156 if (!validateBLGP(Inst, Operands)) {
5157 return false;
5158 }
5159
5160 if (!validateDivScale(Inst)) {
5161 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5162 return false;
5163 }
5164 if (!validateWaitCnt(Inst, Operands)) {
5165 return false;
5166 }
5167 if (!validateExeczVcczOperands(Operands)) {
5168 return false;
5169 }
5170 if (!validateTFE(Inst, Operands)) {
5171 return false;
5172 }
5173
5174 return true;
5175}
5176
5177static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5178 const FeatureBitset &FBS,
5179 unsigned VariantID = 0);
5180
5181static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5182 const FeatureBitset &AvailableFeatures,
5183 unsigned VariantID);
5184
5185bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5186 const FeatureBitset &FBS) {
5187 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5188}
5189
5190bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5191 const FeatureBitset &FBS,
5192 ArrayRef<unsigned> Variants) {
5193 for (auto Variant : Variants) {
5194 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5195 return true;
5196 }
5197
5198 return false;
5199}
5200
5201bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5202 const SMLoc &IDLoc) {
5203 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5204
5205 // Check if requested instruction variant is supported.
5206 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5207 return false;
5208
5209 // This instruction is not supported.
5210 // Clear any other pending errors because they are no longer relevant.
5211 getParser().clearPendingErrors();
5212
5213 // Requested instruction variant is not supported.
5214 // Check if any other variants are supported.
5215 StringRef VariantName = getMatchedVariantName();
5216 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5217 return Error(IDLoc,
5218 Twine(VariantName,
5219 " variant of this instruction is not supported"));
5220 }
5221
5222 // Check if this instruction may be used with a different wavesize.
5223 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5224 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5225
5226 FeatureBitset FeaturesWS32 = getFeatureBits();
5227 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5228 .flip(AMDGPU::FeatureWavefrontSize32);
5229 FeatureBitset AvailableFeaturesWS32 =
5230 ComputeAvailableFeatures(FeaturesWS32);
5231
5232 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5233 return Error(IDLoc, "instruction requires wavesize=32");
5234 }
5235
5236 // Finally check if this instruction is supported on any other GPU.
5237 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5238 return Error(IDLoc, "instruction not supported on this GPU");
5239 }
5240
5241 // Instruction not supported on any GPU. Probably a typo.
5242 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5243 return Error(IDLoc, "invalid instruction" + Suggestion);
5244}
5245
5246static bool isInvalidVOPDY(const OperandVector &Operands,
5247 uint64_t InvalidOprIdx) {
5248 assert(InvalidOprIdx < Operands.size());
5249 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5250 if (Op.isToken() && InvalidOprIdx > 1) {
5251 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5252 return PrevOp.isToken() && PrevOp.getToken() == "::";
5253 }
5254 return false;
5255}
5256
5257bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5258 OperandVector &Operands,
5259 MCStreamer &Out,
5260 uint64_t &ErrorInfo,
5261 bool MatchingInlineAsm) {
5262 MCInst Inst;
5263 unsigned Result = Match_Success;
5264 for (auto Variant : getMatchedVariants()) {
5265 uint64_t EI;
5266 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5267 Variant);
5268 // We order match statuses from least to most specific. We use the most
5269 // specific status as the result:
5270 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5271 if (R == Match_Success || R == Match_MissingFeature ||
5272 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5273 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5274 Result != Match_MissingFeature)) {
5275 Result = R;
5276 ErrorInfo = EI;
5277 }
5278 if (R == Match_Success)
5279 break;
5280 }
5281
5282 if (Result == Match_Success) {
5283 if (!validateInstruction(Inst, IDLoc, Operands)) {
5284 return true;
5285 }
5286 Inst.setLoc(IDLoc);
5287 Out.emitInstruction(Inst, getSTI());
5288 return false;
5289 }
5290
5291 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5292 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5293 return true;
5294 }
5295
5296 switch (Result) {
5297 default: break;
5298 case Match_MissingFeature:
5299 // It has been verified that the specified instruction
5300 // mnemonic is valid. A match was found but it requires
5301 // features which are not supported on this GPU.
5302 return Error(IDLoc, "operands are not valid for this GPU or mode");
5303
5304 case Match_InvalidOperand: {
5305 SMLoc ErrorLoc = IDLoc;
5306 if (ErrorInfo != ~0ULL) {
5307 if (ErrorInfo >= Operands.size()) {
5308 return Error(IDLoc, "too few operands for instruction");
5309 }
5310 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5311 if (ErrorLoc == SMLoc())
5312 ErrorLoc = IDLoc;
5313
5315 return Error(ErrorLoc, "invalid VOPDY instruction");
5316 }
5317 return Error(ErrorLoc, "invalid operand for instruction");
5318 }
5319
5320 case Match_MnemonicFail:
5321 llvm_unreachable("Invalid instructions should have been handled already");
5322 }
5323 llvm_unreachable("Implement any new match types added!");
5324}
5325
5326bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5327 int64_t Tmp = -1;
5328 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5329 return true;
5330 }
5331 if (getParser().parseAbsoluteExpression(Tmp)) {
5332 return true;
5333 }
5334 Ret = static_cast<uint32_t>(Tmp);
5335 return false;
5336}
5337
5338bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5339 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5340 return TokError("directive only supported for amdgcn architecture");
5341
5342 std::string TargetIDDirective;
5343 SMLoc TargetStart = getTok().getLoc();
5344 if (getParser().parseEscapedString(TargetIDDirective))
5345 return true;
5346
5347 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5348 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5349 return getParser().Error(TargetRange.Start,
5350 (Twine(".amdgcn_target directive's target id ") +
5351 Twine(TargetIDDirective) +
5352 Twine(" does not match the specified target id ") +
5353 Twine(getTargetStreamer().getTargetID()->toString())).str());
5354
5355 return false;
5356}
5357
5358bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5359 return Error(Range.Start, "value out of range", Range);
5360}
5361
5362bool AMDGPUAsmParser::calculateGPRBlocks(
5363 const FeatureBitset &Features, const MCExpr *VCCUsed,
5364 const MCExpr *FlatScrUsed, bool XNACKUsed,
5365 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
5366 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5367 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
5368 // TODO(scott.linder): These calculations are duplicated from
5369 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5370 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5371 MCContext &Ctx = getContext();
5372
5373 const MCExpr *NumSGPRs = NextFreeSGPR;
5374 int64_t EvaluatedSGPRs;
5375
5376 if (Version.Major >= 10)
5377 NumSGPRs = MCConstantExpr::create(0, Ctx);
5378 else {
5379 unsigned MaxAddressableNumSGPRs =
5380 IsaInfo::getAddressableNumSGPRs(&getSTI());
5381
5382 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
5383 !Features.test(FeatureSGPRInitBug) &&
5384 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5385 return OutOfRangeError(SGPRRange);
5386
5387 const MCExpr *ExtraSGPRs =
5388 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
5389 NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
5390
5391 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5392 (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5393 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5394 return OutOfRangeError(SGPRRange);
5395
5396 if (Features.test(FeatureSGPRInitBug))
5397 NumSGPRs =
5398 MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
5399 }
5400
5401 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
5402 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
5403 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
5404 unsigned Granule) -> const MCExpr * {
5405 const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
5406 const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
5407 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
5408 const MCExpr *AlignToGPR =
5409 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
5410 const MCExpr *DivGPR =
5411 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
5412 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
5413 return SubGPR;
5414 };
5415
5416 VGPRBlocks = GetNumGPRBlocks(
5417 NextFreeVGPR,
5418 IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
5419 SGPRBlocks =
5420 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));
5421
5422 return false;
5423}
5424
5425bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5426 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5427 return TokError("directive only supported for amdgcn architecture");
5428
5429 if (!isHsaAbi(getSTI()))
5430 return TokError("directive only supported for amdhsa OS");
5431
5432 StringRef KernelName;
5433 if (getParser().parseIdentifier(KernelName))
5434 return true;
5435
5436 MCKernelDescriptor KD =
5437 MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
5438 &getSTI(), getContext());
5439
5440 StringSet<> Seen;
5441
5442 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5443
5444 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
5445 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
5446
5447 SMRange VGPRRange;
5448 const MCExpr *NextFreeVGPR = ZeroExpr;
5449 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
5450 uint64_t SharedVGPRCount = 0;
5451 uint64_t PreloadLength = 0;
5452 uint64_t PreloadOffset = 0;
5453 SMRange SGPRRange;
5454 const MCExpr *NextFreeSGPR = ZeroExpr;
5455
5456 // Count the number of user SGPRs implied from the enabled feature bits.
5457 unsigned ImpliedUserSGPRCount = 0;
5458
5459 // Track if the asm explicitly contains the directive for the user SGPR
5460 // count.
5461 std::optional<unsigned> ExplicitUserSGPRCount;
5462 const MCExpr *ReserveVCC = OneExpr;
5463 const MCExpr *ReserveFlatScr = OneExpr;
5464 std::optional<bool> EnableWavefrontSize32;
5465
5466 while (true) {
5467 while (trySkipToken(AsmToken::EndOfStatement));
5468
5469 StringRef ID;
5470 SMRange IDRange = getTok().getLocRange();
5471 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5472 return true;
5473
5474 if (ID == ".end_amdhsa_kernel")
5475 break;
5476
5477 if (!Seen.insert(ID).second)
5478 return TokError(".amdhsa_ directives cannot be repeated");
5479
5480 SMLoc ValStart = getLoc();
5481 const MCExpr *ExprVal;
5482 if (getParser().parseExpression(ExprVal))
5483 return true;
5484 SMLoc ValEnd = getLoc();
5485 SMRange ValRange = SMRange(ValStart, ValEnd);
5486
5487 int64_t IVal = 0;
5488 uint64_t Val = IVal;
5489 bool EvaluatableExpr;
5490 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
5491 if (IVal < 0)
5492 return OutOfRangeError(ValRange);
5493 Val = IVal;
5494 }
5495
5496#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5497 if (!isUInt<ENTRY##_WIDTH>(Val)) \
5498 return OutOfRangeError(RANGE); \
5499 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
5500 getContext());
5501
5502// Some fields use the parsed value immediately, which requires the expression
5503// to be resolvable.
5504#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
5505 if (!(RESOLVED)) \
5506 return Error(IDRange.Start, "directive should have resolvable expression", \
5507 IDRange);
5508
5509 if (ID == ".amdhsa_group_segment_fixed_size") {
5510      if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
5511                  CHAR_BIT>(Val))
5512 return OutOfRangeError(ValRange);
5513 KD.group_segment_fixed_size = ExprVal;
5514 } else if (ID == ".amdhsa_private_segment_fixed_size") {
5515      if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
5516                  CHAR_BIT>(Val))
5517 return OutOfRangeError(ValRange);
5518 KD.private_segment_fixed_size = ExprVal;
5519 } else if (ID == ".amdhsa_kernarg_size") {
5520 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
5521 return OutOfRangeError(ValRange);
5522 KD.kernarg_size = ExprVal;
5523 } else if (ID == ".amdhsa_user_sgpr_count") {
5524 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5525 ExplicitUserSGPRCount = Val;
5526 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5527 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5529 return Error(IDRange.Start,
5530 "directive is not supported with architected flat scratch",
5531 IDRange);
5533 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5534 ExprVal, ValRange);
5535 if (Val)
5536 ImpliedUserSGPRCount += 4;
5537 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
5538 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5539 if (!hasKernargPreload())
5540 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5541
5542 if (Val > getMaxNumUserSGPRs())
5543 return OutOfRangeError(ValRange);
5544 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
5545 ValRange);
5546 if (Val) {
5547 ImpliedUserSGPRCount += Val;
5548 PreloadLength = Val;
5549 }
5550 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
5551 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5552 if (!hasKernargPreload())
5553 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5554
5555 if (Val >= 1024)
5556 return OutOfRangeError(ValRange);
5557 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
5558 ValRange);
5559 if (Val)
5560 PreloadOffset = Val;
5561 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5562 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5564 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
5565 ValRange);
5566 if (Val)
5567 ImpliedUserSGPRCount += 2;
5568 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5569 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5571 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
5572 ValRange);
5573 if (Val)
5574 ImpliedUserSGPRCount += 2;
5575 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5576 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5578 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5579 ExprVal, ValRange);
5580 if (Val)
5581 ImpliedUserSGPRCount += 2;
5582 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5583 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5585 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
5586 ValRange);
5587 if (Val)
5588 ImpliedUserSGPRCount += 2;
5589 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5591 return Error(IDRange.Start,
5592 "directive is not supported with architected flat scratch",
5593 IDRange);
5594 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5596 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
5597 ExprVal, ValRange);
5598 if (Val)
5599 ImpliedUserSGPRCount += 2;
5600 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5601 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5603 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5604 ExprVal, ValRange);
5605 if (Val)
5606 ImpliedUserSGPRCount += 1;
5607 } else if (ID == ".amdhsa_wavefront_size32") {
5608 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5609 if (IVersion.Major < 10)
5610 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5611 EnableWavefrontSize32 = Val;
5613 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
5614 ValRange);
5615 } else if (ID == ".amdhsa_uses_dynamic_stack") {
5617 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
5618 ValRange);
5619 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5621 return Error(IDRange.Start,
5622 "directive is not supported with architected flat scratch",
5623 IDRange);
5625 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5626 ValRange);
5627 } else if (ID == ".amdhsa_enable_private_segment") {
5629 return Error(
5630 IDRange.Start,
5631 "directive is not supported without architected flat scratch",
5632 IDRange);
5634 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5635 ValRange);
5636 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5638 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
5639 ValRange);
5640 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5642 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
5643 ValRange);
5644 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5646 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
5647 ValRange);
5648 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5650 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
5651 ValRange);
5652 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5654 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
5655 ValRange);
5656 } else if (ID == ".amdhsa_next_free_vgpr") {
5657 VGPRRange = ValRange;
5658 NextFreeVGPR = ExprVal;
5659 } else if (ID == ".amdhsa_next_free_sgpr") {
5660 SGPRRange = ValRange;
5661 NextFreeSGPR = ExprVal;
5662 } else if (ID == ".amdhsa_accum_offset") {
5663 if (!isGFX90A())
5664 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5665 AccumOffset = ExprVal;
5666 } else if (ID == ".amdhsa_reserve_vcc") {
5667 if (EvaluatableExpr && !isUInt<1>(Val))
5668 return OutOfRangeError(ValRange);
5669 ReserveVCC = ExprVal;
5670 } else if (ID == ".amdhsa_reserve_flat_scratch") {
5671 if (IVersion.Major < 7)
5672 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5674 return Error(IDRange.Start,
5675 "directive is not supported with architected flat scratch",
5676 IDRange);
5677 if (EvaluatableExpr && !isUInt<1>(Val))
5678 return OutOfRangeError(ValRange);
5679 ReserveFlatScr = ExprVal;
5680 } else if (ID == ".amdhsa_reserve_xnack_mask") {
5681 if (IVersion.Major < 8)
5682 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5683 if (!isUInt<1>(Val))
5684 return OutOfRangeError(ValRange);
5685 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5686 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5687 IDRange);
5688 } else if (ID == ".amdhsa_float_round_mode_32") {
5690 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
5691 ValRange);
5692 } else if (ID == ".amdhsa_float_round_mode_16_64") {
5694 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
5695 ValRange);
5696 } else if (ID == ".amdhsa_float_denorm_mode_32") {
5698 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
5699 ValRange);
5700 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5702 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
5703 ValRange);
5704 } else if (ID == ".amdhsa_dx10_clamp") {
5705 if (IVersion.Major >= 12)
5706 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5708 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
5709 ValRange);
5710 } else if (ID == ".amdhsa_ieee_mode") {
5711 if (IVersion.Major >= 12)
5712 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5714 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
5715 ValRange);
5716 } else if (ID == ".amdhsa_fp16_overflow") {
5717 if (IVersion.Major < 9)
5718 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5720 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
5721 ValRange);
5722 } else if (ID == ".amdhsa_tg_split") {
5723 if (!isGFX90A())
5724 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5725 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
5726 ExprVal, ValRange);
5727 } else if (ID == ".amdhsa_workgroup_processor_mode") {
5728 if (IVersion.Major < 10)
5729 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5731 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
5732 ValRange);
5733 } else if (ID == ".amdhsa_memory_ordered") {
5734 if (IVersion.Major < 10)
5735 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5737 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
5738 ValRange);
5739 } else if (ID == ".amdhsa_forward_progress") {
5740 if (IVersion.Major < 10)
5741 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5743 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
5744 ValRange);
5745 } else if (ID == ".amdhsa_shared_vgpr_count") {
5746 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5747 if (IVersion.Major < 10 || IVersion.Major >= 12)
5748 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
5749 IDRange);
5750 SharedVGPRCount = Val;
5752 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
5753 ValRange);
5754 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5757 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
5758 ExprVal, ValRange);
5759 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5761 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5762 ExprVal, ValRange);
5763 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5766 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
5767 ExprVal, ValRange);
5768 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5770 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5771 ExprVal, ValRange);
5772 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5774 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5775 ExprVal, ValRange);
5776 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5778 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5779 ExprVal, ValRange);
5780 } else if (ID == ".amdhsa_exception_int_div_zero") {
5782 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5783 ExprVal, ValRange);
5784 } else if (ID == ".amdhsa_round_robin_scheduling") {
5785 if (IVersion.Major < 12)
5786 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
5788 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
5789 ValRange);
5790 } else {
5791 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5792 }
5793
5794#undef PARSE_BITS_ENTRY
5795 }
5796
5797 if (!Seen.contains(".amdhsa_next_free_vgpr"))
5798 return TokError(".amdhsa_next_free_vgpr directive is required");
5799
5800 if (!Seen.contains(".amdhsa_next_free_sgpr"))
5801 return TokError(".amdhsa_next_free_sgpr directive is required");
5802
5803 const MCExpr *VGPRBlocks;
5804 const MCExpr *SGPRBlocks;
5805 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5806 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5807 EnableWavefrontSize32, NextFreeVGPR,
5808 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5809 SGPRBlocks))
5810 return true;
5811
5812 int64_t EvaluatedVGPRBlocks;
5813 bool VGPRBlocksEvaluatable =
5814 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
5815 if (VGPRBlocksEvaluatable &&
5816 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5817 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
5818 return OutOfRangeError(VGPRRange);
5819 }
5820  MCKernelDescriptor::bits_set(
5821      KD.compute_pgm_rsrc1, VGPRBlocks,
5822 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
5823 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
5824
5825 int64_t EvaluatedSGPRBlocks;
5826 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
5827 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5828 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
5829 return OutOfRangeError(SGPRRange);
5830  MCKernelDescriptor::bits_set(
5831      KD.compute_pgm_rsrc1, SGPRBlocks,
5832 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
5833 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
5834
5835 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5836    return TokError("amdgpu_user_sgpr_count smaller than implied by "
5837 "enabled user SGPRs");
5838
5839 unsigned UserSGPRCount =
5840 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5841
5842 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5843 return TokError("too many user SGPRs enabled");
5844  MCKernelDescriptor::bits_set(
5845      KD.compute_pgm_rsrc2, MCConstantExpr::create(UserSGPRCount, getContext()),
5846 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
5847 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
5848
5849 int64_t IVal = 0;
5850 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
5851 return TokError("Kernarg size should be resolvable");
5852 uint64_t kernarg_size = IVal;
5853 if (PreloadLength && kernarg_size &&
5854 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
5855 return TokError("Kernarg preload length + offset is larger than the "
5856 "kernarg segment size");
5857
5858 if (isGFX90A()) {
5859 if (!Seen.contains(".amdhsa_accum_offset"))
5860 return TokError(".amdhsa_accum_offset directive is required");
5861 int64_t EvaluatedAccum;
5862 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
5863 uint64_t UEvaluatedAccum = EvaluatedAccum;
5864 if (AccumEvaluatable &&
5865 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
5866 return TokError("accum_offset should be in range [4..256] in "
5867 "increments of 4");
5868
5869 int64_t EvaluatedNumVGPR;
5870 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
5871 AccumEvaluatable &&
5872 UEvaluatedAccum >
5873 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
5874 return TokError("accum_offset exceeds total VGPR allocation");
5875    const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
5876        MCBinaryExpr::createDiv(
5877            AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
5878        MCConstantExpr::create(1, getContext()), getContext());
5879    MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, AdjustedAccum,
5880                                 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
5881 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5882 getContext());
5883 }
5884
5885 if (IVersion.Major >= 10 && IVersion.Major < 12) {
5886 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
5887 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5888 return TokError("shared_vgpr_count directive not valid on "
5889 "wavefront size 32");
5890 }
5891
5892 if (VGPRBlocksEvaluatable &&
5893 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
5894 63)) {
5895 return TokError("shared_vgpr_count*2 + "
5896 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5897 "exceed 63\n");
5898 }
5899 }
5900
5901 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
5902 NextFreeVGPR, NextFreeSGPR,
5903 ReserveVCC, ReserveFlatScr);
5904 return false;
5905}
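// A minimal directive block accepted by ParseDirectiveAMDHSAKernel() might look
// like this (kernel name and counts are illustrative; some targets require more,
// e.g. .amdhsa_accum_offset on gfx90a):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel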
5906
5907bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
5908  uint32_t Version;
5909  if (ParseAsAbsoluteExpression(Version))
5910 return true;
5911
5912 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
5913 return false;
5914}
5915
5916bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5917                                                AMDGPUMCKernelCodeT &C) {
5918  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5919 // assembly for backwards compatibility.
5920 if (ID == "max_scratch_backing_memory_byte_size") {
5921 Parser.eatToEndOfStatement();
5922 return false;
5923 }
5924
5925 SmallString<40> ErrStr;
5926 raw_svector_ostream Err(ErrStr);
5927 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
5928 return TokError(Err.str());
5929 }
5930 Lex();
5931
5932 if (ID == "enable_wavefront_size32") {
5933 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5934 if (!isGFX10Plus())
5935 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5936 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5937 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5938 } else {
5939 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5940 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5941 }
5942 }
5943
5944 if (ID == "wavefront_size") {
5945 if (C.wavefront_size == 5) {
5946 if (!isGFX10Plus())
5947 return TokError("wavefront_size=5 is only allowed on GFX10+");
5948 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5949 return TokError("wavefront_size=5 requires +WavefrontSize32");
5950 } else if (C.wavefront_size == 6) {
5951 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5952 return TokError("wavefront_size=6 requires +WavefrontSize64");
5953 }
5954 }
5955
5956 return false;
5957}
5958
5959bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5960 AMDGPUMCKernelCodeT KernelCode;
5961 KernelCode.initDefault(&getSTI(), getContext());
5962
5963 while (true) {
5964 // Lex EndOfStatement. This is in a while loop, because lexing a comment
5965 // will set the current token to EndOfStatement.
5966 while(trySkipToken(AsmToken::EndOfStatement));
5967
5968 StringRef ID;
5969 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5970 return true;
5971
5972 if (ID == ".end_amd_kernel_code_t")
5973 break;
5974
5975 if (ParseAMDKernelCodeTValue(ID, KernelCode))
5976 return true;
5977 }
5978
5979 KernelCode.validate(&getSTI(), getContext());
5980 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
5981
5982 return false;
5983}
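// Sketch of the legacy block handled above (keys and values illustrative only):
//   .amd_kernel_code_t
//     wavefront_size = 6
//   .end_amd_kernel_code_t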
5984
5985bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5986 StringRef KernelName;
5987 if (!parseId(KernelName, "expected symbol name"))
5988 return true;
5989
5990 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5991                                            ELF::STT_AMDGPU_HSA_KERNEL);
5992
5993 KernelScope.initialize(getContext());
5994 return false;
5995}
5996
5997bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5998 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5999 return Error(getLoc(),
6000 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6001 "architectures");
6002 }
6003
6004 auto TargetIDDirective = getLexer().getTok().getStringContents();
6005 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6006 return Error(getParser().getTok().getLoc(), "target id must match options");
6007
6008 getTargetStreamer().EmitISAVersion();
6009 Lex();
6010
6011 return false;
6012}
6013
6014bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6015 assert(isHsaAbi(getSTI()));
6016
6017 std::string HSAMetadataString;
6018 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6019 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6020 return true;
6021
6022 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6023 return Error(getLoc(), "invalid HSA metadata");
6024
6025 return false;
6026}
6027
6028/// Common code to parse out a block of text (typically YAML) between start and
6029/// end directives.
6030bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6031 const char *AssemblerDirectiveEnd,
6032 std::string &CollectString) {
6033
6034 raw_string_ostream CollectStream(CollectString);
6035
6036 getLexer().setSkipSpace(false);
6037
6038 bool FoundEnd = false;
6039 while (!isToken(AsmToken::Eof)) {
6040 while (isToken(AsmToken::Space)) {
6041 CollectStream << getTokenStr();
6042 Lex();
6043 }
6044
6045 if (trySkipId(AssemblerDirectiveEnd)) {
6046 FoundEnd = true;
6047 break;
6048 }
6049
6050 CollectStream << Parser.parseStringToEndOfStatement()
6051 << getContext().getAsmInfo()->getSeparatorString();
6052
6053 Parser.eatToEndOfStatement();
6054 }
6055
6056 getLexer().setSkipSpace(true);
6057
6058 if (isToken(AsmToken::Eof) && !FoundEnd) {
6059 return TokError(Twine("expected directive ") +
6060 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6061 }
6062
6063 CollectStream.flush();
6064 return false;
6065}
6066
6067/// Parse the assembler directive for new MsgPack-format PAL metadata.
6068bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6069 std::string String;
6070 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6072 return true;
6073
6074 auto PALMetadata = getTargetStreamer().getPALMetadata();
6075 if (!PALMetadata->setFromString(String))
6076 return Error(getLoc(), "invalid PAL metadata");
6077 return false;
6078}
6079
6080/// Parse the assembler directive for old linear-format PAL metadata.
6081bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6082 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6083 return Error(getLoc(),
6084 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6085 "not available on non-amdpal OSes")).str());
6086 }
6087
6088 auto PALMetadata = getTargetStreamer().getPALMetadata();
6089 PALMetadata->setLegacy();
6090 for (;;) {
6091    uint32_t Key, Value;
6092    if (ParseAsAbsoluteExpression(Key)) {
6093      return TokError(Twine("invalid value in ") +
6094                      Twine(PALMD::AssemblerDirective));
6095    }
6096    if (!trySkipToken(AsmToken::Comma)) {
6097      return TokError(Twine("expected an even number of values in ") +
6098                      Twine(PALMD::AssemblerDirective));
6099    }
6100    if (ParseAsAbsoluteExpression(Value)) {
6101      return TokError(Twine("invalid value in ") +
6102                      Twine(PALMD::AssemblerDirective));
6103    }
6104 PALMetadata->setRegister(Key, Value);
6105 if (!trySkipToken(AsmToken::Comma))
6106 break;
6107 }
6108 return false;
6109}
6110
6111/// ParseDirectiveAMDGPULDS
6112/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
6113bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6114 if (getParser().checkForValidSection())
6115 return true;
6116
6117  StringRef Name;
6118  SMLoc NameLoc = getLoc();
6119 if (getParser().parseIdentifier(Name))
6120 return TokError("expected identifier in directive");
6121
6122 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6123 if (getParser().parseComma())
6124 return true;
6125
6126 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6127
6128 int64_t Size;
6129 SMLoc SizeLoc = getLoc();
6130 if (getParser().parseAbsoluteExpression(Size))
6131 return true;
6132 if (Size < 0)
6133 return Error(SizeLoc, "size must be non-negative");
6134 if (Size > LocalMemorySize)
6135 return Error(SizeLoc, "size is too large");
6136
6137 int64_t Alignment = 4;
6138 if (trySkipToken(AsmToken::Comma)) {
6139 SMLoc AlignLoc = getLoc();
6140 if (getParser().parseAbsoluteExpression(Alignment))
6141 return true;
6142 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6143 return Error(AlignLoc, "alignment must be a power of two");
6144
6145 // Alignment larger than the size of LDS is possible in theory, as long
6146    // as the linker manages to place the symbol at address 0, but we do want
6147 // to make sure the alignment fits nicely into a 32-bit integer.
6148 if (Alignment >= 1u << 31)
6149 return Error(AlignLoc, "alignment is too large");
6150 }
6151
6152 if (parseEOL())
6153 return true;
6154
6155 Symbol->redefineIfPossible();
6156 if (!Symbol->isUndefined())
6157 return Error(NameLoc, "invalid symbol redefinition");
6158
6159 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6160 return false;
6161}
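// For example, ".amdgpu_lds my_lds_var, 4096, 16" (symbol name illustrative)
// reserves 4096 bytes of LDS for my_lds_var with 16-byte alignment.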
6162
6163bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6164 StringRef IDVal = DirectiveID.getString();
6165
6166 if (isHsaAbi(getSTI())) {
6167 if (IDVal == ".amdhsa_kernel")
6168 return ParseDirectiveAMDHSAKernel();
6169
6170 if (IDVal == ".amdhsa_code_object_version")
6171 return ParseDirectiveAMDHSACodeObjectVersion();
6172
6173 // TODO: Restructure/combine with PAL metadata directive.
6174    if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
6175      return ParseDirectiveHSAMetadata();
6176 } else {
6177 if (IDVal == ".amd_kernel_code_t")
6178 return ParseDirectiveAMDKernelCodeT();
6179
6180 if (IDVal == ".amdgpu_hsa_kernel")
6181 return ParseDirectiveAMDGPUHsaKernel();
6182
6183 if (IDVal == ".amd_amdgpu_isa")
6184 return ParseDirectiveISAVersion();
6185
6186    if (IDVal == HSAMD::AssemblerDirectiveBegin) {
6187      return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6188 Twine(" directive is "
6189 "not available on non-amdhsa OSes"))
6190 .str());
6191 }
6192 }
6193
6194 if (IDVal == ".amdgcn_target")
6195 return ParseDirectiveAMDGCNTarget();
6196
6197 if (IDVal == ".amdgpu_lds")
6198 return ParseDirectiveAMDGPULDS();
6199
6200 if (IDVal == PALMD::AssemblerDirectiveBegin)
6201 return ParseDirectivePALMetadataBegin();
6202
6203 if (IDVal == PALMD::AssemblerDirective)
6204 return ParseDirectivePALMetadata();
6205
6206 return true;
6207}
6208
6209bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6210 unsigned RegNo) {
6211
6212 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
6213 return isGFX9Plus();
6214
6215 // GFX10+ has 2 more SGPRs 104 and 105.
6216 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
6217 return hasSGPR104_SGPR105();
6218
6219 switch (RegNo) {
6220 case AMDGPU::SRC_SHARED_BASE_LO:
6221 case AMDGPU::SRC_SHARED_BASE:
6222 case AMDGPU::SRC_SHARED_LIMIT_LO:
6223 case AMDGPU::SRC_SHARED_LIMIT:
6224 case AMDGPU::SRC_PRIVATE_BASE_LO:
6225 case AMDGPU::SRC_PRIVATE_BASE:
6226 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
6227 case AMDGPU::SRC_PRIVATE_LIMIT:
6228 return isGFX9Plus();
6229 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
6230 return isGFX9Plus() && !isGFX11Plus();
6231 case AMDGPU::TBA:
6232 case AMDGPU::TBA_LO:
6233 case AMDGPU::TBA_HI:
6234 case AMDGPU::TMA:
6235 case AMDGPU::TMA_LO:
6236 case AMDGPU::TMA_HI:
6237 return !isGFX9Plus();
6238 case AMDGPU::XNACK_MASK:
6239 case AMDGPU::XNACK_MASK_LO:
6240 case AMDGPU::XNACK_MASK_HI:
6241 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6242 case AMDGPU::SGPR_NULL:
6243 return isGFX10Plus();
6244 default:
6245 break;
6246 }
6247
6248 if (isCI())
6249 return true;
6250
6251 if (isSI() || isGFX10Plus()) {
6252 // No flat_scr on SI.
6253 // On GFX10Plus flat scratch is not a valid register operand and can only be
6254 // accessed with s_setreg/s_getreg.
6255 switch (RegNo) {
6256 case AMDGPU::FLAT_SCR:
6257 case AMDGPU::FLAT_SCR_LO:
6258 case AMDGPU::FLAT_SCR_HI:
6259 return false;
6260 default:
6261 return true;
6262 }
6263 }
6264
6265 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6266 // SI/CI have.
6267 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
6268 return hasSGPR102_SGPR103();
6269
6270 return true;
6271}
6272
6273ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6274 StringRef Mnemonic,
6275 OperandMode Mode) {
6276 ParseStatus Res = parseVOPD(Operands);
6277 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6278 return Res;
6279
6280 // Try to parse with a custom parser
6281 Res = MatchOperandParserImpl(Operands, Mnemonic);
6282
6283  // If we successfully parsed the operand or if there was an error parsing,
6284 // we are done.
6285 //
6286 // If we are parsing after we reach EndOfStatement then this means we
6287 // are appending default values to the Operands list. This is only done
6288 // by custom parser, so we shouldn't continue on to the generic parsing.
6289 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6290 return Res;
6291
6292 SMLoc RBraceLoc;
6293 SMLoc LBraceLoc = getLoc();
6294 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6295 unsigned Prefix = Operands.size();
6296
6297 for (;;) {
6298 auto Loc = getLoc();
6299 Res = parseReg(Operands);
6300 if (Res.isNoMatch())
6301 Error(Loc, "expected a register");
6302 if (!Res.isSuccess())
6303 return ParseStatus::Failure;
6304
6305 RBraceLoc = getLoc();
6306 if (trySkipToken(AsmToken::RBrac))
6307 break;
6308
6309 if (!skipToken(AsmToken::Comma,
6310 "expected a comma or a closing square bracket"))
6311 return ParseStatus::Failure;
6312 }
6313
6314 if (Operands.size() - Prefix > 1) {
6315 Operands.insert(Operands.begin() + Prefix,
6316 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6317 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6318 }
6319
6320 return ParseStatus::Success;
6321 }
6322
6323 return parseRegOrImm(Operands);
6324}
6325
6326StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6327 // Clear any forced encodings from the previous instruction.
6328 setForcedEncodingSize(0);
6329 setForcedDPP(false);
6330 setForcedSDWA(false);
6331
6332 if (Name.ends_with("_e64_dpp")) {
6333 setForcedDPP(true);
6334 setForcedEncodingSize(64);
6335 return Name.substr(0, Name.size() - 8);
6336 }
6337 if (Name.ends_with("_e64")) {
6338 setForcedEncodingSize(64);
6339 return Name.substr(0, Name.size() - 4);
6340 }
6341 if (Name.ends_with("_e32")) {
6342 setForcedEncodingSize(32);
6343 return Name.substr(0, Name.size() - 4);
6344 }
6345 if (Name.ends_with("_dpp")) {
6346 setForcedDPP(true);
6347 return Name.substr(0, Name.size() - 4);
6348 }
6349 if (Name.ends_with("_sdwa")) {
6350 setForcedSDWA(true);
6351 return Name.substr(0, Name.size() - 5);
6352 }
6353 return Name;
6354}
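// For instance, a mnemonic such as "v_add_f32_e64_dpp" (illustrative) forces the
// 64-bit DPP encoding and is returned with the suffix stripped ("v_add_f32").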
6355
6356static void applyMnemonicAliases(StringRef &Mnemonic,
6357 const FeatureBitset &Features,
6358 unsigned VariantID);
6359
6360bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
6362 SMLoc NameLoc, OperandVector &Operands) {
6363 // Add the instruction mnemonic
6364 Name = parseMnemonicSuffix(Name);
6365
6366 // If the target architecture uses MnemonicAlias, call it here to parse
6367 // operands correctly.
6368 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6369
6370 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6371
6372 bool IsMIMG = Name.starts_with("image_");
6373
6374 while (!trySkipToken(AsmToken::EndOfStatement)) {
6375 OperandMode Mode = OperandMode_Default;
6376 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6377 Mode = OperandMode_NSA;
6378 ParseStatus Res = parseOperand(Operands, Name, Mode);
6379
6380 if (!Res.isSuccess()) {
6381 checkUnsupportedInstruction(Name, NameLoc);
6382 if (!Parser.hasPendingError()) {
6383 // FIXME: use real operand location rather than the current location.
6384 StringRef Msg = Res.isFailure() ? "failed parsing operand."
6385 : "not a valid operand.";
6386 Error(getLoc(), Msg);
6387 }
6388 while (!trySkipToken(AsmToken::EndOfStatement)) {
6389 lex();
6390 }
6391 return true;
6392 }
6393
6394 // Eat the comma or space if there is one.
6395 trySkipToken(AsmToken::Comma);
6396 }
6397
6398 return false;
6399}
6400
6401//===----------------------------------------------------------------------===//
6402// Utility functions
6403//===----------------------------------------------------------------------===//
6404
6405ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6407 SMLoc S = getLoc();
6408 if (!trySkipId(Name))
6409 return ParseStatus::NoMatch;
6410
6411 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6412 return ParseStatus::Success;
6413}
6414
6415ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6416 int64_t &IntVal) {
6417
6418 if (!trySkipId(Prefix, AsmToken::Colon))
6419 return ParseStatus::NoMatch;
6420
6421  return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6422}
6423
6424ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6425 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6426 std::function<bool(int64_t &)> ConvertResult) {
6427 SMLoc S = getLoc();
6428 int64_t Value = 0;
6429
6430 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6431 if (!Res.isSuccess())
6432 return Res;
6433
6434 if (ConvertResult && !ConvertResult(Value)) {
6435 Error(S, "invalid " + StringRef(Prefix) + " value.");
6436 }
6437
6438 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6439 return ParseStatus::Success;
6440}
6441
6442ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6443 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6444 bool (*ConvertResult)(int64_t &)) {
6445 SMLoc S = getLoc();
6446 if (!trySkipId(Prefix, AsmToken::Colon))
6447 return ParseStatus::NoMatch;
6448
6449 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
6450 return ParseStatus::Failure;
6451
6452 unsigned Val = 0;
6453 const unsigned MaxSize = 4;
6454
6455 // FIXME: How to verify the number of elements matches the number of src
6456 // operands?
6457 for (int I = 0; ; ++I) {
6458 int64_t Op;
6459 SMLoc Loc = getLoc();
6460 if (!parseExpr(Op))
6461 return ParseStatus::Failure;
6462
6463 if (Op != 0 && Op != 1)
6464 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
6465
6466 Val |= (Op << I);
6467
6468 if (trySkipToken(AsmToken::RBrac))
6469 break;
6470
6471 if (I + 1 == MaxSize)
6472 return Error(getLoc(), "expected a closing square bracket");
6473
6474 if (!skipToken(AsmToken::Comma, "expected a comma"))
6475 return ParseStatus::Failure;
6476 }
6477
6478 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
6479 return ParseStatus::Success;
6480}
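// For example, "neg:[0,1,1]" parses to Val = 0b110 (decimal 6): element I
// contributes bit I of the packed immediate.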
6481
6482ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
6484 AMDGPUOperand::ImmTy ImmTy) {
6485 int64_t Bit;
6486 SMLoc S = getLoc();
6487
6488 if (trySkipId(Name)) {
6489 Bit = 1;
6490 } else if (trySkipId("no", Name)) {
6491 Bit = 0;
6492 } else {
6493 return ParseStatus::NoMatch;
6494 }
6495
6496 if (Name == "r128" && !hasMIMG_R128())
6497 return Error(S, "r128 modifier is not supported on this GPU");
6498 if (Name == "a16" && !hasA16())
6499 return Error(S, "a16 modifier is not supported on this GPU");
6500
6501 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6502 ImmTy = AMDGPUOperand::ImmTyR128A16;
6503
6504 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
6505 return ParseStatus::Success;
6506}
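// For example, with Name == "r128" this accepts either "r128" (Bit = 1) or
// "nor128" (Bit = 0).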
6507
6508unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
6509 bool &Disabling) const {
6510 Disabling = Id.consume_front("no");
6511
6512 if (isGFX940() && !Mnemo.starts_with("s_")) {
6513 return StringSwitch<unsigned>(Id)
6514 .Case("nt", AMDGPU::CPol::NT)
6515 .Case("sc0", AMDGPU::CPol::SC0)
6516 .Case("sc1", AMDGPU::CPol::SC1)
6517 .Default(0);
6518 }
6519
6520 return StringSwitch<unsigned>(Id)
6521 .Case("dlc", AMDGPU::CPol::DLC)
6522 .Case("glc", AMDGPU::CPol::GLC)
6523 .Case("scc", AMDGPU::CPol::SCC)
6524 .Case("slc", AMDGPU::CPol::SLC)
6525 .Default(0);
6526}
6527
6528ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
6529 if (isGFX12Plus()) {
6530 SMLoc StringLoc = getLoc();
6531
6532 int64_t CPolVal = 0;
6533    ParseStatus ResTH = ParseStatus::NoMatch;
6534    ParseStatus ResScope = ParseStatus::NoMatch;
6535
6536 for (;;) {
6537 if (ResTH.isNoMatch()) {
6538 int64_t TH;
6539 ResTH = parseTH(Operands, TH);
6540 if (ResTH.isFailure())
6541 return ResTH;
6542 if (ResTH.isSuccess()) {
6543 CPolVal |= TH;
6544 continue;
6545 }
6546 }
6547
6548 if (ResScope.isNoMatch()) {
6549 int64_t Scope;
6550 ResScope = parseScope(Operands, Scope);
6551 if (ResScope.isFailure())
6552 return ResScope;
6553 if (ResScope.isSuccess()) {
6554 CPolVal |= Scope;
6555 continue;
6556 }
6557 }
6558
6559 break;
6560 }
6561
6562 if (ResTH.isNoMatch() && ResScope.isNoMatch())
6563 return ParseStatus::NoMatch;
6564
6565 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
6566 AMDGPUOperand::ImmTyCPol));
6567 return ParseStatus::Success;
6568 }
6569
6570 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
6571 SMLoc OpLoc = getLoc();
6572 unsigned Enabled = 0, Seen = 0;
6573 for (;;) {
6574 SMLoc S = getLoc();
6575 bool Disabling;
6576 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
6577 if (!CPol)
6578 break;
6579
6580 lex();
6581
6582 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
6583 return Error(S, "dlc modifier is not supported on this GPU");
6584
6585 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
6586 return Error(S, "scc modifier is not supported on this GPU");
6587
6588 if (Seen & CPol)
6589 return Error(S, "duplicate cache policy modifier");
6590
6591 if (!Disabling)
6592 Enabled |= CPol;
6593
6594 Seen |= CPol;
6595 }
6596
6597 if (!Seen)
6598 return ParseStatus::NoMatch;
6599
6600 Operands.push_back(
6601 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
6602 return ParseStatus::Success;
6603}
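// Pre-GFX12 example: "glc slc" sets both bits in Enabled, while "noglc" only
// records the modifier in Seen, so a later duplicate is still rejected.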
6604
6605ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
6606 int64_t &Scope) {
6607 Scope = AMDGPU::CPol::SCOPE_CU; // default;
6608
6609  StringRef Value;
6610  SMLoc StringLoc;
6611 ParseStatus Res;
6612
6613 Res = parseStringWithPrefix("scope", Value, StringLoc);
6614 if (!Res.isSuccess())
6615 return Res;
6616
6618 .Case("SCOPE_CU", AMDGPU::CPol::SCOPE_CU)
6619 .Case("SCOPE_SE", AMDGPU::CPol::SCOPE_SE)
6620 .Case("SCOPE_DEV", AMDGPU::CPol::SCOPE_DEV)
6621 .Case("SCOPE_SYS", AMDGPU::CPol::SCOPE_SYS)
6622 .Default(0xffffffff);
6623
6624 if (Scope == 0xffffffff)
6625 return Error(StringLoc, "invalid scope value");
6626
6627 return ParseStatus::Success;
6628}
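// For example, "scope:SCOPE_DEV" selects device scope; any other identifier is
// rejected with "invalid scope value".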
6629
6630ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
6631 TH = AMDGPU::CPol::TH_RT; // default
6632
6633  StringRef Value;
6634  SMLoc StringLoc;
6635 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
6636 if (!Res.isSuccess())
6637 return Res;
6638
6639  if (Value == "TH_DEFAULT")
6640    TH = AMDGPU::CPol::TH_RT;
6641 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" ||
6642 Value == "TH_LOAD_NT_WB") {
6643 return Error(StringLoc, "invalid th value");
6644 } else if (Value.consume_front("TH_ATOMIC_")) {
6646 } else if (Value.consume_front("TH_LOAD_")) {
6648 } else if (Value.consume_front("TH_STORE_")) {
6650 } else {
6651 return Error(StringLoc, "invalid th value");
6652 }
6653
6654 if (Value == "BYPASS")
6656
6657 if (TH != 0) {
6664 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
6667 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
6669 .Default(0xffffffff);
6670 else
6676 .Case("RT_WB", AMDGPU::CPol::TH_RT_WB)
6677 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
6678 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
6679 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
6680 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
6681 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
6682 .Default(0xffffffff);
6683 }
6684
6685 if (TH == 0xffffffff)
6686 return Error(StringLoc, "invalid th value");
6687
6688 return ParseStatus::Success;
6689}
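// For example, "th:TH_LOAD_NT_RT" strips the "TH_LOAD_" prefix and maps the
// remaining "NT_RT" through the StringSwitch above.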
6690
6691static void addOptionalImmOperand(
6692    MCInst& Inst, const OperandVector& Operands,
6693 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6694 AMDGPUOperand::ImmTy ImmT,
6695 int64_t Default = 0) {
6696 auto i = OptionalIdx.find(ImmT);
6697 if (i != OptionalIdx.end()) {
6698 unsigned Idx = i->second;
6699 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
6700 } else {
6701    Inst.addOperand(MCOperand::createImm(Default));
6702  }
6703}
6704
6705ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6706                                                    StringRef &Value,
6707                                                    SMLoc &StringLoc) {
6708 if (!trySkipId(Prefix, AsmToken::Colon))
6709 return ParseStatus::NoMatch;
6710
6711 StringLoc = getLoc();
6712 return parseId(Value, "expected an identifier") ? ParseStatus::Success
6713                                                  : ParseStatus::Failure;
6714}
6715
6716//===----------------------------------------------------------------------===//
6717// MTBUF format
6718//===----------------------------------------------------------------------===//
6719
6720bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6721 int64_t MaxVal,
6722 int64_t &Fmt) {
6723 int64_t Val;
6724 SMLoc Loc = getLoc();
6725
6726 auto Res = parseIntWithPrefix(Pref, Val);
6727 if (Res.isFailure())
6728 return false;
6729 if (Res.isNoMatch())
6730 return true;
6731
6732 if (Val < 0 || Val > MaxVal) {
6733 Error(Loc, Twine("out of range ", StringRef(Pref)));
6734 return false;
6735 }
6736
6737 Fmt = Val;
6738 return true;
6739}
6740
6741ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
6742 AMDGPUOperand::ImmTy ImmTy) {
6743 const char *Pref = "index_key";
6744 int64_t ImmVal = 0;
6745 SMLoc Loc = getLoc();
6746 auto Res = parseIntWithPrefix(Pref, ImmVal);
6747 if (!Res.isSuccess())
6748 return Res;
6749
6750 if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
6751 return Error(Loc, Twine("out of range ", StringRef(Pref)));
6752
6753 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
6754 return Error(Loc, Twine("out of range ", StringRef(Pref)));
6755
6756 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
6757 return ParseStatus::Success;
6758}
6759
6760ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
6761 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
6762}
6763
6764ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
6765 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
6766}
6767
6768// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6769// values to live in a joint format operand in the MCInst encoding.
6770ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6771 using namespace llvm::AMDGPU::MTBUFFormat;
6772
6773 int64_t Dfmt = DFMT_UNDEF;
6774 int64_t Nfmt = NFMT_UNDEF;
6775
6776 // dfmt and nfmt can appear in either order, and each is optional.
6777 for (int I = 0; I < 2; ++I) {
6778 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6779 return ParseStatus::Failure;
6780
6781 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
6782 return ParseStatus::Failure;
6783
6784 // Skip optional comma between dfmt/nfmt
6785 // but guard against 2 commas following each other.
6786 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6787 !peekToken().is(AsmToken::Comma)) {
6788 trySkipToken(AsmToken::Comma);
6789 }
6790 }
6791
6792 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6793 return ParseStatus::NoMatch;
6794
6795 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6796 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6797
6798 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6799 return ParseStatus::Success;
6800}
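// For example, "dfmt:1, nfmt:2" and "nfmt:2, dfmt:1" are equivalent; a missing
// half falls back to its default before the pair is packed by encodeDfmtNfmt.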
6801
6802ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6803 using namespace llvm::AMDGPU::MTBUFFormat;
6804
6805 int64_t Fmt = UFMT_UNDEF;
6806
6807 if (!tryParseFmt("format", UFMT_MAX, Fmt))
6808 return ParseStatus::Failure;
6809
6810 if (Fmt == UFMT_UNDEF)
6811 return ParseStatus::NoMatch;
6812
6813 Format = Fmt;
6814 return ParseStatus::Success;
6815}
6816
6817bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6818 int64_t &Nfmt,
6819 StringRef FormatStr,
6820 SMLoc Loc) {
6821 using namespace llvm::AMDGPU::MTBUFFormat;
6822 int64_t Format;
6823
6824 Format = getDfmt(FormatStr);
6825 if (Format != DFMT_UNDEF) {
6826 Dfmt = Format;
6827 return true;
6828 }
6829
6830 Format = getNfmt(FormatStr, getSTI());
6831 if (Format != NFMT_UNDEF) {
6832 Nfmt = Format;
6833 return true;
6834 }
6835
6836 Error(Loc, "unsupported format");
6837 return false;
6838}
6839
6840ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6841 SMLoc FormatLoc,
6842 int64_t &Format) {
6843 using namespace llvm::AMDGPU::MTBUFFormat;
6844
6845 int64_t Dfmt = DFMT_UNDEF;
6846 int64_t Nfmt = NFMT_UNDEF;
6847 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6848 return ParseStatus::Failure;
6849
6850 if (trySkipToken(AsmToken::Comma)) {
6851 StringRef Str;
6852 SMLoc Loc = getLoc();
6853 if (!parseId(Str, "expected a format string") ||
6854 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
6855 return ParseStatus::Failure;
6856 if (Dfmt == DFMT_UNDEF)
6857 return Error(Loc, "duplicate numeric format");
6858 if (Nfmt == NFMT_UNDEF)
6859 return Error(Loc, "duplicate data format");
6860 }
6861
6862 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6863 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6864
6865 if (isGFX10Plus()) {
6866 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6867 if (Ufmt == UFMT_UNDEF)
6868 return Error(FormatLoc, "unsupported format");
6869 Format = Ufmt;
6870 } else {
6871 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6872 }
6873
6874 return ParseStatus::Success;
6875}
6876
6877ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6878 SMLoc Loc,
6879 int64_t &Format) {
6880 using namespace llvm::AMDGPU::MTBUFFormat;
6881
6882 auto Id = getUnifiedFormat(FormatStr, getSTI());
6883 if (Id == UFMT_UNDEF)
6884 return ParseStatus::NoMatch;
6885
6886 if (!isGFX10Plus())
6887 return Error(Loc, "unified format is not supported on this GPU");
6888
6889 Format = Id;
6890 return ParseStatus::Success;
6891}
6892
6893ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6894 using namespace llvm::AMDGPU::MTBUFFormat;
6895 SMLoc Loc = getLoc();
6896
6897 if (!parseExpr(Format))
6898 return ParseStatus::Failure;
6899 if (!isValidFormatEncoding(Format, getSTI()))
6900 return Error(Loc, "out of range format");
6901
6902 return ParseStatus::Success;
6903}
6904
6905ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6906 using namespace llvm::AMDGPU::MTBUFFormat;
6907
6908 if (!trySkipId("format", AsmToken::Colon))
6909 return ParseStatus::NoMatch;
6910
6911 if (trySkipToken(AsmToken::LBrac)) {
6912 StringRef FormatStr;
6913 SMLoc Loc = getLoc();
6914 if (!parseId(FormatStr, "expected a format string"))
6915 return ParseStatus::Failure;
6916
6917 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6918 if (Res.isNoMatch())
6919 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6920 if (!Res.isSuccess())
6921 return Res;
6922
6923 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6924 return ParseStatus::Failure;
6925
6926 return ParseStatus::Success;
6927 }
6928
6929 return parseNumericFormat(Format);
6930}
6931
6932ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6933 using namespace llvm::AMDGPU::MTBUFFormat;
6934
6935 int64_t Format = getDefaultFormatEncoding(getSTI());
6936 ParseStatus Res;
6937 SMLoc Loc = getLoc();
6938
6939 // Parse legacy format syntax.
6940 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6941 if (Res.isFailure())
6942 return Res;
6943
6944 bool FormatFound = Res.isSuccess();
6945
6946 Operands.push_back(
6947 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6948
6949 if (FormatFound)
6950 trySkipToken(AsmToken::Comma);
6951
6952 if (isToken(AsmToken::EndOfStatement)) {
6953 // We are expecting an soffset operand,
6954    // but let the matcher handle the error.
6955 return ParseStatus::Success;
6956 }
6957
6958 // Parse soffset.
6959 Res = parseRegOrImm(Operands);
6960 if (!Res.isSuccess())
6961 return Res;
6962
6963 trySkipToken(AsmToken::Comma);
6964
6965 if (!FormatFound) {
6966 Res = parseSymbolicOrNumericFormat(Format);
6967 if (Res.isFailure())
6968 return Res;
6969 if (Res.isSuccess()) {
6970 auto Size = Operands.size();
6971 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6972 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6973 Op.setImm(Format);
6974 }
6975 return ParseStatus::Success;
6976 }
6977
6978 if (isId("format") && peekToken().is(AsmToken::Colon))
6979 return Error(getLoc(), "duplicate format");
6980 return ParseStatus::Success;
6981}
6982
6983ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
6984 ParseStatus Res =
6985 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
6986 if (Res.isNoMatch()) {
6987 Res = parseIntWithPrefix("inst_offset", Operands,
6988 AMDGPUOperand::ImmTyInstOffset);
6989 }
6990 return Res;
6991}
6992
6993ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
6994 ParseStatus Res =
6995 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
6996 if (Res.isNoMatch())
6997 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
6998 return Res;
6999}
7000
7001ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7002 ParseStatus Res =
7003 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7004 if (Res.isNoMatch()) {
7005 Res =
7006 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7007 }
7008 return Res;
7009}
7010
7011//===----------------------------------------------------------------------===//
7012// Exp
7013//===----------------------------------------------------------------------===//
7014
7015void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7016 OptionalImmIndexMap OptionalIdx;
7017
7018 unsigned OperandIdx[4];
7019 unsigned EnMask = 0;
7020 int SrcIdx = 0;
7021
7022 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7023 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7024
7025 // Add the register arguments
7026 if (Op.isReg()) {
7027 assert(SrcIdx < 4);
7028 OperandIdx[SrcIdx] = Inst.size();
7029 Op.addRegOperands(Inst, 1);
7030 ++SrcIdx;
7031 continue;
7032 }
7033
7034 if (Op.isOff()) {
7035 assert(SrcIdx < 4);
7036 OperandIdx[SrcIdx] = Inst.size();
7037 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
7038 ++SrcIdx;
7039 continue;
7040 }
7041
7042 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7043 Op.addImmOperands(Inst, 1);
7044 continue;
7045 }
7046
7047 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7048 continue;
7049
7050 // Handle optional arguments
7051 OptionalIdx[Op.getImmTy()] = i;
7052 }
7053
7054 assert(SrcIdx == 4);
7055
7056 bool Compr = false;
7057 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7058 Compr = true;
7059 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7060 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
7061 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
7062 }
7063
7064 for (auto i = 0; i < SrcIdx; ++i) {
7065 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
7066 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7067 }
7068 }
7069
7070 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7071 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7072
7073 Inst.addOperand(MCOperand::createImm(EnMask));
7074}
7075
7076//===----------------------------------------------------------------------===//
7077// s_waitcnt
7078//===----------------------------------------------------------------------===//
7079
7080static bool
7081encodeCnt(
7082  const AMDGPU::IsaVersion ISA,
7083 int64_t &IntVal,
7084 int64_t CntVal,
7085 bool Saturate,
7086 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7087 unsigned (*decode)(const IsaVersion &Version, unsigned))
7088{
7089 bool Failed = false;
7090
7091 IntVal = encode(ISA, IntVal, CntVal);
7092 if (CntVal != decode(ISA, IntVal)) {
7093 if (Saturate) {
7094 IntVal = encode(ISA, IntVal, -1);
7095 } else {
7096 Failed = true;
7097 }
7098 }
7099 return Failed;
7100}
7101
7102bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7103
7104 SMLoc CntLoc = getLoc();
7105 StringRef CntName = getTokenStr();
7106
7107 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7108 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7109 return false;
7110
7111 int64_t CntVal;
7112 SMLoc ValLoc = getLoc();
7113 if (!parseExpr(CntVal))
7114 return false;
7115
7116  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7117
7118 bool Failed = true;
7119 bool Sat = CntName.ends_with("_sat");
7120
7121 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7122 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7123 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7124 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7125 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7126 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7127 } else {
7128 Error(CntLoc, "invalid counter name " + CntName);
7129 return false;
7130 }
7131
7132 if (Failed) {
7133 Error(ValLoc, "too large value for " + CntName);
7134 return false;
7135 }
7136
7137 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7138 return false;
7139
7140 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7141 if (isToken(AsmToken::EndOfStatement)) {
7142 Error(getLoc(), "expected a counter name");
7143 return false;
7144 }
7145 }
7146
7147 return true;
7148}
7149
7150ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7151  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7152  int64_t Waitcnt = getWaitcntBitMask(ISA);
7153 SMLoc S = getLoc();
7154
7155 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7156 while (!isToken(AsmToken::EndOfStatement)) {
7157 if (!parseCnt(Waitcnt))
7158 return ParseStatus::Failure;
7159 }
7160 } else {
7161 if (!parseExpr(Waitcnt))
7162 return ParseStatus::Failure;
7163 }
7164
7165 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7166 return ParseStatus::Success;
7167}
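// For example, "s_waitcnt vmcnt(0) & lgkmcnt(0)" folds each counter expression
// into the shared bitmask via parseCnt; a bare integer expression is also
// accepted and used as the immediate directly.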
7168
7169bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7170 SMLoc FieldLoc = getLoc();
7171 StringRef FieldName = getTokenStr();
7172 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7173 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7174 return false;
7175
7176 SMLoc ValueLoc = getLoc();
7177 StringRef ValueName = getTokenStr();
7178 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7179 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7180 return false;
7181
7182 unsigned Shift;
7183 if (FieldName == "instid0") {
7184 Shift = 0;
7185 } else if (FieldName == "instskip") {
7186 Shift = 4;
7187 } else if (FieldName == "instid1") {
7188 Shift = 7;
7189 } else {
7190 Error(FieldLoc, "invalid field name " + FieldName);
7191 return false;
7192 }
7193
7194 int Value;
7195 if (Shift == 4) {
7196 // Parse values for instskip.
7198 .Case("SAME", 0)
7199 .Case("NEXT", 1)
7200 .Case("SKIP_1", 2)
7201 .Case("SKIP_2", 3)
7202 .Case("SKIP_3", 4)
7203 .Case("SKIP_4", 5)
7204 .Default(-1);
7205 } else {
7206 // Parse values for instid0 and instid1.
7208 .Case("NO_DEP", 0)
7209 .Case("VALU_DEP_1", 1)
7210 .Case("VALU_DEP_2", 2)
7211 .Case("VALU_DEP_3", 3)
7212 .Case("VALU_DEP_4", 4)
7213 .Case("TRANS32_DEP_1", 5)
7214 .Case("TRANS32_DEP_2", 6)
7215 .Case("TRANS32_DEP_3", 7)
7216 .Case("FMA_ACCUM_CYCLE_1", 8)
7217 .Case("SALU_CYCLE_1", 9)
7218 .Case("SALU_CYCLE_2", 10)
7219 .Case("SALU_CYCLE_3", 11)
7220 .Default(-1);
7221 }
7222 if (Value < 0) {
7223 Error(ValueLoc, "invalid value name " + ValueName);
7224 return false;
7225 }
7226
7227 Delay |= Value << Shift;
7228 return true;
7229}
7230
7231ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7232 int64_t Delay = 0;
7233 SMLoc S = getLoc();
7234
7235 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7236 do {
7237 if (!parseDelay(Delay))
7238 return ParseStatus::Failure;
7239 } while (trySkipToken(AsmToken::Pipe));
7240 } else {
7241 if (!parseExpr(Delay))
7242 return ParseStatus::Failure;
7243 }
7244
7245 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7246 return ParseStatus::Success;
7247}
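// For example, "s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT)" shifts each
// field value into place (instid0 at bit 0, instskip at bit 4, instid1 at bit 7).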
7248
7249bool
7250AMDGPUOperand::isSWaitCnt() const {
7251 return isImm();
7252}
7253
7254bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7255
7256//===----------------------------------------------------------------------===//
7257// DepCtr
7258//===----------------------------------------------------------------------===//
7259
7260void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7261 StringRef DepCtrName) {
7262 switch (ErrorId) {
7263 case OPR_ID_UNKNOWN:
7264 Error(Loc, Twine("invalid counter name ", DepCtrName));
7265 return;
7266 case OPR_ID_UNSUPPORTED:
7267 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7268 return;
7269 case OPR_ID_DUPLICATE:
7270 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7271 return;
7272 case OPR_VAL_INVALID:
7273 Error(Loc, Twine("invalid value for ", DepCtrName));
7274 return;
7275 default:
7276 assert(false);
7277 }
7278}
7279
7280bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7281
7282 using namespace llvm::AMDGPU::DepCtr;
7283
7284 SMLoc DepCtrLoc = getLoc();
7285 StringRef DepCtrName = getTokenStr();
7286
7287 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7288 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7289 return false;
7290
7291 int64_t ExprVal;
7292 if (!parseExpr(ExprVal))
7293 return false;
7294
7295 unsigned PrevOprMask = UsedOprMask;
7296 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7297
7298 if (CntVal < 0) {
7299 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7300 return false;
7301 }
7302
7303 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7304 return false;
7305
7306 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7307 if (isToken(AsmToken::EndOfStatement)) {
7308 Error(getLoc(), "expected a counter name");
7309 return false;
7310 }
7311 }
7312
7313 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7314 DepCtr = (DepCtr & ~CntValMask) | CntVal;
7315 return true;
7316}
7317
7318ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7319 using namespace llvm::AMDGPU::DepCtr;
7320
7321 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
7322 SMLoc Loc = getLoc();
7323
7324 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7325 unsigned UsedOprMask = 0;
7326 while (!isToken(AsmToken::EndOfStatement)) {
7327 if (!parseDepCtr(DepCtr, UsedOprMask))
7328 return ParseStatus::Failure;
7329 }
7330 } else {
7331 if (!parseExpr(DepCtr))
7332 return ParseStatus::Failure;
7333 }
7334
7335 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
7336 return ParseStatus::Success;
7337}
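// A hedged example of the depctr syntax implied by parseDepCtr() above:
// counter(value) fields joined by '&' or ',', or a plain integer expression.
// The counter names shown are assumptions; the real names and the default
// encoding come from the llvm::AMDGPU::DepCtr tables.
//
//   s_waitcnt_depctr depctr_va_vdst(0) & depctr_sa_sdst(0)
//   s_waitcnt_depctr 0xfff3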
7338
7339bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
7340
7341//===----------------------------------------------------------------------===//
7342// hwreg
7343//===----------------------------------------------------------------------===//
7344
7345ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
7346 OperandInfoTy &Offset,
7347 OperandInfoTy &Width) {
7348 using namespace llvm::AMDGPU::Hwreg;
7349
7350 if (!trySkipId("hwreg", AsmToken::LParen))
7351 return ParseStatus::NoMatch;
7352
7353 // The register may be specified by name or using a numeric code
7354 HwReg.Loc = getLoc();
7355 if (isToken(AsmToken::Identifier) &&
7356 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7357 HwReg.IsSymbolic = true;
7358 lex(); // skip register name
7359 } else if (!parseExpr(HwReg.Val, "a register name")) {
7360 return ParseStatus::Failure;
7361 }
7362
7363 if (trySkipToken(AsmToken::RParen))
7364 return ParseStatus::Success;
7365
7366 // parse optional params
7367 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
7368 return ParseStatus::Failure;
7369
7370 Offset.Loc = getLoc();
7371 if (!parseExpr(Offset.Val))
7372 return ParseStatus::Failure;
7373
7374 if (!skipToken(AsmToken::Comma, "expected a comma"))
7375 return ParseStatus::Failure;
7376
7377 Width.Loc = getLoc();
7378 if (!parseExpr(Width.Val) ||
7379 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7380 return ParseStatus::Failure;
7381
7382 return ParseStatus::Success;
7383}
7384
7385ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
7386 using namespace llvm::AMDGPU::Hwreg;
7387
7388 int64_t ImmVal = 0;
7389 SMLoc Loc = getLoc();
7390
7391 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
7392 HwregId::Default);
7393 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
7394 HwregOffset::Default);
7395 struct : StructuredOpField {
7396 using StructuredOpField::StructuredOpField;
7397 bool validate(AMDGPUAsmParser &Parser) const override {
7398 if (!isUIntN(Width, Val - 1))
7399 return Error(Parser, "only values from 1 to 32 are legal");
7400 return true;
7401 }
7402 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
7403 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
7404
7405 if (Res.isNoMatch())
7406 Res = parseHwregFunc(HwReg, Offset, Width);
7407
7408 if (Res.isSuccess()) {
7409 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
7410 return ParseStatus::Failure;
7411 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
7412 }
7413
7414 if (Res.isNoMatch() &&
7415 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
7416 Res = ParseStatus::Success;
7417
7418 if (!Res.isSuccess())
7419 return ParseStatus::Failure;
7420
7421 if (!isUInt<16>(ImmVal))
7422 return Error(Loc, "invalid immediate: only 16-bit values are legal");
7423 Operands.push_back(
7424 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
7425 return ParseStatus::Success;
7426}
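// Illustrative operand forms for parseHwreg() above, assuming common AMDGPU
// assembly spellings: a structured immediate with the "id"/"offset"/"size"
// fields, the hwreg(...) macro handled by parseHwregFunc(), or a raw 16-bit
// immediate. The symbolic register name HW_REG_MODE is an assumption.
//
//   s_getreg_b32 s0, {id: 1, offset: 0, size: 32}
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 32)
//   s_getreg_b32 s0, 0x1234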
7427
7428bool AMDGPUOperand::isHwreg() const {
7429 return isImmTy(ImmTyHwreg);
7430}
7431
7432//===----------------------------------------------------------------------===//
7433// sendmsg
7434//===----------------------------------------------------------------------===//
7435
7436bool
7437AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
7438 OperandInfoTy &Op,
7439 OperandInfoTy &Stream) {
7440 using namespace llvm::AMDGPU::SendMsg;
7441
7442 Msg.Loc = getLoc();
7443 if (isToken(AsmToken::Identifier) &&
7444 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7445 Msg.IsSymbolic = true;
7446 lex(); // skip message name
7447 } else if (!parseExpr(Msg.Val, "a message name")) {
7448 return false;
7449 }
7450
7451 if (trySkipToken(AsmToken::Comma)) {
7452 Op.IsDefined = true;
7453 Op.Loc = getLoc();
7454 if (isToken(AsmToken::Identifier) &&
7455 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
7456 OPR_ID_UNKNOWN) {
7457 lex(); // skip operation name
7458 } else if (!parseExpr(Op.Val, "an operation name")) {
7459 return false;
7460 }
7461
7462 if (trySkipToken(AsmToken::Comma)) {
7463 Stream.IsDefined = true;
7464 Stream.Loc = getLoc();
7465 if (!parseExpr(Stream.Val))
7466 return false;
7467 }
7468 }
7469
7470 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
7471}
7472
7473bool
7474AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
7475 const OperandInfoTy &Op,
7476 const OperandInfoTy &Stream) {
7477 using namespace llvm::AMDGPU::SendMsg;
7478
7479 // Validation strictness depends on whether message is specified
7480 // in a symbolic or in a numeric form. In the latter case
7481 // only encoding possibility is checked.
7482 bool Strict = Msg.IsSymbolic;
7483
7484 if (Strict) {
7485 if (Msg.Val == OPR_ID_UNSUPPORTED) {
7486 Error(Msg.Loc, "specified message id is not supported on this GPU");
7487 return false;
7488 }
7489 } else {
7490 if (!isValidMsgId(Msg.Val, getSTI())) {
7491 Error(Msg.Loc, "invalid message id");
7492 return false;
7493 }
7494 }
7495 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
7496 if (Op.IsDefined) {
7497 Error(Op.Loc, "message does not support operations");
7498 } else {
7499 Error(Msg.Loc, "missing message operation");
7500 }
7501 return false;
7502 }
7503 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
7504 if (Op.Val == OPR_ID_UNSUPPORTED)
7505 Error(Op.Loc, "specified operation id is not supported on this GPU");
7506 else
7507 Error(Op.Loc, "invalid operation id");
7508 return false;
7509 }
7510 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
7511 Stream.IsDefined) {
7512 Error(Stream.Loc, "message operation does not support streams");
7513 return false;
7514 }
7515 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
7516 Error(Stream.Loc, "invalid message stream id");
7517 return false;
7518 }
7519 return true;
7520}
7521
7522ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
7523 using namespace llvm::AMDGPU::SendMsg;
7524
7525 int64_t ImmVal = 0;
7526 SMLoc Loc = getLoc();
7527
7528 if (trySkipId("sendmsg", AsmToken::LParen)) {
7529 OperandInfoTy Msg(OPR_ID_UNKNOWN);
7530 OperandInfoTy Op(OP_NONE_);
7531 OperandInfoTy Stream(STREAM_ID_NONE_);
7532 if (parseSendMsgBody(Msg, Op, Stream) &&
7533 validateSendMsg(Msg, Op, Stream)) {
7534 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
7535 } else {
7536 return ParseStatus::Failure;
7537 }
7538 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
7539 if (ImmVal < 0 || !isUInt<16>(ImmVal))
7540 return Error(Loc, "invalid immediate: only 16-bit values are legal");
7541 } else {
7542 return ParseStatus::Failure;
7543 }
7544
7545 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
7546 return ParseStatus::Success;
7547}
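// Illustrative operand forms for parseSendMsg() above. The message, operation
// and stream may be symbolic names or expressions; the symbolic names shown
// (MSG_INTERRUPT, MSG_GS, GS_OP_EMIT) are assumed from common AMDGPU assembly
// rather than taken from this file.
//
//   s_sendmsg sendmsg(MSG_INTERRUPT)
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
//   s_sendmsg 0x22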
7548
7549bool AMDGPUOperand::isSendMsg() const {
7550 return isImmTy(ImmTySendMsg);
7551}
7552
7553//===----------------------------------------------------------------------===//
7554// v_interp
7555//===----------------------------------------------------------------------===//
7556
7557ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
7558 StringRef Str;
7559 SMLoc S = getLoc();
7560
7561 if (!parseId(Str))
7562 return ParseStatus::NoMatch;
7563
7564 int Slot = StringSwitch<int>(Str)
7565 .Case("p10", 0)
7566 .Case("p20", 1)
7567 .Case("p0", 2)
7568 .Default(-1);
7569
7570 if (Slot == -1)
7571 return Error(S, "invalid interpolation slot");
7572
7573 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
7574 AMDGPUOperand::ImmTyInterpSlot));
7575 return ParseStatus::Success;
7576}
7577
7578ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
7579 StringRef Str;
7580 SMLoc S = getLoc();
7581
7582 if (!parseId(Str))
7583 return ParseStatus::NoMatch;
7584
7585 if (!Str.starts_with("attr"))
7586 return Error(S, "invalid interpolation attribute");
7587
7588 StringRef Chan = Str.take_back(2);
7589 int AttrChan = StringSwitch<int>(Chan)
7590 .Case(".x", 0)
7591 .Case(".y", 1)
7592 .Case(".z", 2)
7593 .Case(".w", 3)
7594 .Default(-1);
7595 if (AttrChan == -1)
7596 return Error(S, "invalid or missing interpolation attribute channel");
7597
7598 Str = Str.drop_back(2).drop_front(4);
7599
7600 uint8_t Attr;
7601 if (Str.getAsInteger(10, Attr))
7602 return Error(S, "invalid or missing interpolation attribute number");
7603
7604 if (Attr > 32)
7605 return Error(S, "out of bounds interpolation attribute number");
7606
7607 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
7608
7609 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
7610 AMDGPUOperand::ImmTyInterpAttr));
7611 Operands.push_back(AMDGPUOperand::CreateImm(
7612 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
7613 return ParseStatus::Success;
7614}
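// A sketch of the syntax accepted by the two interp parsers above: the slot is
// one of p10/p20/p0 and the attribute is attr<N>.<chan>, with N in [0,32] and
// chan one of .x/.y/.z/.w (all per the code above); the mnemonics shown are
// illustrative assumptions.
//
//   v_interp_p1_f32 v0, v1, attr0.x
//   v_interp_p2_f32 v0, v1, attr31.w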
7615
7616//===----------------------------------------------------------------------===//
7617// exp
7618//===----------------------------------------------------------------------===//
7619
7620ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
7621 using namespace llvm::AMDGPU::Exp;
7622
7623 StringRef Str;
7624 SMLoc S = getLoc();
7625
7626 if (!parseId(Str))
7627 return ParseStatus::NoMatch;
7628
7629 unsigned Id = getTgtId(Str);
7630 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
7631 return Error(S, (Id == ET_INVALID)
7632 ? "invalid exp target"
7633 : "exp target is not supported on this GPU");
7634
7635 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
7636 AMDGPUOperand::ImmTyExpTgt));
7637 return ParseStatus::Success;
7638}
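// Hedged examples of exp targets accepted by parseExpTgt() above. Target names
// are resolved by getTgtId() and validated per GPU; the specific targets and
// register operands shown here are assumptions for illustration.
//
//   exp mrt0 v0, v0, v0, v0
//   exp pos0 v1, v2, v3, v4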
7639
7640//===----------------------------------------------------------------------===//
7641// parser helpers
7642//===----------------------------------------------------------------------===//
7643
7644bool
7645AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
7646 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
7647}
7648
7649bool
7650AMDGPUAsmParser::isId(const StringRef Id) const {
7651 return isId(getToken(), Id);
7652}
7653
7654bool
7655AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
7656 return getTokenKind() == Kind;
7657}
7658
7659StringRef AMDGPUAsmParser::getId() const {
7660 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
7661}
7662
7663bool
7664AMDGPUAsmParser::trySkipId(const StringRef Id) {
7665 if (isId(Id)) {
7666 lex();
7667 return true;
7668 }
7669 return false;
7670}
7671
7672bool
7673AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7674 if (isToken(AsmToken::Identifier)) {
7675 StringRef Tok = getTokenStr();
7676 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
7677 lex();
7678 return true;
7679 }
7680 }
7681 return false;
7682}
7683
7684bool
7685AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7686 if (isId(Id) && peekToken().is(Kind)) {
7687 lex();
7688 lex();
7689 return true;
7690 }
7691 return false;
7692}
7693
7694bool
7695AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7696 if (isToken(Kind)) {
7697 lex();
7698 return true;
7699 }
7700 return false;
7701}
7702
7703bool
7704AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7705 const StringRef ErrMsg) {
7706 if (!trySkipToken(Kind)) {
7707 Error(getLoc(), ErrMsg);
7708 return false;
7709 }
7710 return true;
7711}
7712
7713bool
7714AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7715 SMLoc S = getLoc();
7716
7717 const MCExpr *Expr;
7718 if (Parser.parseExpression(Expr))
7719 return false;
7720
7721 if (Expr->evaluateAsAbsolute(Imm))
7722 return true;
7723
7724 if (Expected.empty()) {
7725 Error(S, "expected absolute expression");
7726 } else {
7727 Error(S, Twine("expected ", Expected) +
7728 Twine(" or an absolute expression"));
7729 }
7730 return false;
7731}
7732
7733bool
7734AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7735 SMLoc S = getLoc();
7736
7737 const MCExpr *Expr;
7738 if (Parser.parseExpression(Expr))
7739 return false;
7740
7741 int64_t IntVal;
7742 if (Expr->evaluateAsAbsolute(IntVal)) {
7743 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7744 } else {
7745 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7746 }
7747 return true;
7748}
7749
7750bool
7751AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7752 if (isToken(AsmToken::String)) {
7753 Val = getToken().getStringContents();
7754 lex();
7755 return true;
7756 }
7757 Error(getLoc(), ErrMsg);
7758 return false;
7759}
7760
7761bool
7762AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7763 if (isToken(AsmToken::Identifier)) {
7764 Val = getTokenStr();
7765 lex();
7766 return true;
7767 }
7768 if (!ErrMsg.empty())
7769 Error(getLoc(), ErrMsg);
7770 return false;
7771}
7772
7773AsmToken
7774AMDGPUAsmParser::getToken() const {
7775 return Parser.getTok();
7776}
7777
7778AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7779 return isToken(AsmToken::EndOfStatement)
7780 ? getToken()
7781 : getLexer().peekTok(ShouldSkipSpace);
7782}
7783
7784void
7785AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7786 auto TokCount = getLexer().peekTokens(Tokens);
7787
7788 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7789 Tokens[Idx] = AsmToken(AsmToken::Error, "");
7790}
7791
7792AsmToken::TokenKind
7793AMDGPUAsmParser::getTokenKind() const {
7794 return getLexer().getKind();
7795}
7796
7797SMLoc
7798AMDGPUAsmParser::getLoc() const {
7799 return getToken().getLoc();
7800}
7801
7802StringRef
7803AMDGPUAsmParser::getTokenStr() const {
7804 return getToken().getString();
7805}
7806
7807void
7808AMDGPUAsmParser::lex() {
7809 Parser.Lex();
7810}
7811
7812SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
7813 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7814}
7815
7816SMLoc
7817AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7818 const OperandVector &Operands) const {
7819 for (unsigned i = Operands.size() - 1; i > 0; --i) {
7820 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7821 if (Test(Op))
7822 return Op.getStartLoc();
7823 }
7824 return getInstLoc(Operands);
7825}
7826
7827SMLoc
7828AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7829 const OperandVector &Operands) const {
7830 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7831 return getOperandLoc(Test, Operands);
7832}
7833
7834SMLoc
7835AMDGPUAsmParser::getRegLoc(unsigned Reg,
7836 const OperandVector &Operands) const {
7837 auto Test = [=](const AMDGPUOperand& Op) {
7838 return Op.isRegKind() && Op.getReg() == Reg;
7839 };
7840 return getOperandLoc(Test, Operands);
7841}
7842
7843SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
7844 bool SearchMandatoryLiterals) const {
7845 auto Test = [](const AMDGPUOperand& Op) {
7846 return Op.IsImmKindLiteral() || Op.isExpr();
7847 };
7848 SMLoc Loc = getOperandLoc(Test, Operands);
7849 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
7850 Loc = getMandatoryLitLoc(Operands);
7851 return Loc;
7852}
7853
7854SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
7855 auto Test = [](const AMDGPUOperand &Op) {
7856 return Op.IsImmKindMandatoryLiteral();
7857 };
7858 return getOperandLoc(Test, Operands);
7859}
7860
7861SMLoc
7862AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7863 auto Test = [](const AMDGPUOperand& Op) {
7864 return Op.isImmKindConst();
7865 };
7866 return getOperandLoc(Test, Operands);
7867}
7868
7869ParseStatus
7870AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
7871 if (!trySkipToken(AsmToken::LCurly))
7872 return ParseStatus::NoMatch;
7873
7874 bool First = true;
7875 while (!trySkipToken(AsmToken::RCurly)) {
7876 if (!First &&
7877 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
7878 return ParseStatus::Failure;
7879
7880 StringRef Id = getTokenStr();
7881 SMLoc IdLoc = getLoc();
7882 if (!skipToken(AsmToken::Identifier, "field name expected") ||
7883 !skipToken(AsmToken::Colon, "colon expected"))
7884 return ParseStatus::Failure;
7885
7886 auto I =
7887 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
7888 if (I == Fields.end())
7889 return Error(IdLoc, "unknown field");
7890 if ((*I)->IsDefined)
7891 return Error(IdLoc, "duplicate field");
7892
7893 // TODO: Support symbolic values.
7894 (*I)->Loc = getLoc();
7895 if (!parseExpr((*I)->Val))
7896 return ParseStatus::Failure;
7897 (*I)->IsDefined = true;
7898
7899 First = false;
7900 }
7901 return ParseStatus::Success;
7902}
7903
7904bool AMDGPUAsmParser::validateStructuredOpFields(
7905 ArrayRef<const StructuredOpField *> Fields) {
7906 return all_of(Fields, [this](const StructuredOpField *F) {
7907 return F->validate(*this);
7908 });
7909}
7910
7911//===----------------------------------------------------------------------===//
7912// swizzle
7913//===----------------------------------------------------------------------===//
7914
7915LLVM_READNONE
7916static unsigned
7917encodeBitmaskPerm(const unsigned AndMask,
7918 const unsigned OrMask,
7919 const unsigned XorMask) {
7920 using namespace llvm::AMDGPU::Swizzle;
7921
7922 return BITMASK_PERM_ENC |
7923 (AndMask << BITMASK_AND_SHIFT) |
7924 (OrMask << BITMASK_OR_SHIFT) |
7925 (XorMask << BITMASK_XOR_SHIFT);
7926}
7927
7928bool
7929AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7930 const unsigned MinVal,
7931 const unsigned MaxVal,
7932 const StringRef ErrMsg,
7933 SMLoc &Loc) {
7934 if (!skipToken(AsmToken::Comma, "expected a comma")) {
7935 return false;
7936 }
7937 Loc = getLoc();
7938 if (!parseExpr(Op)) {
7939 return false;
7940 }
7941 if (Op < MinVal || Op > MaxVal) {
7942 Error(Loc, ErrMsg);
7943 return false;
7944 }
7945
7946 return true;
7947}
7948
7949bool
7950AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7951 const unsigned MinVal,
7952 const unsigned MaxVal,
7953 const StringRef ErrMsg) {
7954 SMLoc Loc;
7955 for (unsigned i = 0; i < OpNum; ++i) {
7956 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7957 return false;
7958 }
7959
7960 return true;
7961}
7962
7963bool
7964AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7965 using namespace llvm::AMDGPU::Swizzle;
7966
7967 int64_t Lane[LANE_NUM];
7968 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7969 "expected a 2-bit lane id")) {
7970 Imm = QUAD_PERM_ENC;
7971 for (unsigned I = 0; I < LANE_NUM; ++I) {
7972 Imm |= Lane[I] << (LANE_SHIFT * I);
7973 }
7974 return true;
7975 }
7976 return false;
7977}
7978
7979bool
7980AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7981 using namespace llvm::AMDGPU::Swizzle;
7982
7983 SMLoc Loc;
7984 int64_t GroupSize;
7985 int64_t LaneIdx;
7986
7987 if (!parseSwizzleOperand(GroupSize,
7988 2, 32,
7989 "group size must be in the interval [2,32]",
7990 Loc)) {
7991 return false;
7992 }
7993 if (!isPowerOf2_64(GroupSize)) {
7994 Error(Loc, "group size must be a power of two");
7995 return false;
7996 }
7997 if (parseSwizzleOperand(LaneIdx,
7998 0, GroupSize - 1,
7999 "lane id must be in the interval [0,group size - 1]",
8000 Loc)) {
8001 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8002 return true;
8003 }
8004 return false;
8005}
8006
8007bool
8008AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8009 using namespace llvm::AMDGPU::Swizzle;
8010
8011 SMLoc Loc;
8012 int64_t GroupSize;
8013
8014 if (!parseSwizzleOperand(GroupSize,
8015 2, 32,
8016 "group size must be in the interval [2,32]",
8017 Loc)) {
8018 return false;
8019 }
8020 if (!isPowerOf2_64(GroupSize)) {
8021 Error(Loc, "group size must be a power of two");
8022 return false;
8023 }
8024
8025 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8026 return true;
8027}
8028
8029bool
8030AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8031 using namespace llvm::AMDGPU::Swizzle;
8032
8033 SMLoc Loc;
8034 int64_t GroupSize;
8035
8036 if (!parseSwizzleOperand(GroupSize,
8037 1, 16,
8038 "group size must be in the interval [1,16]",
8039 Loc)) {
8040 return false;
8041 }
8042 if (!isPowerOf2_64(GroupSize)) {
8043 Error(Loc, "group size must be a power of two");
8044 return false;
8045 }
8046
8047 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8048 return true;
8049}
8050
8051bool
8052AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8053 using namespace llvm::AMDGPU::Swizzle;
8054
8055 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8056 return false;
8057 }
8058
8059 StringRef Ctl;
8060 SMLoc StrLoc = getLoc();
8061 if (!parseString(Ctl)) {
8062 return false;
8063 }
8064 if (Ctl.size() != BITMASK_WIDTH) {
8065 Error(StrLoc, "expected a 5-character mask");
8066 return false;
8067 }
8068
8069 unsigned AndMask = 0;
8070 unsigned OrMask = 0;
8071 unsigned XorMask = 0;
8072
8073 for (size_t i = 0; i < Ctl.size(); ++i) {
8074 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8075 switch(Ctl[i]) {
8076 default:
8077 Error(StrLoc, "invalid mask");
8078 return false;
8079 case '0':
8080 break;
8081 case '1':
8082 OrMask |= Mask;
8083 break;
8084 case 'p':
8085 AndMask |= Mask;
8086 break;
8087 case 'i':
8088 AndMask |= Mask;
8089 XorMask |= Mask;
8090 break;
8091 }
8092 }
8093
8094 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8095 return true;
8096}
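// A worked example of the 5-character bitmask control string parsed above.
// Characters apply MSB first; each selects how one lane-id bit is produced:
//   '0' -> force to 0      (and=0, or=0, xor=0)
//   '1' -> force to 1      (and=0, or=1, xor=0)
//   'p' -> preserve        (and=1, or=0, xor=0)
//   'i' -> invert          (and=1, or=0, xor=1)
// So "00pip" yields AndMask=0b00111, OrMask=0b00000, XorMask=0b00010 before
// being packed by encodeBitmaskPerm().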
8097
8098bool
8099AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8100
8101 SMLoc OffsetLoc = getLoc();
8102
8103 if (!parseExpr(Imm, "a swizzle macro")) {
8104 return false;
8105 }
8106 if (!isUInt<16>(Imm)) {
8107 Error(OffsetLoc, "expected a 16-bit offset");
8108 return false;
8109 }
8110 return true;
8111}
8112
8113bool
8114AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8115 using namespace llvm::AMDGPU::Swizzle;
8116
8117 if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
8118
8119 SMLoc ModeLoc = getLoc();
8120 bool Ok = false;
8121
8122 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8123 Ok = parseSwizzleQuadPerm(Imm);
8124 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8125 Ok = parseSwizzleBitmaskPerm(Imm);
8126 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8127 Ok = parseSwizzleBroadcast(Imm);
8128 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8129 Ok = parseSwizzleSwap(Imm);
8130 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8131 Ok = parseSwizzleReverse(Imm);
8132 } else {
8133 Error(ModeLoc, "expected a swizzle mode");
8134 }
8135
8136 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
8137 }
8138
8139 return false;
8140}
8141
8142ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8143 SMLoc S = getLoc();
8144 int64_t Imm = 0;
8145
8146 if (trySkipId("offset")) {
8147
8148 bool Ok = false;
8149 if (skipToken(AsmToken::Colon, "expected a colon")) {
8150 if (trySkipId("swizzle")) {
8151 Ok = parseSwizzleMacro(Imm);
8152 } else {
8153 Ok = parseSwizzleOffset(Imm);
8154 }
8155 }
8156
8157 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8158
8159 return Ok ? ParseStatus::Success : ParseStatus::Failure;
8160 }
8161 return ParseStatus::NoMatch;
8162}
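// Illustrative ds_swizzle operand spellings matching parseSwizzle() above; the
// macro keywords follow the Swizzle::IdSymbolic table used by the code, while
// the mnemonic and the concrete values are assumptions for illustration.
//
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BROADCAST, 8, 3)
//   ds_swizzle_b32 v0, v1 offset:0x8055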
8163
8164bool
8165AMDGPUOperand::isSwizzle() const {
8166 return isImmTy(ImmTySwizzle);
8167}
8168
8169//===----------------------------------------------------------------------===//
8170// VGPR Index Mode
8171//===----------------------------------------------------------------------===//
8172
8173int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8174
8175 using namespace llvm::AMDGPU::VGPRIndexMode;
8176
8177 if (trySkipToken(AsmToken::RParen)) {
8178 return OFF;
8179 }
8180
8181 int64_t Imm = 0;
8182
8183 while (true) {
8184 unsigned Mode = 0;
8185 SMLoc S = getLoc();
8186
8187 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8188 if (trySkipId(IdSymbolic[ModeId])) {
8189 Mode = 1 << ModeId;
8190 break;
8191 }
8192 }
8193
8194 if (Mode == 0) {
8195 Error(S, (Imm == 0)?
8196 "expected a VGPR index mode or a closing parenthesis" :
8197 "expected a VGPR index mode");
8198 return UNDEF;
8199 }
8200
8201 if (Imm & Mode) {
8202 Error(S, "duplicate VGPR index mode");
8203 return UNDEF;
8204 }
8205 Imm |= Mode;
8206
8207 if (trySkipToken(AsmToken::RParen))
8208 break;
8209 if (!skipToken(AsmToken::Comma,
8210 "expected a comma or a closing parenthesis"))
8211 return UNDEF;
8212 }
8213
8214 return Imm;
8215}
8216
8217ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8218
8219 using namespace llvm::AMDGPU::VGPRIndexMode;
8220
8221 int64_t Imm = 0;
8222 SMLoc S = getLoc();
8223
8224 if (trySkipId("gpr_idx", AsmToken::LParen)) {
8225 Imm = parseGPRIdxMacro();
8226 if (Imm == UNDEF)
8227 return ParseStatus::Failure;
8228 } else {
8229 if (getParser().parseAbsoluteExpression(Imm))
8230 return ParseStatus::Failure;
8231 if (Imm < 0 || !isUInt<4>(Imm))
8232 return Error(S, "invalid immediate: only 4-bit values are legal");
8233 }
8234
8235 Operands.push_back(
8236 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8237 return ParseStatus::Success;
8238}
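// Illustrative gpr_idx operand forms for parseGPRIdxMode() above. Mode names
// are taken from VGPRIndexMode::IdSymbolic; the SRC0/DST spellings and the
// numeric alternative below are assumptions for illustration.
//
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)
//   s_set_gpr_idx_on s0, 9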
8239
8240bool AMDGPUOperand::isGPRIdxMode() const {
8241 return isImmTy(ImmTyGprIdxMode);
8242}
8243
8244//===----------------------------------------------------------------------===//
8245// sopp branch targets
8246//===----------------------------------------------------------------------===//
8247
8248ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
8249
8250 // Make sure we are not parsing something
8251 // that looks like a label or an expression but is not.
8252 // This will improve error messages.
8253 if (isRegister() || isModifier())
8254 return ParseStatus::NoMatch;
8255
8256 if (!parseExpr(Operands))
8257 return ParseStatus::Failure;
8258
8259 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
8260 assert(Opr.isImm() || Opr.isExpr());
8261 SMLoc Loc = Opr.getStartLoc();
8262
8263 // Currently we do not support arbitrary expressions as branch targets.
8264 // Only labels and absolute expressions are accepted.
8265 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8266 Error(Loc, "expected an absolute expression or a label");
8267 } else if (Opr.isImm() && !Opr.isS16Imm()) {
8268 Error(Loc, "expected a 16-bit signed jump offset");
8269 }
8270
8271 return ParseStatus::Success;
8272}
8273
8274//===----------------------------------------------------------------------===//
8275// Boolean holding registers
8276//===----------------------------------------------------------------------===//
8277
8278ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
8279 return parseReg(Operands);
8280}
8281
8282//===----------------------------------------------------------------------===//
8283// mubuf
8284//===----------------------------------------------------------------------===//
8285
8286void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
8287 const OperandVector &Operands,
8288 bool IsAtomic) {
8289 OptionalImmIndexMap OptionalIdx;
8290 unsigned FirstOperandIdx = 1;
8291 bool IsAtomicReturn = false;
8292
8293 if (IsAtomic) {
8294 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
8295 SIInstrFlags::IsAtomicRet;
8296 }
8297
8298 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
8299 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8300
8301 // Add the register arguments
8302 if (Op.isReg()) {
8303 Op.addRegOperands(Inst, 1);
8304 // Insert a tied src for atomic return dst.
8305 // This cannot be postponed as subsequent calls to
8306 // addImmOperands rely on correct number of MC operands.
8307 if (IsAtomicReturn && i == FirstOperandIdx)
8308 Op.addRegOperands(Inst, 1);
8309 continue;
8310 }
8311
8312 // Handle the case where soffset is an immediate
8313 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
8314 Op.addImmOperands(Inst, 1);
8315 continue;
8316 }
8317
8318 // Handle tokens like 'offen' which are sometimes hard-coded into the
8319 // asm string. There are no MCInst operands for these.
8320 if (Op.isToken()) {
8321 continue;
8322 }
8323 assert(Op.isImm());
8324
8325 // Handle optional arguments
8326 OptionalIdx[Op.getImmTy()] = i;
8327 }
8328
8329 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
8330 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
8331}
8332
8333//===----------------------------------------------------------------------===//
8334// smrd
8335//===----------------------------------------------------------------------===//
8336
8337bool AMDGPUOperand::isSMRDOffset8() const {
8338 return isImmLiteral() && isUInt<8>(getImm());
8339}
8340
8341bool AMDGPUOperand::isSMEMOffset() const {
8342 // Offset range is checked later by validator.
8343 return isImmLiteral();
8344}
8345
8346bool AMDGPUOperand::isSMRDLiteralOffset() const {
8347 // 32-bit literals are only supported on CI and we only want to use them
8348 // when the offset is > 8-bits.
8349 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
8350}
8351
8352//===----------------------------------------------------------------------===//
8353// vop3
8354//===----------------------------------------------------------------------===//
8355
8356static bool ConvertOmodMul(int64_t &Mul) {
8357 if (Mul != 1 && Mul != 2 && Mul != 4)
8358 return false;
8359
8360 Mul >>= 1;
8361 return true;
8362}
8363
8364static bool ConvertOmodDiv(int64_t &Div) {
8365 if (Div == 1) {
8366 Div = 0;
8367 return true;
8368 }
8369
8370 if (Div == 2) {
8371 Div = 3;
8372 return true;
8373 }
8374
8375 return false;
8376}
8377
8378// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
8379// This is intentional and ensures compatibility with sp3.
8380// See bug 35397 for details.
8381bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
8382 if (BoundCtrl == 0 || BoundCtrl == 1) {
8383 if (!isGFX11Plus())
8384 BoundCtrl = 1;
8385 return true;
8386 }
8387 return false;
8388}
8389
8390void AMDGPUAsmParser::onBeginOfFile() {
8391 if (!getParser().getStreamer().getTargetStreamer() ||
8392 getSTI().getTargetTriple().getArch() == Triple::r600)
8393 return;
8394
8395 if (!getTargetStreamer().getTargetID())
8396 getTargetStreamer().initializeTargetID(getSTI(),
8397 getSTI().getFeatureString());
8398
8399 if (isHsaAbi(getSTI()))
8400 getTargetStreamer().EmitDirectiveAMDGCNTarget();
8401}
8402
8403/// Parse AMDGPU specific expressions.
8404///
8405/// expr ::= or(expr, ...) |
8406/// max(expr, ...)
8407///
8408bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
8409 using AGVK = AMDGPUMCExpr::VariantKind;
8410
8411 if (isToken(AsmToken::Identifier)) {
8412 StringRef TokenId = getTokenStr();
8413 AGVK VK = StringSwitch<AGVK>(TokenId)
8414 .Case("max", AGVK::AGVK_Max)
8415 .Case("or", AGVK::AGVK_Or)
8416 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
8417 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
8418 .Case("alignto", AGVK::AGVK_AlignTo)
8419 .Case("occupancy", AGVK::AGVK_Occupancy)
8420 .Default(AGVK::AGVK_None);
8421
8422 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
8423 SmallVector<const MCExpr *, 4> Exprs;
8424 uint64_t CommaCount = 0;
8425 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
8426 lex(); // Eat '('
8427 while (true) {
8428 if (trySkipToken(AsmToken::RParen)) {
8429 if (Exprs.empty()) {
8430 Error(getToken().getLoc(),
8431 "empty " + Twine(TokenId) + " expression");
8432 return true;
8433 }
8434 if (CommaCount + 1 != Exprs.size()) {
8435 Error(getToken().getLoc(),
8436 "mismatch of commas in " + Twine(TokenId) + " expression");
8437 return true;
8438 }
8439 Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
8440 return false;
8441 }
8442 const MCExpr *Expr;
8443 if (getParser().parseExpression(Expr, EndLoc))
8444 return true;
8445 Exprs.push_back(Expr);
8446 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
8447 if (LastTokenWasComma)
8448 CommaCount++;
8449 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
8450 Error(getToken().getLoc(),
8451 "unexpected token in " + Twine(TokenId) + " expression");
8452 return true;
8453 }
8454 }
8455 }
8456 }
8457 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
8458}
8459
8460ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
8461 StringRef Name = getTokenStr();
8462 if (Name == "mul") {
8463 return parseIntWithPrefix("mul", Operands,
8464 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8465 }
8466
8467 if (Name == "div") {
8468 return parseIntWithPrefix("div", Operands,
8469 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8470 }
8471
8472 return ParseStatus::NoMatch;
8473}
8474
8475// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8476// the number of src operands present, then copies that bit into src0_modifiers.
8477static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
8478 int Opc = Inst.getOpcode();
8479 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8480 if (OpSelIdx == -1)
8481 return;
8482
8483 int SrcNum;
8484 const int Ops[] = { AMDGPU::OpName::src0,
8485 AMDGPU::OpName::src1,
8486 AMDGPU::OpName::src2 };
8487 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
8488 ++SrcNum)
8489 ;
8490 assert(SrcNum > 0);
8491
8492 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8493
8494 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
8495 if (DstIdx == -1)
8496 return;
8497
8498 const MCOperand &DstOp = Inst.getOperand(DstIdx);
8499 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8500 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8501 if (DstOp.isReg() &&
8502 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
8503 if (AMDGPU::isHi(DstOp.getReg(), MRI))
8504 ModVal |= SISrcMods::DST_OP_SEL;
8505 } else {
8506 if ((OpSel & (1 << SrcNum)) != 0)
8507 ModVal |= SISrcMods::DST_OP_SEL;
8508 }
8509 Inst.getOperand(ModIdx).setImm(ModVal);
8510}
8511
8512void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
8513 const OperandVector &Operands) {
8514 cvtVOP3P(Inst, Operands);
8515 cvtVOP3DstOpSelOnly(Inst, *getMRI());
8516}
8517
8518void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
8519 OptionalImmIndexMap &OptionalIdx) {
8520 cvtVOP3P(Inst, Operands, OptionalIdx);
8521 cvtVOP3DstOpSelOnly(Inst, *getMRI());
8522}
8523
8524static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8525 return
8526 // 1. This operand is input modifiers
8527 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
8528 // 2. This is not last operand
8529 && Desc.NumOperands > (OpNum + 1)
8530 // 3. Next operand is register class
8531 && Desc.operands()[OpNum + 1].RegClass != -1
8532 // 4. Next register is not tied to any other operand
8533 && Desc.getOperandConstraint(OpNum + 1,
8534 MCOI::OperandConstraint::TIED_TO) == -1;
8535}
8536
8537void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8538{
8539 OptionalImmIndexMap OptionalIdx;
8540 unsigned Opc = Inst.getOpcode();
8541
8542 unsigned I = 1;
8543 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8544 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8545 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8546 }
8547
8548 for (unsigned E = Operands.size(); I != E; ++I) {
8549 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8550 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8551 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8552 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
8553 Op.isInterpAttrChan()) {
8554 Inst.addOperand(MCOperand::createImm(Op.getImm()));
8555 } else if (Op.isImmModifier()) {
8556 OptionalIdx[Op.getImmTy()] = I;
8557 } else {
8558 llvm_unreachable("unhandled operand type");
8559 }
8560 }
8561
8562 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
8563 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8564 AMDGPUOperand::ImmTyHigh);
8565
8566 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8567 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8568 AMDGPUOperand::ImmTyClamp);
8569
8570 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8571 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8572 AMDGPUOperand::ImmTyOModSI);
8573}
8574
8575void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8576{
8577 OptionalImmIndexMap OptionalIdx;
8578 unsigned Opc = Inst.getOpcode();
8579
8580 unsigned I = 1;
8581 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8582 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8583 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8584 }
8585
8586 for (unsigned E = Operands.size(); I != E; ++I) {
8587 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8588 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8589 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8590 } else if (Op.isImmModifier()) {
8591 OptionalIdx[Op.getImmTy()] = I;
8592 } else {
8593 llvm_unreachable("unhandled operand type");
8594 }
8595 }
8596
8597 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
8598
8599 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8600 if (OpSelIdx != -1)
8601 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8602
8603 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8604
8605 if (OpSelIdx == -1)
8606 return;
8607
8608 const int Ops[] = { AMDGPU::OpName::src0,
8609 AMDGPU::OpName::src1,
8610 AMDGPU::OpName::src2 };
8611 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8612 AMDGPU::OpName::src1_modifiers,
8613 AMDGPU::OpName::src2_modifiers };
8614
8615 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8616
8617 for (int J = 0; J < 3; ++J) {
8618 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8619 if (OpIdx == -1)
8620 break;
8621
8622 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8623 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8624
8625 if ((OpSel & (1 << J)) != 0)
8626 ModVal |= SISrcMods::OP_SEL_0;
8627 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8628 (OpSel & (1 << 3)) != 0)
8629 ModVal |= SISrcMods::DST_OP_SEL;
8630
8631 Inst.getOperand(ModIdx).setImm(ModVal);
8632 }
8633}
8634
8635void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8636 OptionalImmIndexMap &OptionalIdx) {
8637 unsigned Opc = Inst.getOpcode();
8638
8639 unsigned I = 1;
8640 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8641 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8642 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8643 }
8644
8645 for (unsigned E = Operands.size(); I != E; ++I) {
8646 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8647 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8648 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8649 } else if (Op.isImmModifier()) {
8650 OptionalIdx[Op.getImmTy()] = I;
8651 } else if (Op.isRegOrImm()) {
8652 Op.addRegOrImmOperands(Inst, 1);
8653 } else {
8654 llvm_unreachable("unhandled operand type");
8655 }
8656 }
8657
8658 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
8659 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
8660 Inst.addOperand(Inst.getOperand(0));
8661 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8662 AMDGPUOperand::ImmTyByteSel);
8663 }
8664
8665 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8666 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8667 AMDGPUOperand::ImmTyClamp);
8668
8669 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8670 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8671 AMDGPUOperand::ImmTyOModSI);
8672
8673 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8674 // it has src2 register operand that is tied to dst operand
8675 // we don't allow modifiers for this operand in assembler so src2_modifiers
8676 // should be 0.
8677 if (isMAC(Opc)) {
8678 auto it = Inst.begin();
8679 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8680 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8681 ++it;
8682 // Copy the operand to ensure it's not invalidated when Inst grows.
8683 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8684 }
8685}
8686
8687void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8688 OptionalImmIndexMap OptionalIdx;
8689 cvtVOP3(Inst, Operands, OptionalIdx);
8690}
8691
8692void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8693 OptionalImmIndexMap &OptIdx) {
8694 const int Opc = Inst.getOpcode();
8695 const MCInstrDesc &Desc = MII.get(Opc);
8696
8697 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8698
8699 if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8700 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
8701 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
8702 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
8703 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
8704 Inst.addOperand(Inst.getOperand(0));
8705 }
8706
8707 // Adding vdst_in operand is already covered for these DPP instructions in
8708 // cvtVOP3DPP.
8709 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
8710 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
8711 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
8712 Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
8713 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12 ||
8714 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
8715 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
8716 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
8717 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) {
8718 assert(!IsPacked);
8719 Inst.addOperand(Inst.getOperand(0));
8720 }
8721
8722 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
8723 // instruction, and then figure out where to actually put the modifiers
8724
8725 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8726 if (OpSelIdx != -1) {
8727 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8728 }
8729
8730 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8731 if (OpSelHiIdx != -1) {
8732 int DefaultVal = IsPacked ? -1 : 0;
8733 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8734 DefaultVal);
8735 }
8736
8737 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8738 if (NegLoIdx != -1)
8739 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8740
8741 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8742 if (NegHiIdx != -1)
8743 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8744
8745 const int Ops[] = { AMDGPU::OpName::src0,
8746 AMDGPU::OpName::src1,
8747 AMDGPU::OpName::src2 };
8748 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8749 AMDGPU::OpName::src1_modifiers,
8750 AMDGPU::OpName::src2_modifiers };
8751
8752 unsigned OpSel = 0;
8753 unsigned OpSelHi = 0;
8754 unsigned NegLo = 0;
8755 unsigned NegHi = 0;
8756
8757 if (OpSelIdx != -1)
8758 OpSel = Inst.getOperand(OpSelIdx).getImm();
8759
8760 if (OpSelHiIdx != -1)
8761 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8762
8763 if (NegLoIdx != -1)
8764 NegLo = Inst.getOperand(NegLoIdx).getImm();
8765
8766 if (NegHiIdx != -1)
8767 NegHi = Inst.getOperand(NegHiIdx).getImm();
8768
8769 for (int J = 0; J < 3; ++J) {
8770 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8771 if (OpIdx == -1)
8772 break;
8773
8774 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8775
8776 if (ModIdx == -1)
8777 continue;
8778
8779 uint32_t ModVal = 0;
8780
8781 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
8782 if (SrcOp.isReg() && getMRI()
8783 ->getRegClass(AMDGPU::VGPR_16RegClassID)
8784 .contains(SrcOp.getReg())) {
8785 bool VGPRSuffixIsHi = AMDGPU::isHi(SrcOp.getReg(), *getMRI());
8786 if (VGPRSuffixIsHi)
8787 ModVal |= SISrcMods::OP_SEL_0;
8788 } else {
8789 if ((OpSel & (1 << J)) != 0)
8790 ModVal |= SISrcMods::OP_SEL_0;
8791 }
8792
8793 if ((OpSelHi & (1 << J)) != 0)
8794 ModVal |= SISrcMods::OP_SEL_1;
8795
8796 if ((NegLo & (1 << J)) != 0)
8797 ModVal |= SISrcMods::NEG;
8798
8799 if ((NegHi & (1 << J)) != 0)
8800 ModVal |= SISrcMods::NEG_HI;
8801
8802 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8803 }
8804}
8805
8806void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8807 OptionalImmIndexMap OptIdx;
8808 cvtVOP3(Inst, Operands, OptIdx);
8809 cvtVOP3P(Inst, Operands, OptIdx);
8810}
8811
8812static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
8813 unsigned i, unsigned Opc, unsigned OpName) {
8814 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
8815 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
8816 else
8817 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
8818}
8819
8820void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
8821 unsigned Opc = Inst.getOpcode();
8822
8823 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
8824 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
8825 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
8826 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
8827 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
8828
8829 OptionalImmIndexMap OptIdx;
8830 for (unsigned i = 5; i < Operands.size(); ++i) {
8831 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8832 OptIdx[Op.getImmTy()] = i;
8833 }
8834
8835 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
8836 addOptionalImmOperand(Inst, Operands, OptIdx,
8837 AMDGPUOperand::ImmTyIndexKey8bit);
8838
8839 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
8840 addOptionalImmOperand(Inst, Operands, OptIdx,
8841 AMDGPUOperand::ImmTyIndexKey16bit);
8842
8843 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8844 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
8845
8846 cvtVOP3P(Inst, Operands, OptIdx);
8847}
8848
8849//===----------------------------------------------------------------------===//
8850// VOPD
8851//===----------------------------------------------------------------------===//
8852
8853ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
8854 if (!hasVOPD(getSTI()))
8855 return ParseStatus::NoMatch;
8856
8857 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8858 SMLoc S = getLoc();
8859 lex();
8860 lex();
8861 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8862 SMLoc OpYLoc = getLoc();
8863 StringRef OpYName;
8864 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
8865 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
8866 return ParseStatus::Success;
8867 }
8868 return Error(OpYLoc, "expected a VOPDY instruction after ::");
8869 }
8870 return ParseStatus::NoMatch;
8871}
8872
8873// Create VOPD MCInst operands using parsed assembler operands.
8874void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
8875 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
8876 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
8877 if (Op.isReg()) {
8878 Op.addRegOperands(Inst, 1);
8879 return;
8880 }
8881 if (Op.isImm()) {
8882 Op.addImmOperands(Inst, 1);
8883 return;
8884 }
8885 llvm_unreachable("Unhandled operand type in cvtVOPD");
8886 };
8887
8888 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
8889
8890 // MCInst operands are ordered as follows:
8891 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
8892
8893 for (auto CompIdx : VOPD::COMPONENTS) {
8894 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
8895 }
8896
8897 for (auto CompIdx : VOPD::COMPONENTS) {
8898 const auto &CInfo = InstInfo[CompIdx];
8899 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
8900 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
8901 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
8902 if (CInfo.hasSrc2Acc())
8903 addOp(CInfo.getIndexOfDstInParsedOperands());
8904 }
8905}
8906
8907//===----------------------------------------------------------------------===//
8908// dpp
8909//===----------------------------------------------------------------------===//
8910
8911bool AMDGPUOperand::isDPP8() const {
8912 return isImmTy(ImmTyDPP8);
8913}
8914
8915bool AMDGPUOperand::isDPPCtrl() const {
8916 using namespace AMDGPU::DPP;
8917
8918 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8919 if (result) {
8920 int64_t Imm = getImm();
8921 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8922 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8923 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8924 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8925 (Imm == DppCtrl::WAVE_SHL1) ||
8926 (Imm == DppCtrl::WAVE_ROL1) ||
8927 (Imm == DppCtrl::WAVE_SHR1) ||
8928 (Imm == DppCtrl::WAVE_ROR1) ||
8929 (Imm == DppCtrl::ROW_MIRROR) ||
8930 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8931 (Imm == DppCtrl::BCAST15) ||
8932 (Imm == DppCtrl::BCAST31) ||
8933 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8934 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8935 }
8936 return false;
8937}
8938
8939//===----------------------------------------------------------------------===//
8940// mAI
8941//===----------------------------------------------------------------------===//
8942
8943bool AMDGPUOperand::isBLGP() const {
8944 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8945}
8946
8947bool AMDGPUOperand::isS16Imm() const {
8948 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8949}
8950
8951bool AMDGPUOperand::isU16Imm() const {
8952 return isImmLiteral() && isUInt<16>(getImm());
8953}
8954
8955//===----------------------------------------------------------------------===//
8956// dim
8957//===----------------------------------------------------------------------===//
8958
8959bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8960 // We want to allow "dim:1D" etc.,
8961 // but the initial 1 is tokenized as an integer.
8962 std::string Token;
8963 if (isToken(AsmToken::Integer)) {
8964 SMLoc Loc = getToken().getEndLoc();
8965 Token = std::string(getTokenStr());
8966 lex();
8967 if (getLoc() != Loc)
8968 return false;
8969 }
8970
8971 StringRef Suffix;
8972 if (!parseId(Suffix))
8973 return false;
8974 Token += Suffix;
8975
8976 StringRef DimId = Token;
8977 if (DimId.starts_with("SQ_RSRC_IMG_"))
8978 DimId = DimId.drop_front(12);
8979
8980 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8981 if (!DimInfo)
8982 return false;
8983
8984 Encoding = DimInfo->Encoding;
8985 return true;
8986}
8987
8988ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8989 if (!isGFX10Plus())
8990 return ParseStatus::NoMatch;
8991
8992 SMLoc S = getLoc();
8993
8994 if (!trySkipId("dim", AsmToken::Colon))
8995 return ParseStatus::NoMatch;
8996
8997 unsigned Encoding;
8998 SMLoc Loc = getLoc();
8999 if (!parseDimId(Encoding))
9000 return Error(Loc, "invalid dim value");
9001
9002 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
9003 AMDGPUOperand::ImmTyDim));
9004 return ParseStatus::Success;
9005}
9006
9007//===----------------------------------------------------------------------===//
9008// dpp
9009//===----------------------------------------------------------------------===//
9010
9011ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9012 SMLoc S = getLoc();
9013
9014 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9015 return ParseStatus::NoMatch;
9016
9017 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
9018
9019 int64_t Sels[8];
9020
9021 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9022 return ParseStatus::Failure;
9023
9024 for (size_t i = 0; i < 8; ++i) {
9025 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9026 return ParseStatus::Failure;
9027
9028 SMLoc Loc = getLoc();
9029 if (getParser().parseAbsoluteExpression(Sels[i]))
9030 return ParseStatus::Failure;
9031 if (0 > Sels[i] || 7 < Sels[i])
9032 return Error(Loc, "expected a 3-bit value");
9033 }
9034
9035 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9036 return ParseStatus::Failure;
9037
9038 unsigned DPP8 = 0;
9039 for (size_t i = 0; i < 8; ++i)
9040 DPP8 |= (Sels[i] << (i * 3));
9041
9042 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9043 return ParseStatus::Success;
9044}
9045
9046bool
9047AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9048 const OperandVector &Operands) {
9049 if (Ctrl == "row_newbcast")
9050 return isGFX90A();
9051
9052 if (Ctrl == "row_share" ||
9053 Ctrl == "row_xmask")
9054 return isGFX10Plus();
9055
9056 if (Ctrl == "wave_shl" ||
9057 Ctrl == "wave_shr" ||
9058 Ctrl == "wave_rol" ||
9059 Ctrl == "wave_ror" ||
9060 Ctrl == "row_bcast")
9061 return isVI() || isGFX9();
9062
9063 return Ctrl == "row_mirror" ||
9064 Ctrl == "row_half_mirror" ||
9065 Ctrl == "quad_perm" ||
9066 Ctrl == "row_shl" ||
9067 Ctrl == "row_shr" ||
9068 Ctrl == "row_ror";
9069}
9070
9071int64_t
9072AMDGPUAsmParser::parseDPPCtrlPerm() {
9073 // quad_perm:[%d,%d,%d,%d]
9074
9075 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9076 return -1;
9077
9078 int64_t Val = 0;
9079 for (int i = 0; i < 4; ++i) {
9080 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9081 return -1;
9082
9083 int64_t Temp;
9084 SMLoc Loc = getLoc();
9085 if (getParser().parseAbsoluteExpression(Temp))
9086 return -1;
9087 if (Temp < 0 || Temp > 3) {
9088 Error(Loc, "expected a 2-bit value");
9089 return -1;
9090 }
9091
9092 Val += (Temp << i * 2);
9093 }
9094
9095 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9096 return -1;
9097
9098 return Val;
9099}
9100
9101int64_t
9102AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
9103 using namespace AMDGPU::DPP;
9104
9105 // sel:%d
9106
9107 int64_t Val;
9108 SMLoc Loc = getLoc();
9109
9110 if (getParser().parseAbsoluteExpression(Val))
9111 return -1;
9112
9113 struct DppCtrlCheck {
9114 int64_t Ctrl;
9115 int Lo;
9116 int Hi;
9117 };
9118
9119 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
9120 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
9121 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
9122 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
9123 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
9124 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
9125 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
9126 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
9127 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
9128 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
9129 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
9130 .Default({-1, 0, 0});
9131
9132 bool Valid;
9133 if (Check.Ctrl == -1) {
9134 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
9135 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
9136 } else {
9137 Valid = Check.Lo <= Val && Val <= Check.Hi;
9138 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
9139 }
9140
9141 if (!Valid) {
9142 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
9143 return -1;
9144 }
9145
9146 return Val;
9147}
9148
9149ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
9150 using namespace AMDGPU::DPP;
9151
9152 if (!isToken(AsmToken::Identifier) ||
9153 !isSupportedDPPCtrl(getTokenStr(), Operands))
9154 return ParseStatus::NoMatch;
9155
9156 SMLoc S = getLoc();
9157 int64_t Val = -1;
9158 StringRef Ctrl;
9159
9160 parseId(Ctrl);
9161
9162 if (Ctrl == "row_mirror") {
9163 Val = DppCtrl::ROW_MIRROR;
9164 } else if (Ctrl == "row_half_mirror") {
9165 Val = DppCtrl::ROW_HALF_MIRROR;
9166 } else {
9167 if (skipToken(AsmToken::Colon, "expected a colon")) {
9168 if (Ctrl == "quad_perm") {
9169 Val = parseDPPCtrlPerm();
9170 } else {
9171 Val = parseDPPCtrlSel(Ctrl);
9172 }
9173 }
9174 }
9175
9176 if (Val == -1)
9177 return ParseStatus::Failure;
9178
9179 Operands.push_back(
9180 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
9181 return ParseStatus::Success;
9182}
9183
9184void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
9185 bool IsDPP8) {
9186 OptionalImmIndexMap OptionalIdx;
9187 unsigned Opc = Inst.getOpcode();
9188 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9189
9190 // MAC instructions are special because they have 'old'
9191 // operand which is not tied to dst (but assumed to be).
9192 // They also have dummy unused src2_modifiers.
9193 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
9194 int Src2ModIdx =
9195 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
9196 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
9197 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
9198
9199 unsigned I = 1;
9200 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9201 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9202 }
9203
9204 int Fi = 0;
9205 for (unsigned E = Operands.size(); I != E; ++I) {
9206
9207 if (IsMAC) {
9208 int NumOperands = Inst.getNumOperands();
9209 if (OldIdx == NumOperands) {
9210 // Handle old operand
9211 constexpr int DST_IDX = 0;
9212 Inst.addOperand(Inst.getOperand(DST_IDX));
9213 } else if (Src2ModIdx == NumOperands) {
9214 // Add unused dummy src2_modifiers
9215 Inst.addOperand(MCOperand::createImm(0));
9216 }
9217 }
9218
9219 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
9220 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
9221 Inst.addOperand(Inst.getOperand(0));
9222 }
9223
9224 bool IsVOP3CvtSrDpp =
9225 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9226 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9227 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9228 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
9229 if (IsVOP3CvtSrDpp) {
9230 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
9231 Inst.addOperand(MCOperand::createImm(0));
9232 Inst.addOperand(MCOperand::createReg(0));
9233 }
9234 }
9235
9236 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9237 MCOI::TIED_TO);
9238 if (TiedTo != -1) {
9239 assert((unsigned)TiedTo < Inst.getNumOperands());
9240 // handle tied old or src2 for MAC instructions
9241 Inst.addOperand(Inst.getOperand(TiedTo));
9242 }
9243 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9244 // Add the register arguments
9245 if (IsDPP8 && Op.isDppFI()) {
9246 Fi = Op.getImm();
9247 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9248 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9249 } else if (Op.isReg()) {
9250 Op.addRegOperands(Inst, 1);
9251 } else if (Op.isImm() &&
9252 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
9253 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
9254 Op.addImmOperands(Inst, 1);
9255 } else if (Op.isImm()) {
9256 OptionalIdx[Op.getImmTy()] = I;
9257 } else {
9258 llvm_unreachable("unhandled operand type");
9259 }
9260 }
9261
9262 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel))
9263 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9264 AMDGPUOperand::ImmTyByteSel);
9265
9266 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9267 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9268 AMDGPUOperand::ImmTyClamp);
9269
9270 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9271 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
9272
9273 if (Desc.TSFlags & SIInstrFlags::VOP3P)
9274 cvtVOP3P(Inst, Operands, OptionalIdx);
9275 else if (Desc.TSFlags & SIInstrFlags::VOP3)
9276 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
9277 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9278 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9279 }
9280
9281 if (IsDPP8) {
9282 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
9283 using namespace llvm::AMDGPU::DPP;
9284 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9285 } else {
9286 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
9287 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9288 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9289 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9290
9291 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
9292 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9293 AMDGPUOperand::ImmTyDppFI);
9294 }
9295}
9296
9297void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
9298 OptionalImmIndexMap OptionalIdx;
9299
9300 unsigned I = 1;
9301 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9302 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9303 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9304 }
9305
9306 int Fi = 0;
9307 for (unsigned E = Operands.size(); I != E; ++I) {
9308 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9309 MCOI::TIED_TO);
9310 if (TiedTo != -1) {
9311 assert((unsigned)TiedTo < Inst.getNumOperands());
9312 // handle tied old or src2 for MAC instructions
9313 Inst.addOperand(Inst.getOperand(TiedTo));
9314 }
9315 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9316 // Add the register arguments
9317 if (Op.isReg() && validateVccOperand(Op.getReg())) {
9318 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
9319 // Skip it.
9320 continue;
9321 }
9322
9323 if (IsDPP8) {
9324 if (Op.isDPP8()) {
9325 Op.addImmOperands(Inst, 1);
9326 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9327 Op.addRegWithFPInputModsOperands(Inst, 2);
9328 } else if (Op.isDppFI()) {
9329 Fi = Op.getImm();
9330 } else if (Op.isReg()) {
9331 Op.addRegOperands(Inst, 1);
9332 } else {
9333 llvm_unreachable("Invalid operand type");
9334 }
9335 } else {
9336 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9337 Op.addRegWithFPInputModsOperands(Inst, 2);
9338 } else if (Op.isReg()) {
9339 Op.addRegOperands(Inst, 1);
9340 } else if (Op.isDPPCtrl()) {
9341 Op.addImmOperands(Inst, 1);
9342 } else if (Op.isImm()) {
9343 // Handle optional arguments
9344 OptionalIdx[Op.getImmTy()] = I;
9345 } else {
9346 llvm_unreachable("Invalid operand type");
9347 }
9348 }
9349 }
9350
9351 if (IsDPP8) {
9352 using namespace llvm::AMDGPU::DPP;
9353 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9354 } else {
9355 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9356 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9357 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9358 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
9359 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9360 AMDGPUOperand::ImmTyDppFI);
9361 }
9362 }
9363}
9364
9365//===----------------------------------------------------------------------===//
9366// sdwa
9367//===----------------------------------------------------------------------===//
9368
9369ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
9370 StringRef Prefix,
9371 AMDGPUOperand::ImmTy Type) {
9372 using namespace llvm::AMDGPU::SDWA;
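 // Editorial note (illustrative, not part of the LLVM source): parses operands such as
 // dst_sel:DWORD, src0_sel:BYTE_0 or src1_sel:WORD_1, where Prefix is the
 // "dst_sel"/"src0_sel"/"src1_sel" keyword and the value is one of the selector
 // names matched below.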
9373
9374 SMLoc S = getLoc();
9375 StringRef Value;
9376
9377 SMLoc StringLoc;
9378 ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc);
9379 if (!Res.isSuccess())
9380 return Res;
9381
9382 int64_t Int;
9383 Int = StringSwitch<int64_t>(Value)
9384 .Case("BYTE_0", SdwaSel::BYTE_0)
9385 .Case("BYTE_1", SdwaSel::BYTE_1)
9386 .Case("BYTE_2", SdwaSel::BYTE_2)
9387 .Case("BYTE_3", SdwaSel::BYTE_3)
9388 .Case("WORD_0", SdwaSel::WORD_0)
9389 .Case("WORD_1", SdwaSel::WORD_1)
9390 .Case("DWORD", SdwaSel::DWORD)
9391 .Default(0xffffffff);
9392
9393 if (Int == 0xffffffff)
9394 return Error(StringLoc, "invalid " + Twine(Prefix) + " value");
9395
9396 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
9397 return ParseStatus::Success;
9398}
9399
9400ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
9401 using namespace llvm::AMDGPU::SDWA;
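 // Editorial note (illustrative, not part of the LLVM source): parses e.g.
 // dst_unused:UNUSED_PAD, dst_unused:UNUSED_SEXT or dst_unused:UNUSED_PRESERVE.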
9402
9403 SMLoc S = getLoc();
9404 StringRef Value;
9405
9406 SMLoc StringLoc;
9407 ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc);
9408 if (!Res.isSuccess())
9409 return Res;
9410
9411 int64_t Int;
9412 Int = StringSwitch<int64_t>(Value)
9413 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
9414 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
9415 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
9416 .Default(0xffffffff);
9417
9418 if (Int == 0xffffffff)
9419 return Error(StringLoc, "invalid dst_unused value");
9420
9421 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused));
9422 return ParseStatus::Success;
9423}
9424
9425void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
9426 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
9427}
9428
9429void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
9430 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
9431}
9432
9433void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
9434 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
9435}
9436
9437void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
9438 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
9439}
9440
9441void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
9442 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
9443}
9444
9445void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
9446 uint64_t BasicInstType,
9447 bool SkipDstVcc,
9448 bool SkipSrcVcc) {
9449 using namespace llvm::AMDGPU::SDWA;
9450
9451 OptionalImmIndexMap OptionalIdx;
9452 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
9453 bool SkippedVcc = false;
9454
9455 unsigned I = 1;
9456 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9457 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9458 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9459 }
9460
9461 for (unsigned E = Operands.size(); I != E; ++I) {
9462 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9463 if (SkipVcc && !SkippedVcc && Op.isReg() &&
9464 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
9465 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
9466 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
9467 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
9468 // Skip VCC only if we didn't skip it on previous iteration.
9469 // Note that src0 and src1 occupy 2 slots each because of modifiers.
9470 if (BasicInstType == SIInstrFlags::VOP2 &&
9471 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
9472 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
9473 SkippedVcc = true;
9474 continue;
9475 }
9476 if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
9477 SkippedVcc = true;
9478 continue;
9479 }
9480 }
9481 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9482 Op.addRegOrImmWithInputModsOperands(Inst, 2);
9483 } else if (Op.isImm()) {
9484 // Handle optional arguments
9485 OptionalIdx[Op.getImmTy()] = I;
9486 } else {
9487 llvm_unreachable("Invalid operand type");
9488 }
9489 SkippedVcc = false;
9490 }
9491
9492 const unsigned Opc = Inst.getOpcode();
9493 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
9494 Opc != AMDGPU::V_NOP_sdwa_vi) {
9495 // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
9496 switch (BasicInstType) {
9497 case SIInstrFlags::VOP1:
9498 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9499 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9500 AMDGPUOperand::ImmTyClamp, 0);
9501
9502 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9503 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9504 AMDGPUOperand::ImmTyOModSI, 0);
9505
9506 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
9507 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9508 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9509
9510 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
9511 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9512 AMDGPUOperand::ImmTySDWADstUnused,
9513 DstUnused::UNUSED_PRESERVE);
9514
9515 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9516 break;
9517
9518 case SIInstrFlags::VOP2:
9519 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9520 AMDGPUOperand::ImmTyClamp, 0);
9521
9522 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
9523 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
9524
9525 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9526 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
9527 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9528 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9529 break;
9530
9531 case SIInstrFlags::VOPC:
9532 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
9533 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9534 AMDGPUOperand::ImmTyClamp, 0);
9535 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9536 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9537 break;
9538
9539 default:
9540 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9541 }
9542 }
9543
9544 // special case v_mac_{f16, f32}:
9545 // it has src2 register operand that is tied to dst operand
9546 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9547 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
9548 auto it = Inst.begin();
9549 std::advance(
9550 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
9551 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
9552 }
9553}
9554
9555/// Force static initialization.
9556extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
9557 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
9558 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
9559}
9560
9561#define GET_REGISTER_MATCHER
9562#define GET_MATCHER_IMPLEMENTATION
9563#define GET_MNEMONIC_SPELL_CHECKER
9564#define GET_MNEMONIC_CHECKER
9565#include "AMDGPUGenAsmMatcher.inc"
9566
9567ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
9568 unsigned MCK) {
9569 switch (MCK) {
9570 case MCK_addr64:
9571 return parseTokenOp("addr64", Operands);
9572 case MCK_done:
9573 return parseTokenOp("done", Operands);
9574 case MCK_idxen:
9575 return parseTokenOp("idxen", Operands);
9576 case MCK_lds:
9577 return parseTokenOp("lds", Operands);
9578 case MCK_offen:
9579 return parseTokenOp("offen", Operands);
9580 case MCK_off:
9581 return parseTokenOp("off", Operands);
9582 case MCK_row_95_en:
9583 return parseTokenOp("row_en", Operands);
9584 case MCK_gds:
9585 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
9586 case MCK_tfe:
9587 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
9588 }
9589 return tryCustomParseOperand(Operands, MCK);
9590}
9591
9592// This function should be defined after auto-generated include so that we have
9593// MatchClassKind enum defined
9594unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
9595 unsigned Kind) {
9596 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
9597 // But MatchInstructionImpl() expects to meet token and fails to validate
9598 // operand. This method checks if we are given immediate operand but expect to
9599 // get corresponding token.
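 // Editorial note (illustrative, not part of the LLVM source): e.g. "gds" is parsed as
 // an ImmTyGDS immediate, so matching the MCK_gds token class is delegated to
 // Operand.isGDS() below.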
9600 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9601 switch (Kind) {
9602 case MCK_addr64:
9603 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9604 case MCK_gds:
9605 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9606 case MCK_lds:
9607 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9608 case MCK_idxen:
9609 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9610 case MCK_offen:
9611 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9612 case MCK_tfe:
9613 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
9614 case MCK_SSrc_b32:
9615 // When operands have expression values, they will return true for isToken,
9616 // because it is not possible to distinguish between a token and an
9617 // expression at parse time. MatchInstructionImpl() will always try to
9618 // match an operand as a token, when isToken returns true, and when the
9619 // name of the expression is not a valid token, the match will fail,
9620 // so we need to handle it here.
9621 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
9622 case MCK_SSrc_f32:
9623 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
9624 case MCK_SOPPBrTarget:
9625 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
9626 case MCK_VReg32OrOff:
9627 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9628 case MCK_InterpSlot:
9629 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9630 case MCK_InterpAttr:
9631 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9632 case MCK_InterpAttrChan:
9633 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
9634 case MCK_SReg_64:
9635 case MCK_SReg_64_XEXEC:
9636 // Null is defined as a 32-bit register but
9637 // it should also be enabled with 64-bit operands.
9638 // The following code enables it for SReg_64 operands
9639 // used as source and destination. Remaining source
9640 // operands are handled in isInlinableImm.
9641 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9642 default:
9643 return Match_InvalidOperand;
9644 }
9645}
9646
9647//===----------------------------------------------------------------------===//
9648// endpgm
9649//===----------------------------------------------------------------------===//
9650
9651ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
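 // Editorial note (illustrative, not part of the LLVM source): s_endpgm takes an
 // optional 16-bit immediate, e.g. "s_endpgm" or "s_endpgm 3"; when omitted the
 // operand defaults to 0.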
9652 SMLoc S = getLoc();
9653 int64_t Imm = 0;
9654
9655 if (!parseExpr(Imm)) {
9656 // The operand is optional, if not present default to 0
9657 Imm = 0;
9658 }
9659
9660 if (!isUInt<16>(Imm))
9661 return Error(S, "expected a 16-bit value");
9662
9663 Operands.push_back(
9664 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9665 return ParseStatus::Success;
9666}
9667
9668bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9669
9670//===----------------------------------------------------------------------===//
9671// Split Barrier
9672//===----------------------------------------------------------------------===//
9673
9674bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }