1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
14#include "SIDefines.h"
15#include "SIInstrInfo.h"
16#include "SIRegisterInfo.h"
21#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/StringSet.h"
24#include "llvm/ADT/Twine.h"
27#include "llvm/MC/MCAsmInfo.h"
28#include "llvm/MC/MCContext.h"
29#include "llvm/MC/MCExpr.h"
30#include "llvm/MC/MCInst.h"
31#include "llvm/MC/MCInstrDesc.h"
36#include "llvm/MC/MCSymbol.h"
43#include <optional>
44
45using namespace llvm;
46using namespace llvm::AMDGPU;
47using namespace llvm::amdhsa;
48
49namespace {
50
51class AMDGPUAsmParser;
52
53enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
54
55//===----------------------------------------------------------------------===//
56// Operand
57//===----------------------------------------------------------------------===//
58
59class AMDGPUOperand : public MCParsedAsmOperand {
60 enum KindTy {
61 Token,
62 Immediate,
63 Register,
64 Expression
65 } Kind;
66
67 SMLoc StartLoc, EndLoc;
68 const AMDGPUAsmParser *AsmParser;
69
70public:
71 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
72 : Kind(Kind_), AsmParser(AsmParser_) {}
73
74 using Ptr = std::unique_ptr<AMDGPUOperand>;
75
76 struct Modifiers {
77 bool Abs = false;
78 bool Neg = false;
79 bool Sext = false;
80 bool Lit = false;
81
82 bool hasFPModifiers() const { return Abs || Neg; }
83 bool hasIntModifiers() const { return Sext; }
84 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
85
86 int64_t getFPModifiersOperand() const {
87 int64_t Operand = 0;
88 Operand |= Abs ? SISrcMods::ABS : 0u;
89 Operand |= Neg ? SISrcMods::NEG : 0u;
90 return Operand;
91 }
92
93 int64_t getIntModifiersOperand() const {
94 int64_t Operand = 0;
95 Operand |= Sext ? SISrcMods::SEXT : 0u;
96 return Operand;
97 }
98
99 int64_t getModifiersOperand() const {
100 assert(!(hasFPModifiers() && hasIntModifiers())
101 && "fp and int modifiers should not be used simultaneously");
102 if (hasFPModifiers()) {
103 return getFPModifiersOperand();
104 } else if (hasIntModifiers()) {
105 return getIntModifiersOperand();
106 } else {
107 return 0;
108 }
109 }
110
111 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
112 };
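  // Editor's illustrative sketch (not part of the original source): how a
  // source operand written as "-|v0|" maps onto Modifiers and then onto the
  // encoded src-modifier immediate, assuming the SISrcMods::ABS/NEG bits
  // from SIDefines.h used above.
  //
  //   AMDGPUOperand::Modifiers Mods;
  //   Mods.Abs = true;                          // |v0|
  //   Mods.Neg = true;                          // -|v0|
  //   int64_t Enc = Mods.getModifiersOperand(); // == SISrcMods::NEG | SISrcMods::ABS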
113
114 enum ImmTy {
115 ImmTyNone,
116 ImmTyGDS,
117 ImmTyLDS,
118 ImmTyOffen,
119 ImmTyIdxen,
120 ImmTyAddr64,
121 ImmTyOffset,
122 ImmTyInstOffset,
123 ImmTyOffset0,
124 ImmTyOffset1,
125 ImmTySMEMOffsetMod,
126 ImmTyCPol,
127 ImmTyTFE,
128 ImmTyD16,
129 ImmTyClampSI,
130 ImmTyOModSI,
131 ImmTySDWADstSel,
132 ImmTySDWASrc0Sel,
133 ImmTySDWASrc1Sel,
134 ImmTySDWADstUnused,
135 ImmTyDMask,
136 ImmTyDim,
137 ImmTyUNorm,
138 ImmTyDA,
139 ImmTyR128A16,
140 ImmTyA16,
141 ImmTyLWE,
142 ImmTyExpTgt,
143 ImmTyExpCompr,
144 ImmTyExpVM,
145 ImmTyFORMAT,
146 ImmTyHwreg,
147 ImmTyOff,
148 ImmTySendMsg,
149 ImmTyInterpSlot,
150 ImmTyInterpAttr,
151 ImmTyInterpAttrChan,
152 ImmTyOpSel,
153 ImmTyOpSelHi,
154 ImmTyNegLo,
155 ImmTyNegHi,
156 ImmTyIndexKey8bit,
157 ImmTyIndexKey16bit,
158 ImmTyDPP8,
159 ImmTyDppCtrl,
160 ImmTyDppRowMask,
161 ImmTyDppBankMask,
162 ImmTyDppBoundCtrl,
163 ImmTyDppFI,
164 ImmTySwizzle,
165 ImmTyGprIdxMode,
166 ImmTyHigh,
167 ImmTyBLGP,
168 ImmTyCBSZ,
169 ImmTyABID,
170 ImmTyEndpgm,
171 ImmTyWaitVDST,
172 ImmTyWaitEXP,
173 ImmTyWaitVAVDst,
174 ImmTyWaitVMVSrc,
175 ImmTyByteSel,
176 };
177
178 // Immediate operand kind.
179 // It helps to identify the location of an offending operand after an error.
180 // Note that regular literals and mandatory literals (KImm) must be handled
181 // differently. When looking for an offending operand, we should usually
182 // ignore mandatory literals because they are part of the instruction and
183 // cannot be changed. Report location of mandatory operands only for VOPD,
184 // when both OpX and OpY have a KImm and there are no other literals.
185 enum ImmKindTy {
186 ImmKindTyNone,
187 ImmKindTyLiteral,
188 ImmKindTyMandatoryLiteral,
189 ImmKindTyConst,
190 };
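  // Illustrative example (an editor's assumption, not from the original
  // source): in
  //   v_fmaak_f32 v0, v1, v2, 0x1234      // 0x1234 is a mandatory literal (KImm)
  //   v_add_f32   v0, 0x3f800000, v1      // regular 32-bit literal
  // error locations are normally reported against the regular literal, while
  // the mandatory KImm is skipped as described above.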
191
192private:
193 struct TokOp {
194 const char *Data;
195 unsigned Length;
196 };
197
198 struct ImmOp {
199 int64_t Val;
200 ImmTy Type;
201 bool IsFPImm;
202 mutable ImmKindTy Kind;
203 Modifiers Mods;
204 };
205
206 struct RegOp {
207 unsigned RegNo;
208 Modifiers Mods;
209 };
210
211 union {
212 TokOp Tok;
213 ImmOp Imm;
214 RegOp Reg;
215 const MCExpr *Expr;
216 };
217
218public:
219 bool isToken() const override { return Kind == Token; }
220
221 bool isSymbolRefExpr() const {
222 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
223 }
224
225 bool isImm() const override {
226 return Kind == Immediate;
227 }
228
229 void setImmKindNone() const {
230 assert(isImm());
231 Imm.Kind = ImmKindTyNone;
232 }
233
234 void setImmKindLiteral() const {
235 assert(isImm());
236 Imm.Kind = ImmKindTyLiteral;
237 }
238
239 void setImmKindMandatoryLiteral() const {
240 assert(isImm());
241 Imm.Kind = ImmKindTyMandatoryLiteral;
242 }
243
244 void setImmKindConst() const {
245 assert(isImm());
246 Imm.Kind = ImmKindTyConst;
247 }
248
249 bool IsImmKindLiteral() const {
250 return isImm() && Imm.Kind == ImmKindTyLiteral;
251 }
252
253 bool IsImmKindMandatoryLiteral() const {
254 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
255 }
256
257 bool isImmKindConst() const {
258 return isImm() && Imm.Kind == ImmKindTyConst;
259 }
260
261 bool isInlinableImm(MVT type) const;
262 bool isLiteralImm(MVT type) const;
263
264 bool isRegKind() const {
265 return Kind == Register;
266 }
267
268 bool isReg() const override {
269 return isRegKind() && !hasModifiers();
270 }
271
272 bool isRegOrInline(unsigned RCID, MVT type) const {
273 return isRegClass(RCID) || isInlinableImm(type);
274 }
275
276 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
277 return isRegOrInline(RCID, type) || isLiteralImm(type);
278 }
279
280 bool isRegOrImmWithInt16InputMods() const {
281 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
282 }
283
284 bool isRegOrImmWithIntT16InputMods() const {
285 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
286 }
287
288 bool isRegOrImmWithInt32InputMods() const {
289 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
290 }
291
292 bool isRegOrInlineImmWithInt16InputMods() const {
293 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
294 }
295
296 bool isRegOrInlineImmWithInt32InputMods() const {
297 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
298 }
299
300 bool isRegOrImmWithInt64InputMods() const {
301 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
302 }
303
304 bool isRegOrImmWithFP16InputMods() const {
305 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
306 }
307
308 bool isRegOrImmWithFPT16InputMods() const {
309 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
310 }
311
312 bool isRegOrImmWithFP32InputMods() const {
313 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
314 }
315
316 bool isRegOrImmWithFP64InputMods() const {
317 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
318 }
319
320 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
321 return isRegOrInline(
322 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
323 }
324
325 bool isRegOrInlineImmWithFP32InputMods() const {
326 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
327 }
328
329 bool isPackedFP16InputMods() const {
330 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
331 }
332
333 bool isVReg() const {
334 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
335 isRegClass(AMDGPU::VReg_64RegClassID) ||
336 isRegClass(AMDGPU::VReg_96RegClassID) ||
337 isRegClass(AMDGPU::VReg_128RegClassID) ||
338 isRegClass(AMDGPU::VReg_160RegClassID) ||
339 isRegClass(AMDGPU::VReg_192RegClassID) ||
340 isRegClass(AMDGPU::VReg_256RegClassID) ||
341 isRegClass(AMDGPU::VReg_512RegClassID) ||
342 isRegClass(AMDGPU::VReg_1024RegClassID);
343 }
344
345 bool isVReg32() const {
346 return isRegClass(AMDGPU::VGPR_32RegClassID);
347 }
348
349 bool isVReg32OrOff() const {
350 return isOff() || isVReg32();
351 }
352
353 bool isNull() const {
354 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
355 }
356
357 bool isVRegWithInputMods() const;
358 template <bool IsFake16> bool isT16VRegWithInputMods() const;
359
360 bool isSDWAOperand(MVT type) const;
361 bool isSDWAFP16Operand() const;
362 bool isSDWAFP32Operand() const;
363 bool isSDWAInt16Operand() const;
364 bool isSDWAInt32Operand() const;
365
366 bool isImmTy(ImmTy ImmT) const {
367 return isImm() && Imm.Type == ImmT;
368 }
369
370 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
371
372 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
373
374 bool isImmModifier() const {
375 return isImm() && Imm.Type != ImmTyNone;
376 }
377
378 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
379 bool isDMask() const { return isImmTy(ImmTyDMask); }
380 bool isDim() const { return isImmTy(ImmTyDim); }
381 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
382 bool isOff() const { return isImmTy(ImmTyOff); }
383 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
384 bool isOffen() const { return isImmTy(ImmTyOffen); }
385 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
386 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
387 bool isOffset() const { return isImmTy(ImmTyOffset); }
388 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
389 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
390 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
391 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
392 bool isGDS() const { return isImmTy(ImmTyGDS); }
393 bool isLDS() const { return isImmTy(ImmTyLDS); }
394 bool isCPol() const { return isImmTy(ImmTyCPol); }
395 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
396 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
397 bool isTFE() const { return isImmTy(ImmTyTFE); }
398 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
399 bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); }
400 bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); }
401 bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
402 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
403 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
404 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
405 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
406 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
407 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
408 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
409 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
410 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
411 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
412 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
413 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
414 bool isByteSel() const {
415 return isImmTy(ImmTyByteSel) && isUInt<2>(getImm());
416 }
417
418 bool isRegOrImm() const {
419 return isReg() || isImm();
420 }
421
422 bool isRegClass(unsigned RCID) const;
423
424 bool isInlineValue() const;
425
426 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
427 return isRegOrInline(RCID, type) && !hasModifiers();
428 }
429
430 bool isSCSrcB16() const {
431 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
432 }
433
434 bool isSCSrcV2B16() const {
435 return isSCSrcB16();
436 }
437
438 bool isSCSrc_b32() const {
439 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
440 }
441
442 bool isSCSrc_b64() const {
443 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
444 }
445
446 bool isBoolReg() const;
447
448 bool isSCSrcF16() const {
449 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
450 }
451
452 bool isSCSrcV2F16() const {
453 return isSCSrcF16();
454 }
455
456 bool isSCSrcF32() const {
457 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
458 }
459
460 bool isSCSrcF64() const {
461 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
462 }
463
464 bool isSSrc_b32() const {
465 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
466 }
467
468 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
469
470 bool isSSrcV2B16() const {
471 llvm_unreachable("cannot happen");
472 return isSSrc_b16();
473 }
474
475 bool isSSrc_b64() const {
476 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
477 // See isVSrc64().
478 return isSCSrc_b64() || isLiteralImm(MVT::i64);
479 }
480
481 bool isSSrc_f32() const {
482 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
483 }
484
485 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
486
487 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
488
489 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
490
491 bool isSSrcV2F16() const {
492 llvm_unreachable("cannot happen");
493 return isSSrc_f16();
494 }
495
496 bool isSSrcV2FP32() const {
497 llvm_unreachable("cannot happen");
498 return isSSrc_f32();
499 }
500
501 bool isSCSrcV2FP32() const {
502 llvm_unreachable("cannot happen");
503 return isSCSrcF32();
504 }
505
506 bool isSSrcV2INT32() const {
507 llvm_unreachable("cannot happen");
508 return isSSrc_b32();
509 }
510
511 bool isSCSrcV2INT32() const {
512 llvm_unreachable("cannot happen");
513 return isSCSrc_b32();
514 }
515
516 bool isSSrcOrLds_b32() const {
517 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
518 isLiteralImm(MVT::i32) || isExpr();
519 }
520
521 bool isVCSrc_b32() const {
522 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
523 }
524
525 bool isVCSrcB64() const {
526 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
527 }
528
529 bool isVCSrcTB16() const {
530 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
531 }
532
533 bool isVCSrcTB16_Lo128() const {
534 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
535 }
536
537 bool isVCSrcFake16B16_Lo128() const {
538 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
539 }
540
541 bool isVCSrc_b16() const {
542 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
543 }
544
545 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
546
547 bool isVCSrc_f32() const {
548 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
549 }
550
551 bool isVCSrcF64() const {
552 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
553 }
554
555 bool isVCSrcTBF16() const {
556 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
557 }
558
559 bool isVCSrcTF16() const {
560 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
561 }
562
563 bool isVCSrcTBF16_Lo128() const {
564 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
565 }
566
567 bool isVCSrcTF16_Lo128() const {
568 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
569 }
570
571 bool isVCSrcFake16BF16_Lo128() const {
572 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
573 }
574
575 bool isVCSrcFake16F16_Lo128() const {
576 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
577 }
578
579 bool isVCSrc_bf16() const {
580 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
581 }
582
583 bool isVCSrc_f16() const {
584 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
585 }
586
587 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
588
589 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
590
591 bool isVSrc_b32() const {
592 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
593 }
594
595 bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
596
597 bool isVSrcT_b16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
598
599 bool isVSrcT_b16_Lo128() const {
600 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
601 }
602
603 bool isVSrcFake16_b16_Lo128() const {
604 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
605 }
606
607 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
608
609 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
610
611 bool isVCSrcV2FP32() const {
612 return isVCSrcF64();
613 }
614
615 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
616
617 bool isVCSrcV2INT32() const {
618 return isVCSrcB64();
619 }
620
621 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
622
623 bool isVSrc_f32() const {
624 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
625 }
626
627 bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }
628
629 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
630
631 bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
632
633 bool isVSrcT_bf16_Lo128() const {
634 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
635 }
636
637 bool isVSrcT_f16_Lo128() const {
638 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
639 }
640
641 bool isVSrcFake16_bf16_Lo128() const {
642 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
643 }
644
645 bool isVSrcFake16_f16_Lo128() const {
646 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
647 }
648
649 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
650
651 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
652
653 bool isVSrc_v2bf16() const {
654 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
655 }
656
657 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
658
659 bool isVISrcB32() const {
660 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
661 }
662
663 bool isVISrcB16() const {
664 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
665 }
666
667 bool isVISrcV2B16() const {
668 return isVISrcB16();
669 }
670
671 bool isVISrcF32() const {
672 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
673 }
674
675 bool isVISrcF16() const {
676 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
677 }
678
679 bool isVISrcV2F16() const {
680 return isVISrcF16() || isVISrcB32();
681 }
682
683 bool isVISrc_64_bf16() const {
684 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
685 }
686
687 bool isVISrc_64_f16() const {
688 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
689 }
690
691 bool isVISrc_64_b32() const {
692 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
693 }
694
695 bool isVISrc_64B64() const {
696 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
697 }
698
699 bool isVISrc_64_f64() const {
700 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
701 }
702
703 bool isVISrc_64V2FP32() const {
704 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
705 }
706
707 bool isVISrc_64V2INT32() const {
708 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
709 }
710
711 bool isVISrc_256_b32() const {
712 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
713 }
714
715 bool isVISrc_256_f32() const {
716 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
717 }
718
719 bool isVISrc_256B64() const {
720 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
721 }
722
723 bool isVISrc_256_f64() const {
724 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
725 }
726
727 bool isVISrc_128B16() const {
728 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
729 }
730
731 bool isVISrc_128V2B16() const {
732 return isVISrc_128B16();
733 }
734
735 bool isVISrc_128_b32() const {
736 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
737 }
738
739 bool isVISrc_128_f32() const {
740 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
741 }
742
743 bool isVISrc_256V2FP32() const {
744 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
745 }
746
747 bool isVISrc_256V2INT32() const {
748 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
749 }
750
751 bool isVISrc_512_b32() const {
752 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
753 }
754
755 bool isVISrc_512B16() const {
756 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
757 }
758
759 bool isVISrc_512V2B16() const {
760 return isVISrc_512B16();
761 }
762
763 bool isVISrc_512_f32() const {
764 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
765 }
766
767 bool isVISrc_512F16() const {
768 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
769 }
770
771 bool isVISrc_512V2F16() const {
772 return isVISrc_512F16() || isVISrc_512_b32();
773 }
774
775 bool isVISrc_1024_b32() const {
776 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
777 }
778
779 bool isVISrc_1024B16() const {
780 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
781 }
782
783 bool isVISrc_1024V2B16() const {
784 return isVISrc_1024B16();
785 }
786
787 bool isVISrc_1024_f32() const {
788 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
789 }
790
791 bool isVISrc_1024F16() const {
792 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
793 }
794
795 bool isVISrc_1024V2F16() const {
796 return isVISrc_1024F16() || isVISrc_1024_b32();
797 }
798
799 bool isAISrcB32() const {
800 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
801 }
802
803 bool isAISrcB16() const {
804 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
805 }
806
807 bool isAISrcV2B16() const {
808 return isAISrcB16();
809 }
810
811 bool isAISrcF32() const {
812 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
813 }
814
815 bool isAISrcF16() const {
816 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
817 }
818
819 bool isAISrcV2F16() const {
820 return isAISrcF16() || isAISrcB32();
821 }
822
823 bool isAISrc_64B64() const {
824 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
825 }
826
827 bool isAISrc_64_f64() const {
828 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
829 }
830
831 bool isAISrc_128_b32() const {
832 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
833 }
834
835 bool isAISrc_128B16() const {
836 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
837 }
838
839 bool isAISrc_128V2B16() const {
840 return isAISrc_128B16();
841 }
842
843 bool isAISrc_128_f32() const {
844 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
845 }
846
847 bool isAISrc_128F16() const {
848 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
849 }
850
851 bool isAISrc_128V2F16() const {
852 return isAISrc_128F16() || isAISrc_128_b32();
853 }
854
855 bool isVISrc_128_bf16() const {
856 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
857 }
858
859 bool isVISrc_128_f16() const {
860 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
861 }
862
863 bool isVISrc_128V2F16() const {
864 return isVISrc_128_f16() || isVISrc_128_b32();
865 }
866
867 bool isAISrc_256B64() const {
868 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
869 }
870
871 bool isAISrc_256_f64() const {
872 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
873 }
874
875 bool isAISrc_512_b32() const {
876 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
877 }
878
879 bool isAISrc_512B16() const {
880 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
881 }
882
883 bool isAISrc_512V2B16() const {
884 return isAISrc_512B16();
885 }
886
887 bool isAISrc_512_f32() const {
888 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
889 }
890
891 bool isAISrc_512F16() const {
892 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
893 }
894
895 bool isAISrc_512V2F16() const {
896 return isAISrc_512F16() || isAISrc_512_b32();
897 }
898
899 bool isAISrc_1024_b32() const {
900 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
901 }
902
903 bool isAISrc_1024B16() const {
904 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
905 }
906
907 bool isAISrc_1024V2B16() const {
908 return isAISrc_1024B16();
909 }
910
911 bool isAISrc_1024_f32() const {
912 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
913 }
914
915 bool isAISrc_1024F16() const {
916 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
917 }
918
919 bool isAISrc_1024V2F16() const {
920 return isAISrc_1024F16() || isAISrc_1024_b32();
921 }
922
923 bool isKImmFP32() const {
924 return isLiteralImm(MVT::f32);
925 }
926
927 bool isKImmFP16() const {
928 return isLiteralImm(MVT::f16);
929 }
930
931 bool isMem() const override {
932 return false;
933 }
934
935 bool isExpr() const {
936 return Kind == Expression;
937 }
938
939 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
940
941 bool isSWaitCnt() const;
942 bool isDepCtr() const;
943 bool isSDelayALU() const;
944 bool isHwreg() const;
945 bool isSendMsg() const;
946 bool isSplitBarrier() const;
947 bool isSwizzle() const;
948 bool isSMRDOffset8() const;
949 bool isSMEMOffset() const;
950 bool isSMRDLiteralOffset() const;
951 bool isDPP8() const;
952 bool isDPPCtrl() const;
953 bool isBLGP() const;
954 bool isCBSZ() const;
955 bool isABID() const;
956 bool isGPRIdxMode() const;
957 bool isS16Imm() const;
958 bool isU16Imm() const;
959 bool isEndpgm() const;
960 bool isWaitVDST() const;
961 bool isWaitEXP() const;
962 bool isWaitVAVDst() const;
963 bool isWaitVMVSrc() const;
964
965 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
966 return std::bind(P, *this);
967 }
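  // Illustrative use (editor's sketch): wrap a test so it can be evaluated
  // later without carrying the operand around, e.g.
  //   auto IsOffTest = Op.getPredicate(
  //       [](const AMDGPUOperand &O) { return O.isOff(); });
  //   bool WasOff = IsOffTest();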
968
969 StringRef getToken() const {
970 assert(isToken());
971 return StringRef(Tok.Data, Tok.Length);
972 }
973
974 int64_t getImm() const {
975 assert(isImm());
976 return Imm.Val;
977 }
978
979 void setImm(int64_t Val) {
980 assert(isImm());
981 Imm.Val = Val;
982 }
983
984 ImmTy getImmTy() const {
985 assert(isImm());
986 return Imm.Type;
987 }
988
989 MCRegister getReg() const override {
990 assert(isRegKind());
991 return Reg.RegNo;
992 }
993
994 SMLoc getStartLoc() const override {
995 return StartLoc;
996 }
997
998 SMLoc getEndLoc() const override {
999 return EndLoc;
1000 }
1001
1002 SMRange getLocRange() const {
1003 return SMRange(StartLoc, EndLoc);
1004 }
1005
1006 Modifiers getModifiers() const {
1007 assert(isRegKind() || isImmTy(ImmTyNone));
1008 return isRegKind() ? Reg.Mods : Imm.Mods;
1009 }
1010
1011 void setModifiers(Modifiers Mods) {
1012 assert(isRegKind() || isImmTy(ImmTyNone));
1013 if (isRegKind())
1014 Reg.Mods = Mods;
1015 else
1016 Imm.Mods = Mods;
1017 }
1018
1019 bool hasModifiers() const {
1020 return getModifiers().hasModifiers();
1021 }
1022
1023 bool hasFPModifiers() const {
1024 return getModifiers().hasFPModifiers();
1025 }
1026
1027 bool hasIntModifiers() const {
1028 return getModifiers().hasIntModifiers();
1029 }
1030
1031 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1032
1033 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1034
1035 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1036
1037 void addRegOperands(MCInst &Inst, unsigned N) const;
1038
1039 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1040 if (isRegKind())
1041 addRegOperands(Inst, N);
1042 else
1043 addImmOperands(Inst, N);
1044 }
1045
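  // Note (editor): the helpers below first append the modifier bitmask
  // (e.g. SISrcMods::NEG) as its own immediate operand, and only then the
  // underlying register or unmodified immediate, i.e. the src_modifiers,
  // src order expected for operands with input modifiers.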
1046 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1047 Modifiers Mods = getModifiers();
1048 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1049 if (isRegKind()) {
1050 addRegOperands(Inst, N);
1051 } else {
1052 addImmOperands(Inst, N, false);
1053 }
1054 }
1055
1056 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1057 assert(!hasIntModifiers());
1058 addRegOrImmWithInputModsOperands(Inst, N);
1059 }
1060
1061 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1062 assert(!hasFPModifiers());
1063 addRegOrImmWithInputModsOperands(Inst, N);
1064 }
1065
1066 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1067 Modifiers Mods = getModifiers();
1068 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1069 assert(isRegKind());
1070 addRegOperands(Inst, N);
1071 }
1072
1073 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1074 assert(!hasIntModifiers());
1075 addRegWithInputModsOperands(Inst, N);
1076 }
1077
1078 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1079 assert(!hasFPModifiers());
1080 addRegWithInputModsOperands(Inst, N);
1081 }
1082
1083 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1084 // clang-format off
1085 switch (Type) {
1086 case ImmTyNone: OS << "None"; break;
1087 case ImmTyGDS: OS << "GDS"; break;
1088 case ImmTyLDS: OS << "LDS"; break;
1089 case ImmTyOffen: OS << "Offen"; break;
1090 case ImmTyIdxen: OS << "Idxen"; break;
1091 case ImmTyAddr64: OS << "Addr64"; break;
1092 case ImmTyOffset: OS << "Offset"; break;
1093 case ImmTyInstOffset: OS << "InstOffset"; break;
1094 case ImmTyOffset0: OS << "Offset0"; break;
1095 case ImmTyOffset1: OS << "Offset1"; break;
1096 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1097 case ImmTyCPol: OS << "CPol"; break;
1098 case ImmTyIndexKey8bit: OS << "index_key"; break;
1099 case ImmTyIndexKey16bit: OS << "index_key"; break;
1100 case ImmTyTFE: OS << "TFE"; break;
1101 case ImmTyD16: OS << "D16"; break;
1102 case ImmTyFORMAT: OS << "FORMAT"; break;
1103 case ImmTyClampSI: OS << "ClampSI"; break;
1104 case ImmTyOModSI: OS << "OModSI"; break;
1105 case ImmTyDPP8: OS << "DPP8"; break;
1106 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1107 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1108 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1109 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1110 case ImmTyDppFI: OS << "DppFI"; break;
1111 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1112 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1113 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1114 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1115 case ImmTyDMask: OS << "DMask"; break;
1116 case ImmTyDim: OS << "Dim"; break;
1117 case ImmTyUNorm: OS << "UNorm"; break;
1118 case ImmTyDA: OS << "DA"; break;
1119 case ImmTyR128A16: OS << "R128A16"; break;
1120 case ImmTyA16: OS << "A16"; break;
1121 case ImmTyLWE: OS << "LWE"; break;
1122 case ImmTyOff: OS << "Off"; break;
1123 case ImmTyExpTgt: OS << "ExpTgt"; break;
1124 case ImmTyExpCompr: OS << "ExpCompr"; break;
1125 case ImmTyExpVM: OS << "ExpVM"; break;
1126 case ImmTyHwreg: OS << "Hwreg"; break;
1127 case ImmTySendMsg: OS << "SendMsg"; break;
1128 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1129 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1130 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1131 case ImmTyOpSel: OS << "OpSel"; break;
1132 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1133 case ImmTyNegLo: OS << "NegLo"; break;
1134 case ImmTyNegHi: OS << "NegHi"; break;
1135 case ImmTySwizzle: OS << "Swizzle"; break;
1136 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1137 case ImmTyHigh: OS << "High"; break;
1138 case ImmTyBLGP: OS << "BLGP"; break;
1139 case ImmTyCBSZ: OS << "CBSZ"; break;
1140 case ImmTyABID: OS << "ABID"; break;
1141 case ImmTyEndpgm: OS << "Endpgm"; break;
1142 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1143 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1144 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1145 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1146 case ImmTyByteSel: OS << "ByteSel" ; break;
1147 }
1148 // clang-format on
1149 }
1150
1151 void print(raw_ostream &OS) const override {
1152 switch (Kind) {
1153 case Register:
1154 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1155 break;
1156 case Immediate:
1157 OS << '<' << getImm();
1158 if (getImmTy() != ImmTyNone) {
1159 OS << " type: "; printImmTy(OS, getImmTy());
1160 }
1161 OS << " mods: " << Imm.Mods << '>';
1162 break;
1163 case Token:
1164 OS << '\'' << getToken() << '\'';
1165 break;
1166 case Expression:
1167 OS << "<expr " << *Expr << '>';
1168 break;
1169 }
1170 }
1171
1172 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1173 int64_t Val, SMLoc Loc,
1174 ImmTy Type = ImmTyNone,
1175 bool IsFPImm = false) {
1176 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1177 Op->Imm.Val = Val;
1178 Op->Imm.IsFPImm = IsFPImm;
1179 Op->Imm.Kind = ImmKindTyNone;
1180 Op->Imm.Type = Type;
1181 Op->Imm.Mods = Modifiers();
1182 Op->StartLoc = Loc;
1183 Op->EndLoc = Loc;
1184 return Op;
1185 }
1186
1187 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1188 StringRef Str, SMLoc Loc,
1189 bool HasExplicitEncodingSize = true) {
1190 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1191 Res->Tok.Data = Str.data();
1192 Res->Tok.Length = Str.size();
1193 Res->StartLoc = Loc;
1194 Res->EndLoc = Loc;
1195 return Res;
1196 }
1197
1198 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1199 unsigned RegNo, SMLoc S,
1200 SMLoc E) {
1201 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1202 Op->Reg.RegNo = RegNo;
1203 Op->Reg.Mods = Modifiers();
1204 Op->StartLoc = S;
1205 Op->EndLoc = E;
1206 return Op;
1207 }
1208
1209 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1210 const class MCExpr *Expr, SMLoc S) {
1211 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1212 Op->Expr = Expr;
1213 Op->StartLoc = S;
1214 Op->EndLoc = S;
1215 return Op;
1216 }
1217};
1218
1219raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1220 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1221 return OS;
1222}
1223
1224//===----------------------------------------------------------------------===//
1225// AsmParser
1226//===----------------------------------------------------------------------===//
1227
1228// Holds info related to the current kernel, e.g. count of SGPRs used.
1229// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1230// .amdgpu_hsa_kernel or at EOF.
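// Illustrative example (editor's sketch, assuming a target without AGPRs):
//   .amdgpu_hsa_kernel my_kernel
//   my_kernel:
//     s_mov_b32 s5, 0
//     v_mov_b32_e32 v7, 0
// would leave .kernel.sgpr_count = 6 and .kernel.vgpr_count = 8
// (highest register index used, plus one).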
1231class KernelScopeInfo {
1232 int SgprIndexUnusedMin = -1;
1233 int VgprIndexUnusedMin = -1;
1234 int AgprIndexUnusedMin = -1;
1235 MCContext *Ctx = nullptr;
1236 MCSubtargetInfo const *MSTI = nullptr;
1237
1238 void usesSgprAt(int i) {
1239 if (i >= SgprIndexUnusedMin) {
1240 SgprIndexUnusedMin = ++i;
1241 if (Ctx) {
1242 MCSymbol* const Sym =
1243 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1244 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1245 }
1246 }
1247 }
1248
1249 void usesVgprAt(int i) {
1250 if (i >= VgprIndexUnusedMin) {
1251 VgprIndexUnusedMin = ++i;
1252 if (Ctx) {
1253 MCSymbol* const Sym =
1254 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1255 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1256 VgprIndexUnusedMin);
1257 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1258 }
1259 }
1260 }
1261
1262 void usesAgprAt(int i) {
1263 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1264 if (!hasMAIInsts(*MSTI))
1265 return;
1266
1267 if (i >= AgprIndexUnusedMin) {
1268 AgprIndexUnusedMin = ++i;
1269 if (Ctx) {
1270 MCSymbol* const Sym =
1271 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1272 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1273
1274 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1275 MCSymbol* const vSym =
1276 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1277 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1278 VgprIndexUnusedMin);
1279 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1280 }
1281 }
1282 }
1283
1284public:
1285 KernelScopeInfo() = default;
1286
1287 void initialize(MCContext &Context) {
1288 Ctx = &Context;
1289 MSTI = Ctx->getSubtargetInfo();
1290
1291 usesSgprAt(SgprIndexUnusedMin = -1);
1292 usesVgprAt(VgprIndexUnusedMin = -1);
1293 if (hasMAIInsts(*MSTI)) {
1294 usesAgprAt(AgprIndexUnusedMin = -1);
1295 }
1296 }
1297
1298 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1299 unsigned RegWidth) {
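    // E.g. (illustrative) a use of s[4:7] arrives as DwordRegIndex = 4 with
    // RegWidth = 128 bits, so the last dword index marked used is 4 + 4 - 1 = 7.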
1300 switch (RegKind) {
1301 case IS_SGPR:
1302 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1303 break;
1304 case IS_AGPR:
1305 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1306 break;
1307 case IS_VGPR:
1308 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1309 break;
1310 default:
1311 break;
1312 }
1313 }
1314};
1315
1316class AMDGPUAsmParser : public MCTargetAsmParser {
1317 MCAsmParser &Parser;
1318
1319 unsigned ForcedEncodingSize = 0;
1320 bool ForcedDPP = false;
1321 bool ForcedSDWA = false;
1322 KernelScopeInfo KernelScope;
1323
1324 /// @name Auto-generated Match Functions
1325 /// {
1326
1327#define GET_ASSEMBLER_HEADER
1328#include "AMDGPUGenAsmMatcher.inc"
1329
1330 /// }
1331
1332private:
1333 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1334 bool OutOfRangeError(SMRange Range);
1335 /// Calculate VGPR/SGPR blocks required for given target, reserved
1336 /// registers, and user-specified NextFreeXGPR values.
1337 ///
1338 /// \param Features [in] Target features, used for bug corrections.
1339 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1340 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1341 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1342 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1343 /// descriptor field, if valid.
1344 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1345 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1346 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1347 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1348 /// \param VGPRBlocks [out] Result VGPR block count.
1349 /// \param SGPRBlocks [out] Result SGPR block count.
1350 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1351 bool FlatScrUsed, bool XNACKUsed,
1352 std::optional<bool> EnableWavefrontSize32,
1353 unsigned NextFreeVGPR, SMRange VGPRRange,
1354 unsigned NextFreeSGPR, SMRange SGPRRange,
1355 unsigned &VGPRBlocks, unsigned &SGPRBlocks);
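  // Editor's illustrative call sketch (parameter values are assumptions,
  // and the usual true-on-error convention is assumed):
  //   unsigned VGPRBlocks, SGPRBlocks;
  //   if (calculateGPRBlocks(getFeatureBits(), /*VCCUsed=*/true,
  //                          /*FlatScrUsed=*/false, /*XNACKUsed=*/false,
  //                          /*EnableWavefrontSize32=*/std::nullopt,
  //                          NextFreeVGPR, VGPRRange, NextFreeSGPR, SGPRRange,
  //                          VGPRBlocks, SGPRBlocks))
  //     return true; // an error was already reported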
1356 bool ParseDirectiveAMDGCNTarget();
1357 bool ParseDirectiveAMDHSACodeObjectVersion();
1358 bool ParseDirectiveAMDHSAKernel();
1359 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1360 bool ParseDirectiveAMDKernelCodeT();
1361 // TODO: Possibly make subtargetHasRegister const.
1362 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1363 bool ParseDirectiveAMDGPUHsaKernel();
1364
1365 bool ParseDirectiveISAVersion();
1366 bool ParseDirectiveHSAMetadata();
1367 bool ParseDirectivePALMetadataBegin();
1368 bool ParseDirectivePALMetadata();
1369 bool ParseDirectiveAMDGPULDS();
1370
1371 /// Common code to parse out a block of text (typically YAML) between start and
1372 /// end directives.
1373 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1374 const char *AssemblerDirectiveEnd,
1375 std::string &CollectString);
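  // For example (editor's sketch, directive names assumed): for a block such as
  //   .amdgpu_metadata
  //   amdhsa.version:
  //     - 1
  //     - 0
  //   .end_amdgpu_metadata
  // the YAML between the two directives is collected into CollectString.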
1376
1377 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1378 RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1379 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1380 unsigned &RegNum, unsigned &RegWidth,
1381 bool RestoreOnFailure = false);
1382 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1383 unsigned &RegNum, unsigned &RegWidth,
1384 SmallVectorImpl<AsmToken> &Tokens);
1385 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1386 unsigned &RegWidth,
1387 SmallVectorImpl<AsmToken> &Tokens);
1388 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1389 unsigned &RegWidth,
1390 SmallVectorImpl<AsmToken> &Tokens);
1391 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1392 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1393 bool ParseRegRange(unsigned& Num, unsigned& Width);
1394 unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg,
1395 unsigned RegWidth, SMLoc Loc);
1396
1397 bool isRegister();
1398 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1399 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1400 void initializeGprCountSymbol(RegisterKind RegKind);
1401 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1402 unsigned RegWidth);
1403 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1404 bool IsAtomic);
1405
1406public:
1407 enum AMDGPUMatchResultTy {
1408 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1409 };
1410 enum OperandMode {
1411 OperandMode_Default,
1412 OperandMode_NSA,
1413 };
1414
1415 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1416
1417 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1418 const MCInstrInfo &MII,
1419 const MCTargetOptions &Options)
1420 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1421 MCAsmParserExtension::Initialize(Parser);
1422
1423 if (getFeatureBits().none()) {
1424 // Set default features.
1425 copySTI().ToggleFeature("southern-islands");
1426 }
1427
1428 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1429
1430 {
1431 // TODO: make those pre-defined variables read-only.
1432 // Currently there is no suitable machinery in core llvm-mc for this.
1433 // MCSymbol::isRedefinable is intended for another purpose, and
1434 // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1435 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1436 MCContext &Ctx = getContext();
1437 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1438 MCSymbol *Sym =
1439 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1440 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1441 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1442 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1443 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1444 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1445 } else {
1446 MCSymbol *Sym =
1447 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1448 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1449 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1450 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1451 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1452 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1453 }
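      // Editor's note (illustrative): these variables let user assembly
      // branch on the target version, e.g.
      //   .if .amdgcn.gfx_generation_number >= 10
      //     s_waitcnt_vscnt null, 0x0
      //   .endif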
1454 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1455 initializeGprCountSymbol(IS_VGPR);
1456 initializeGprCountSymbol(IS_SGPR);
1457 } else
1458 KernelScope.initialize(getContext());
1459 }
1460 }
1461
1462 bool hasMIMG_R128() const {
1463 return AMDGPU::hasMIMG_R128(getSTI());
1464 }
1465
1466 bool hasPackedD16() const {
1467 return AMDGPU::hasPackedD16(getSTI());
1468 }
1469
1470 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1471
1472 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1473
1474 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1475
1476 bool isSI() const {
1477 return AMDGPU::isSI(getSTI());
1478 }
1479
1480 bool isCI() const {
1481 return AMDGPU::isCI(getSTI());
1482 }
1483
1484 bool isVI() const {
1485 return AMDGPU::isVI(getSTI());
1486 }
1487
1488 bool isGFX9() const {
1489 return AMDGPU::isGFX9(getSTI());
1490 }
1491
1492 // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1493 bool isGFX90A() const {
1494 return AMDGPU::isGFX90A(getSTI());
1495 }
1496
1497 bool isGFX940() const {
1498 return AMDGPU::isGFX940(getSTI());
1499 }
1500
1501 bool isGFX9Plus() const {
1502 return AMDGPU::isGFX9Plus(getSTI());
1503 }
1504
1505 bool isGFX10() const {
1506 return AMDGPU::isGFX10(getSTI());
1507 }
1508
1509 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1510
1511 bool isGFX11() const {
1512 return AMDGPU::isGFX11(getSTI());
1513 }
1514
1515 bool isGFX11Plus() const {
1516 return AMDGPU::isGFX11Plus(getSTI());
1517 }
1518
1519 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1520
1521 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1522
1523 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1524
1525 bool isGFX10_BEncoding() const {
1526 return AMDGPU::isGFX10_BEncoding(getSTI());
1527 }
1528
1529 bool hasInv2PiInlineImm() const {
1530 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1531 }
1532
1533 bool hasFlatOffsets() const {
1534 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1535 }
1536
1537 bool hasArchitectedFlatScratch() const {
1538 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1539 }
1540
1541 bool hasSGPR102_SGPR103() const {
1542 return !isVI() && !isGFX9();
1543 }
1544
1545 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1546
1547 bool hasIntClamp() const {
1548 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1549 }
1550
1551 bool hasPartialNSAEncoding() const {
1552 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1553 }
1554
1555 unsigned getNSAMaxSize(bool HasSampler = false) const {
1556 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1557 }
1558
1559 unsigned getMaxNumUserSGPRs() const {
1560 return isGFX12Plus() ? 32 : 16;
1561 }
1562
1563 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1564
1565 AMDGPUTargetStreamer &getTargetStreamer() {
1566 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1567 return static_cast<AMDGPUTargetStreamer &>(TS);
1568 }
1569
1570 const MCRegisterInfo *getMRI() const {
1571 // We need this const_cast because for some reason getContext() is not const
1572 // in MCAsmParser.
1573 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1574 }
1575
1576 const MCInstrInfo *getMII() const {
1577 return &MII;
1578 }
1579
1580 const FeatureBitset &getFeatureBits() const {
1581 return getSTI().getFeatureBits();
1582 }
1583
1584 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1585 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1586 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1587
1588 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1589 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1590 bool isForcedDPP() const { return ForcedDPP; }
1591 bool isForcedSDWA() const { return ForcedSDWA; }
1592 ArrayRef<unsigned> getMatchedVariants() const;
1593 StringRef getMatchedVariantName() const;
1594
1595 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1596 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1597 bool RestoreOnFailure);
1598 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1599 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1600 SMLoc &EndLoc) override;
1601 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1602 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1603 unsigned Kind) override;
1604 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1605 OperandVector &Operands, MCStreamer &Out,
1606 uint64_t &ErrorInfo,
1607 bool MatchingInlineAsm) override;
1608 bool ParseDirective(AsmToken DirectiveID) override;
1609 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1610 OperandMode Mode = OperandMode_Default);
1611 StringRef parseMnemonicSuffix(StringRef Name);
1612 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1613 SMLoc NameLoc, OperandVector &Operands) override;
1614 //bool ProcessInstruction(MCInst &Inst);
1615
1616 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1617
1618 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1619
1620 ParseStatus
1621 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1622 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1623 std::function<bool(int64_t &)> ConvertResult = nullptr);
1624
1625 ParseStatus parseOperandArrayWithPrefix(
1626 const char *Prefix, OperandVector &Operands,
1627 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1628 bool (*ConvertResult)(int64_t &) = nullptr);
1629
1630 ParseStatus
1631 parseNamedBit(StringRef Name, OperandVector &Operands,
1632 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1633 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1634 ParseStatus parseCPol(OperandVector &Operands);
1635 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1636 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1637 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1638 SMLoc &StringLoc);
1639
1640 bool isModifier();
1641 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1642 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1643 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1644 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1645 bool parseSP3NegModifier();
1646 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1647 bool HasLit = false);
1648 ParseStatus parseReg(OperandVector &Operands);
1649 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1650 bool HasLit = false);
1651 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1652 bool AllowImm = true);
1653 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1654 bool AllowImm = true);
1655 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1656 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1657 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1658 ParseStatus tryParseIndexKey(OperandVector &Operands,
1659 AMDGPUOperand::ImmTy ImmTy);
1660 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1661 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1662
1663 ParseStatus parseDfmtNfmt(int64_t &Format);
1664 ParseStatus parseUfmt(int64_t &Format);
1665 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1666 int64_t &Format);
1667 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1668 int64_t &Format);
1669 ParseStatus parseFORMAT(OperandVector &Operands);
1670 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1671 ParseStatus parseNumericFormat(int64_t &Format);
1672 ParseStatus parseFlatOffset(OperandVector &Operands);
1673 ParseStatus parseR128A16(OperandVector &Operands);
1674 ParseStatus parseBLGP(OperandVector &Operands);
1675 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1676 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1677
1678 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1679
1680 bool parseCnt(int64_t &IntVal);
1681 ParseStatus parseSWaitCnt(OperandVector &Operands);
1682
1683 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1684 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1685 ParseStatus parseDepCtr(OperandVector &Operands);
1686
1687 bool parseDelay(int64_t &Delay);
1688 ParseStatus parseSDelayALU(OperandVector &Operands);
1689
1690 ParseStatus parseHwreg(OperandVector &Operands);
1691
1692private:
1693 struct OperandInfoTy {
1694 SMLoc Loc;
1695 int64_t Val;
1696 bool IsSymbolic = false;
1697 bool IsDefined = false;
1698
1699 OperandInfoTy(int64_t Val) : Val(Val) {}
1700 };
1701
1702 struct StructuredOpField : OperandInfoTy {
1703 StringLiteral Id;
1704 StringLiteral Desc;
1705 unsigned Width;
1706 bool IsDefined = false;
1707
1708 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1709 int64_t Default)
1710 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1711 virtual ~StructuredOpField() = default;
1712
1713 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1714 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1715 return false;
1716 }
1717
1718 virtual bool validate(AMDGPUAsmParser &Parser) const {
1719 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1720 return Error(Parser, "not supported on this GPU");
1721 if (!isUIntN(Width, Val))
1722 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1723 return true;
1724 }
1725 };
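  // Editor's usage sketch (names are assumptions): a concrete field can be
  // modeled as
  //   struct MaskField : StructuredOpField {
  //     MaskField() : StructuredOpField("mask", "mask", 16, 0) {}
  //   };
  // and a set of such fields is then parsed and range-checked through
  // parseStructuredOpFields() / validateStructuredOpFields() below.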
1726
1727 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1728 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1729
1730 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1731 bool validateSendMsg(const OperandInfoTy &Msg,
1732 const OperandInfoTy &Op,
1733 const OperandInfoTy &Stream);
1734
1735 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1736 OperandInfoTy &Width);
1737
1738 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1739 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1740 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1741
1742 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1743 const OperandVector &Operands) const;
1744 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1745 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1746 SMLoc getLitLoc(const OperandVector &Operands,
1747 bool SearchMandatoryLiterals = false) const;
1748 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1749 SMLoc getConstLoc(const OperandVector &Operands) const;
1750 SMLoc getInstLoc(const OperandVector &Operands) const;
1751
1752 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1753 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1754 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1755 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1756 bool validateSOPLiteral(const MCInst &Inst) const;
1757 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1758 bool validateVOPDRegBankConstraints(const MCInst &Inst,
1759 const OperandVector &Operands);
1760 bool validateIntClampSupported(const MCInst &Inst);
1761 bool validateMIMGAtomicDMask(const MCInst &Inst);
1762 bool validateMIMGGatherDMask(const MCInst &Inst);
1763 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1764 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1765 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1766 bool validateMIMGD16(const MCInst &Inst);
1767 bool validateMIMGMSAA(const MCInst &Inst);
1768 bool validateOpSel(const MCInst &Inst);
1769 bool validateNeg(const MCInst &Inst, int OpName);
1770 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1771 bool validateVccOperand(unsigned Reg) const;
1772 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1773 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1774 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1775 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1776 bool validateAGPRLdSt(const MCInst &Inst) const;
1777 bool validateVGPRAlign(const MCInst &Inst) const;
1778 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1779 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1780 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1781 bool validateDivScale(const MCInst &Inst);
1782 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1783 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1784 const SMLoc &IDLoc);
1785 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1786 const unsigned CPol);
1787 bool validateExeczVcczOperands(const OperandVector &Operands);
1788 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1789 std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1790 unsigned getConstantBusLimit(unsigned Opcode) const;
1791 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1792 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1793 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1794
1795 bool isSupportedMnemo(StringRef Mnemo,
1796 const FeatureBitset &FBS);
1797 bool isSupportedMnemo(StringRef Mnemo,
1798 const FeatureBitset &FBS,
1799 ArrayRef<unsigned> Variants);
1800 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1801
1802 bool isId(const StringRef Id) const;
1803 bool isId(const AsmToken &Token, const StringRef Id) const;
1804 bool isToken(const AsmToken::TokenKind Kind) const;
1805 StringRef getId() const;
1806 bool trySkipId(const StringRef Id);
1807 bool trySkipId(const StringRef Pref, const StringRef Id);
1808 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1809 bool trySkipToken(const AsmToken::TokenKind Kind);
1810 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1811 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1812 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1813
1814 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1815 AsmToken::TokenKind getTokenKind() const;
1816 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1817 bool parseExpr(OperandVector &Operands);
1818 StringRef getTokenStr() const;
1819 AsmToken peekToken(bool ShouldSkipSpace = true);
1820 AsmToken getToken() const;
1821 SMLoc getLoc() const;
1822 void lex();
1823
1824public:
1825 void onBeginOfFile() override;
1826 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1827
1828 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1829
1830 ParseStatus parseExpTgt(OperandVector &Operands);
1831 ParseStatus parseSendMsg(OperandVector &Operands);
1832 ParseStatus parseInterpSlot(OperandVector &Operands);
1833 ParseStatus parseInterpAttr(OperandVector &Operands);
1834 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1835 ParseStatus parseBoolReg(OperandVector &Operands);
1836
1837 bool parseSwizzleOperand(int64_t &Op,
1838 const unsigned MinVal,
1839 const unsigned MaxVal,
1840 const StringRef ErrMsg,
1841 SMLoc &Loc);
1842 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1843 const unsigned MinVal,
1844 const unsigned MaxVal,
1845 const StringRef ErrMsg);
1846 ParseStatus parseSwizzle(OperandVector &Operands);
1847 bool parseSwizzleOffset(int64_t &Imm);
1848 bool parseSwizzleMacro(int64_t &Imm);
1849 bool parseSwizzleQuadPerm(int64_t &Imm);
1850 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1851 bool parseSwizzleBroadcast(int64_t &Imm);
1852 bool parseSwizzleSwap(int64_t &Imm);
1853 bool parseSwizzleReverse(int64_t &Imm);
1854
1855 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1856 int64_t parseGPRIdxMacro();
1857
1858 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1859 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1860
1861 ParseStatus parseOModSI(OperandVector &Operands);
1862
1863 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1864 OptionalImmIndexMap &OptionalIdx);
1865 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1866 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1867 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1868 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1869
1870 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1871 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1872 OptionalImmIndexMap &OptionalIdx);
1873 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1874 OptionalImmIndexMap &OptionalIdx);
1875
1876 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1877 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1878
1879 bool parseDimId(unsigned &Encoding);
1881 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1883 ParseStatus parseDPPCtrl(OperandVector &Operands);
1884 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1885 int64_t parseDPPCtrlSel(StringRef Ctrl);
1886 int64_t parseDPPCtrlPerm();
1887 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1888 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1889 cvtDPP(Inst, Operands, true);
1890 }
1891 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1892 bool IsDPP8 = false);
1893 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1894 cvtVOP3DPP(Inst, Operands, true);
1895 }
1896
1897 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1898 AMDGPUOperand::ImmTy Type);
1899 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1900 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1901 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1902 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1903 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1904 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1905 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1906 uint64_t BasicInstType,
1907 bool SkipDstVcc = false,
1908 bool SkipSrcVcc = false);
1909
1910 ParseStatus parseEndpgm(OperandVector &Operands);
1911
1913};
1914
1915} // end anonymous namespace
1916
1917// May be called with an integer type of equivalent bitwidth.
1918static const fltSemantics *getFltSemantics(unsigned Size) {
1919 switch (Size) {
1920 case 4:
1921 return &APFloat::IEEEsingle();
1922 case 8:
1923 return &APFloat::IEEEdouble();
1924 case 2:
1925 return &APFloat::IEEEhalf();
1926 default:
1927 llvm_unreachable("unsupported fp type");
1928 }
1929}
1930
1931static const fltSemantics *getFltSemantics(MVT VT) {
1932 return getFltSemantics(VT.getSizeInBits() / 8);
1933}
1934
1935static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1936 switch (OperandType) {
1937 // When a floating-point immediate is used as an operand of type i16, the
1938 // 32-bit representation of the constant truncated to the 16 LSBs should be used.
1958 return &APFloat::IEEEsingle();
1964 return &APFloat::IEEEdouble();
1973 return &APFloat::IEEEhalf();
1981 return &APFloat::BFloat();
1982 default:
1983 llvm_unreachable("unsupported fp type");
1984 }
1985}
1986
1987//===----------------------------------------------------------------------===//
1988// Operand
1989//===----------------------------------------------------------------------===//
1990
1991static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1992 bool Lost;
1993
1994 // Convert the literal to the operand's floating-point format.
1995 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1996 APFloat::rmNearestTiesToEven,
1997 &Lost);
1998 // We allow precision loss but not overflow or underflow
1999 if (Status != APFloat::opOK &&
2000 Lost &&
2001 ((Status & APFloat::opOverflow) != 0 ||
2002 (Status & APFloat::opUnderflow) != 0)) {
2003 return false;
2004 }
2005
2006 return true;
2007}
2008
2009static bool isSafeTruncation(int64_t Val, unsigned Size) {
2010 return isUIntN(Size, Val) || isIntN(Size, Val);
2011}
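// For example, with Size == 16 both 0xFFFF (valid as unsigned) and -1 (valid
// as signed) are safe truncations, while 0x1FFFF fits neither form and is not.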
2012
2013static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2014 if (VT.getScalarType() == MVT::i16)
2015 return isInlinableLiteral32(Val, HasInv2Pi);
2016
2017 if (VT.getScalarType() == MVT::f16)
2018 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2019
2020 assert(VT.getScalarType() == MVT::bf16);
2021
2022 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2023}
2024
2025bool AMDGPUOperand::isInlinableImm(MVT type) const {
2026
2027 // This is a hack to enable named inline values like
2028 // shared_base with both 32-bit and 64-bit operands.
2029 // Note that these values are defined as
2030 // 32-bit operands only.
2031 if (isInlineValue()) {
2032 return true;
2033 }
2034
2035 if (!isImmTy(ImmTyNone)) {
2036 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2037 return false;
2038 }
2039 // TODO: We should avoid using host floats here. It would be better to
2040 // check the float bit values, which is what a few other places do.
2041 // We've had bot failures before due to weird NaN support on MIPS hosts.
2042
2043 APInt Literal(64, Imm.Val);
2044
2045 if (Imm.IsFPImm) { // We got fp literal token
2046 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2047 return AMDGPU::isInlinableLiteral64(Imm.Val,
2048 AsmParser->hasInv2PiInlineImm());
2049 }
2050
2051 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2052 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2053 return false;
2054
2055 if (type.getScalarSizeInBits() == 16) {
2056 bool Lost = false;
2057 switch (type.getScalarType().SimpleTy) {
2058 default:
2059 llvm_unreachable("unknown 16-bit type");
2060 case MVT::bf16:
2061 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2062 &Lost);
2063 break;
2064 case MVT::f16:
2065 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2066 &Lost);
2067 break;
2068 case MVT::i16:
2069 FPLiteral.convert(APFloatBase::IEEEsingle(),
2070 APFloat::rmNearestTiesToEven, &Lost);
2071 break;
2072 }
2073 // We need to use the 32-bit representation here because when a
2074 // floating-point inline constant is used as an i16 operand, its 32-bit
2075 // representation will be used. We will need the 32-bit value to check if
2076 // it is an FP inline constant.
2077 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2078 return isInlineableLiteralOp16(ImmVal, type,
2079 AsmParser->hasInv2PiInlineImm());
2080 }
2081
2082 // Check if the single-precision literal is inlinable.
2083 return AMDGPU::isInlinableLiteral32(
2084 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2085 AsmParser->hasInv2PiInlineImm());
2086 }
2087
2088 // We got int literal token.
2089 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2090 return AMDGPU::isInlinableLiteral64(Imm.Val,
2091 AsmParser->hasInv2PiInlineImm());
2092 }
2093
2094 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2095 return false;
2096 }
2097
2098 if (type.getScalarSizeInBits() == 16) {
2099 return isInlineableLiteralOp16(
2100 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2101 type, AsmParser->hasInv2PiInlineImm());
2102 }
2103
2104 return AMDGPU::isInlinableLiteral32(
2105 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2106 AsmParser->hasInv2PiInlineImm());
2107}
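// For instance, a floating-point immediate such as 1.0 used with an f32
// operand maps to a hardware inline constant, while an arbitrary value such
// as 0.1234 is not inlinable and must be encoded as a 32-bit literal.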
2108
2109bool AMDGPUOperand::isLiteralImm(MVT type) const {
2110 // Check that this immediate can be added as literal
2111 if (!isImmTy(ImmTyNone)) {
2112 return false;
2113 }
2114
2115 if (!Imm.IsFPImm) {
2116 // We got int literal token.
2117
2118 if (type == MVT::f64 && hasFPModifiers()) {
2119 // Cannot apply fp modifiers to int literals while preserving the same
2120 // semantics for VOP1/2/C and VOP3, because of integer truncation. To avoid
2121 // ambiguity, disable these cases.
2122 return false;
2123 }
2124
2125 unsigned Size = type.getSizeInBits();
2126 if (Size == 64)
2127 Size = 32;
2128
2129 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2130 // types.
2131 return isSafeTruncation(Imm.Val, Size);
2132 }
2133
2134 // We got fp literal token
2135 if (type == MVT::f64) { // Expected 64-bit fp operand
2136 // We would set the low 64 bits of the literal to zeroes, but we accept these literals
2137 return true;
2138 }
2139
2140 if (type == MVT::i64) { // Expected 64-bit int operand
2141 // We don't allow fp literals in 64-bit integer instructions. It is
2142 // unclear how we should encode them.
2143 return false;
2144 }
2145
2146 // We allow fp literals with f16x2 operands assuming that the specified
2147 // literal goes into the lower half and the upper half is zero. We also
2148 // require that the literal may be losslessly converted to f16.
2149 //
2150 // For i16x2 operands, we assume that the specified literal is encoded as a
2151 // single-precision float. This is pretty odd, but it matches SP3 and what
2152 // happens in hardware.
2153 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2154 : (type == MVT::v2i16) ? MVT::f32
2155 : (type == MVT::v2f32) ? MVT::f32
2156 : type;
2157
2158 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2159 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2160}
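// For example, a literal like 1.0 is accepted for a v2f16 operand because it
// converts losslessly to f16 (it occupies the lower half, the upper half is
// zero), whereas 1.0e10 is rejected because the f16 conversion overflows.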
2161
2162bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2163 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2164}
2165
2166bool AMDGPUOperand::isVRegWithInputMods() const {
2167 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2168 // GFX90A allows DPP on 64-bit operands.
2169 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2170 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2171}
2172
2173template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2174 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2175 : AMDGPU::VGPR_16_Lo128RegClassID);
2176}
2177
2178bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2179 if (AsmParser->isVI())
2180 return isVReg32();
2181 else if (AsmParser->isGFX9Plus())
2182 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2183 else
2184 return false;
2185}
2186
2187bool AMDGPUOperand::isSDWAFP16Operand() const {
2188 return isSDWAOperand(MVT::f16);
2189}
2190
2191bool AMDGPUOperand::isSDWAFP32Operand() const {
2192 return isSDWAOperand(MVT::f32);
2193}
2194
2195bool AMDGPUOperand::isSDWAInt16Operand() const {
2196 return isSDWAOperand(MVT::i16);
2197}
2198
2199bool AMDGPUOperand::isSDWAInt32Operand() const {
2200 return isSDWAOperand(MVT::i32);
2201}
2202
2203bool AMDGPUOperand::isBoolReg() const {
2204 auto FB = AsmParser->getFeatureBits();
2205 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2206 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2207}
2208
2209uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2210{
2211 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2212 assert(Size == 2 || Size == 4 || Size == 8);
2213
2214 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2215
2216 if (Imm.Mods.Abs) {
2217 Val &= ~FpSignMask;
2218 }
2219 if (Imm.Mods.Neg) {
2220 Val ^= FpSignMask;
2221 }
2222
2223 return Val;
2224}
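// For example, with Size == 4 the sign mask is 0x80000000: "abs" clears bit 31
// (0xBF800000, i.e. -1.0f, becomes 0x3F800000, i.e. 1.0f) and "neg" toggles it.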
2225
2226void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2227 if (isExpr()) {
2229 return;
2230 }
2231
2232 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2233 Inst.getNumOperands())) {
2234 addLiteralImmOperand(Inst, Imm.Val,
2235 ApplyModifiers &
2236 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2237 } else {
2238 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2240 setImmKindNone();
2241 }
2242}
2243
2244void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2245 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2246 auto OpNum = Inst.getNumOperands();
2247 // Check that this operand accepts literals
2248 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2249
2250 if (ApplyModifiers) {
2251 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2252 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2253 Val = applyInputFPModifiers(Val, Size);
2254 }
2255
2256 APInt Literal(64, Val);
2257 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2258
2259 if (Imm.IsFPImm) { // We got fp literal token
2260 switch (OpTy) {
2266 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2267 AsmParser->hasInv2PiInlineImm())) {
2268 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2269 setImmKindConst();
2270 return;
2271 }
2272
2273 // Non-inlineable
2274 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2275 // For fp operands we check if low 32 bits are zeros
2276 if (Literal.getLoBits(32) != 0) {
2277 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2278 "Can't encode literal as exact 64-bit floating-point operand. "
2279 "Low 32-bits will be set to zero");
2280 Val &= 0xffffffff00000000u;
2281 }
2282
2284 setImmKindLiteral();
2285 return;
2286 }
2287
2288 // We don't allow fp literals in 64-bit integer instructions. It is
2289 // unclear how we should encode them. This case should be checked earlier
2290 // in predicate methods (isLiteralImm())
2291 llvm_unreachable("fp literal in 64-bit integer instruction.");
2292
2300 if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
2301 // This is 1/(2*pi), which is going to be truncated to bf16 with a
2302 // loss of precision. The constant represents the idiomatic fp32 value of
2303 // 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16 bits
2304 // cleared. Prevent rounding below.
2305 Inst.addOperand(MCOperand::createImm(0x3e22));
2306 setImmKindLiteral();
2307 return;
2308 }
2309 [[fallthrough]];
2310
2338 bool lost;
2339 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2340 // Convert the literal to the operand's floating-point format.
2341 FPLiteral.convert(*getOpFltSemantics(OpTy),
2342 APFloat::rmNearestTiesToEven, &lost);
2343 // We allow precision loss but not overflow or underflow. This should be
2344 // checked earlier in isLiteralImm().
2345
2346 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2347 Inst.addOperand(MCOperand::createImm(ImmVal));
2348 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2349 setImmKindMandatoryLiteral();
2350 } else {
2351 setImmKindLiteral();
2352 }
2353 return;
2354 }
2355 default:
2356 llvm_unreachable("invalid operand size");
2357 }
2358
2359 return;
2360 }
2361
2362 // We got int literal token.
2363 // Only sign extend inline immediates.
2364 switch (OpTy) {
2380 if (isSafeTruncation(Val, 32) &&
2381 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2382 AsmParser->hasInv2PiInlineImm())) {
2384 setImmKindConst();
2385 return;
2386 }
2387
2388 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2389 setImmKindLiteral();
2390 return;
2391
2397 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2399 setImmKindConst();
2400 return;
2401 }
2402
2403 Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
2404 : Lo_32(Val);
2405
2407 setImmKindLiteral();
2408 return;
2409
2413 if (isSafeTruncation(Val, 16) &&
2414 AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2415 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2416 setImmKindConst();
2417 return;
2418 }
2419
2420 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2421 setImmKindLiteral();
2422 return;
2423
2428 if (isSafeTruncation(Val, 16) &&
2429 AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2430 AsmParser->hasInv2PiInlineImm())) {
2432 setImmKindConst();
2433 return;
2434 }
2435
2436 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2437 setImmKindLiteral();
2438 return;
2439
2444 if (isSafeTruncation(Val, 16) &&
2445 AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2446 AsmParser->hasInv2PiInlineImm())) {
2448 setImmKindConst();
2449 return;
2450 }
2451
2452 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2453 setImmKindLiteral();
2454 return;
2455
2458 assert(isSafeTruncation(Val, 16));
2459 assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2461 return;
2462 }
2465 assert(isSafeTruncation(Val, 16));
2466 assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2467 AsmParser->hasInv2PiInlineImm()));
2468
2470 return;
2471 }
2472
2475 assert(isSafeTruncation(Val, 16));
2476 assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2477 AsmParser->hasInv2PiInlineImm()));
2478
2480 return;
2481 }
2482
2484 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2485 setImmKindMandatoryLiteral();
2486 return;
2488 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2489 setImmKindMandatoryLiteral();
2490 return;
2491 default:
2492 llvm_unreachable("invalid operand size");
2493 }
2494}
2495
2496void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2497 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2498}
2499
2500bool AMDGPUOperand::isInlineValue() const {
2501 return isRegKind() && ::isInlineValue(getReg());
2502}
2503
2504//===----------------------------------------------------------------------===//
2505// AsmParser
2506//===----------------------------------------------------------------------===//
2507
2508static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2509 if (Is == IS_VGPR) {
2510 switch (RegWidth) {
2511 default: return -1;
2512 case 32:
2513 return AMDGPU::VGPR_32RegClassID;
2514 case 64:
2515 return AMDGPU::VReg_64RegClassID;
2516 case 96:
2517 return AMDGPU::VReg_96RegClassID;
2518 case 128:
2519 return AMDGPU::VReg_128RegClassID;
2520 case 160:
2521 return AMDGPU::VReg_160RegClassID;
2522 case 192:
2523 return AMDGPU::VReg_192RegClassID;
2524 case 224:
2525 return AMDGPU::VReg_224RegClassID;
2526 case 256:
2527 return AMDGPU::VReg_256RegClassID;
2528 case 288:
2529 return AMDGPU::VReg_288RegClassID;
2530 case 320:
2531 return AMDGPU::VReg_320RegClassID;
2532 case 352:
2533 return AMDGPU::VReg_352RegClassID;
2534 case 384:
2535 return AMDGPU::VReg_384RegClassID;
2536 case 512:
2537 return AMDGPU::VReg_512RegClassID;
2538 case 1024:
2539 return AMDGPU::VReg_1024RegClassID;
2540 }
2541 } else if (Is == IS_TTMP) {
2542 switch (RegWidth) {
2543 default: return -1;
2544 case 32:
2545 return AMDGPU::TTMP_32RegClassID;
2546 case 64:
2547 return AMDGPU::TTMP_64RegClassID;
2548 case 128:
2549 return AMDGPU::TTMP_128RegClassID;
2550 case 256:
2551 return AMDGPU::TTMP_256RegClassID;
2552 case 512:
2553 return AMDGPU::TTMP_512RegClassID;
2554 }
2555 } else if (Is == IS_SGPR) {
2556 switch (RegWidth) {
2557 default: return -1;
2558 case 32:
2559 return AMDGPU::SGPR_32RegClassID;
2560 case 64:
2561 return AMDGPU::SGPR_64RegClassID;
2562 case 96:
2563 return AMDGPU::SGPR_96RegClassID;
2564 case 128:
2565 return AMDGPU::SGPR_128RegClassID;
2566 case 160:
2567 return AMDGPU::SGPR_160RegClassID;
2568 case 192:
2569 return AMDGPU::SGPR_192RegClassID;
2570 case 224:
2571 return AMDGPU::SGPR_224RegClassID;
2572 case 256:
2573 return AMDGPU::SGPR_256RegClassID;
2574 case 288:
2575 return AMDGPU::SGPR_288RegClassID;
2576 case 320:
2577 return AMDGPU::SGPR_320RegClassID;
2578 case 352:
2579 return AMDGPU::SGPR_352RegClassID;
2580 case 384:
2581 return AMDGPU::SGPR_384RegClassID;
2582 case 512:
2583 return AMDGPU::SGPR_512RegClassID;
2584 }
2585 } else if (Is == IS_AGPR) {
2586 switch (RegWidth) {
2587 default: return -1;
2588 case 32:
2589 return AMDGPU::AGPR_32RegClassID;
2590 case 64:
2591 return AMDGPU::AReg_64RegClassID;
2592 case 96:
2593 return AMDGPU::AReg_96RegClassID;
2594 case 128:
2595 return AMDGPU::AReg_128RegClassID;
2596 case 160:
2597 return AMDGPU::AReg_160RegClassID;
2598 case 192:
2599 return AMDGPU::AReg_192RegClassID;
2600 case 224:
2601 return AMDGPU::AReg_224RegClassID;
2602 case 256:
2603 return AMDGPU::AReg_256RegClassID;
2604 case 288:
2605 return AMDGPU::AReg_288RegClassID;
2606 case 320:
2607 return AMDGPU::AReg_320RegClassID;
2608 case 352:
2609 return AMDGPU::AReg_352RegClassID;
2610 case 384:
2611 return AMDGPU::AReg_384RegClassID;
2612 case 512:
2613 return AMDGPU::AReg_512RegClassID;
2614 case 1024:
2615 return AMDGPU::AReg_1024RegClassID;
2616 }
2617 }
2618 return -1;
2619}
2620
2623 .Case("exec", AMDGPU::EXEC)
2624 .Case("vcc", AMDGPU::VCC)
2625 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2626 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2627 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2628 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2629 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2630 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2631 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2632 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2633 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2634 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2635 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2636 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2637 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2638 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2639 .Case("m0", AMDGPU::M0)
2640 .Case("vccz", AMDGPU::SRC_VCCZ)
2641 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2642 .Case("execz", AMDGPU::SRC_EXECZ)
2643 .Case("src_execz", AMDGPU::SRC_EXECZ)
2644 .Case("scc", AMDGPU::SRC_SCC)
2645 .Case("src_scc", AMDGPU::SRC_SCC)
2646 .Case("tba", AMDGPU::TBA)
2647 .Case("tma", AMDGPU::TMA)
2648 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2649 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2650 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2651 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2652 .Case("vcc_lo", AMDGPU::VCC_LO)
2653 .Case("vcc_hi", AMDGPU::VCC_HI)
2654 .Case("exec_lo", AMDGPU::EXEC_LO)
2655 .Case("exec_hi", AMDGPU::EXEC_HI)
2656 .Case("tma_lo", AMDGPU::TMA_LO)
2657 .Case("tma_hi", AMDGPU::TMA_HI)
2658 .Case("tba_lo", AMDGPU::TBA_LO)
2659 .Case("tba_hi", AMDGPU::TBA_HI)
2660 .Case("pc", AMDGPU::PC_REG)
2661 .Case("null", AMDGPU::SGPR_NULL)
2662 .Default(AMDGPU::NoRegister);
2663}
2664
2665bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2666 SMLoc &EndLoc, bool RestoreOnFailure) {
2667 auto R = parseRegister();
2668 if (!R) return true;
2669 assert(R->isReg());
2670 RegNo = R->getReg();
2671 StartLoc = R->getStartLoc();
2672 EndLoc = R->getEndLoc();
2673 return false;
2674}
2675
2676bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2677 SMLoc &EndLoc) {
2678 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2679}
2680
2681ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2682 SMLoc &EndLoc) {
2683 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2684 bool PendingErrors = getParser().hasPendingError();
2685 getParser().clearPendingErrors();
2686 if (PendingErrors)
2687 return ParseStatus::Failure;
2688 if (Result)
2689 return ParseStatus::NoMatch;
2690 return ParseStatus::Success;
2691}
2692
2693bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2694 RegisterKind RegKind, unsigned Reg1,
2695 SMLoc Loc) {
2696 switch (RegKind) {
2697 case IS_SPECIAL:
2698 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2699 Reg = AMDGPU::EXEC;
2700 RegWidth = 64;
2701 return true;
2702 }
2703 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2704 Reg = AMDGPU::FLAT_SCR;
2705 RegWidth = 64;
2706 return true;
2707 }
2708 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2709 Reg = AMDGPU::XNACK_MASK;
2710 RegWidth = 64;
2711 return true;
2712 }
2713 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2714 Reg = AMDGPU::VCC;
2715 RegWidth = 64;
2716 return true;
2717 }
2718 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2719 Reg = AMDGPU::TBA;
2720 RegWidth = 64;
2721 return true;
2722 }
2723 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2724 Reg = AMDGPU::TMA;
2725 RegWidth = 64;
2726 return true;
2727 }
2728 Error(Loc, "register does not fit in the list");
2729 return false;
2730 case IS_VGPR:
2731 case IS_SGPR:
2732 case IS_AGPR:
2733 case IS_TTMP:
2734 if (Reg1 != Reg + RegWidth / 32) {
2735 Error(Loc, "registers in a list must have consecutive indices");
2736 return false;
2737 }
2738 RegWidth += 32;
2739 return true;
2740 default:
2741 llvm_unreachable("unexpected register kind");
2742 }
2743}
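// For example, when parsing the list [s0,s1,s2,s3], each successive register
// must have the next index (Reg1 == Reg + RegWidth / 32), and RegWidth grows
// from 32 to 64, 96 and finally 128.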
2744
2745struct RegInfo {
2746 StringLiteral Name;
2747 RegisterKind Kind;
2748};
2749
2750static constexpr RegInfo RegularRegisters[] = {
2751 {{"v"}, IS_VGPR},
2752 {{"s"}, IS_SGPR},
2753 {{"ttmp"}, IS_TTMP},
2754 {{"acc"}, IS_AGPR},
2755 {{"a"}, IS_AGPR},
2756};
2757
2758static bool isRegularReg(RegisterKind Kind) {
2759 return Kind == IS_VGPR ||
2760 Kind == IS_SGPR ||
2761 Kind == IS_TTMP ||
2762 Kind == IS_AGPR;
2763}
2764
2765static const RegInfo* getRegularRegInfo(StringRef Str) {
2766 for (const RegInfo &Reg : RegularRegisters)
2767 if (Str.starts_with(Reg.Name))
2768 return &Reg;
2769 return nullptr;
2770}
2771
2772static bool getRegNum(StringRef Str, unsigned& Num) {
2773 return !Str.getAsInteger(10, Num);
2774}
2775
2776bool
2777AMDGPUAsmParser::isRegister(const AsmToken &Token,
2778 const AsmToken &NextToken) const {
2779
2780 // A list of consecutive registers: [s0,s1,s2,s3]
2781 if (Token.is(AsmToken::LBrac))
2782 return true;
2783
2784 if (!Token.is(AsmToken::Identifier))
2785 return false;
2786
2787 // A single register like s0 or a range of registers like s[0:1]
2788
2789 StringRef Str = Token.getString();
2790 const RegInfo *Reg = getRegularRegInfo(Str);
2791 if (Reg) {
2792 StringRef RegName = Reg->Name;
2793 StringRef RegSuffix = Str.substr(RegName.size());
2794 if (!RegSuffix.empty()) {
2795 RegSuffix.consume_back(".l");
2796 RegSuffix.consume_back(".h");
2797 unsigned Num;
2798 // A single register with an index: rXX
2799 if (getRegNum(RegSuffix, Num))
2800 return true;
2801 } else {
2802 // A range of registers: r[XX:YY].
2803 if (NextToken.is(AsmToken::LBrac))
2804 return true;
2805 }
2806 }
2807
2808 return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2809}
2810
2811bool
2812AMDGPUAsmParser::isRegister()
2813{
2814 return isRegister(getToken(), peekToken());
2815}
2816
2817unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2818 unsigned SubReg, unsigned RegWidth,
2819 SMLoc Loc) {
2820 assert(isRegularReg(RegKind));
2821
2822 unsigned AlignSize = 1;
2823 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2824 // SGPR and TTMP registers must be aligned.
2825 // Max required alignment is 4 dwords.
2826 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2827 }
2828
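  // For example, s[2:5] requests a 128-bit SGPR tuple, so AlignSize is 4 and
  // the starting index 2 is rejected below; s[4:7] satisfies the alignment.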
2829 if (RegNum % AlignSize != 0) {
2830 Error(Loc, "invalid register alignment");
2831 return AMDGPU::NoRegister;
2832 }
2833
2834 unsigned RegIdx = RegNum / AlignSize;
2835 int RCID = getRegClass(RegKind, RegWidth);
2836 if (RCID == -1) {
2837 Error(Loc, "invalid or unsupported register size");
2838 return AMDGPU::NoRegister;
2839 }
2840
2841 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2842 const MCRegisterClass RC = TRI->getRegClass(RCID);
2843 if (RegIdx >= RC.getNumRegs()) {
2844 Error(Loc, "register index is out of range");
2845 return AMDGPU::NoRegister;
2846 }
2847
2848 unsigned Reg = RC.getRegister(RegIdx);
2849
2850 if (SubReg) {
2851 Reg = TRI->getSubReg(Reg, SubReg);
2852
2853 // Currently all regular registers have their .l and .h subregisters, so
2854 // we should never need to generate an error here.
2855 assert(Reg && "Invalid subregister!");
2856 }
2857
2858 return Reg;
2859}
2860
2861bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2862 int64_t RegLo, RegHi;
2863 if (!skipToken(AsmToken::LBrac, "missing register index"))
2864 return false;
2865
2866 SMLoc FirstIdxLoc = getLoc();
2867 SMLoc SecondIdxLoc;
2868
2869 if (!parseExpr(RegLo))
2870 return false;
2871
2872 if (trySkipToken(AsmToken::Colon)) {
2873 SecondIdxLoc = getLoc();
2874 if (!parseExpr(RegHi))
2875 return false;
2876 } else {
2877 RegHi = RegLo;
2878 }
2879
2880 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2881 return false;
2882
2883 if (!isUInt<32>(RegLo)) {
2884 Error(FirstIdxLoc, "invalid register index");
2885 return false;
2886 }
2887
2888 if (!isUInt<32>(RegHi)) {
2889 Error(SecondIdxLoc, "invalid register index");
2890 return false;
2891 }
2892
2893 if (RegLo > RegHi) {
2894 Error(FirstIdxLoc, "first register index should not exceed second index");
2895 return false;
2896 }
2897
2898 Num = static_cast<unsigned>(RegLo);
2899 RegWidth = 32 * ((RegHi - RegLo) + 1);
2900 return true;
2901}
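// For example, parsing "[4:7]" yields Num == 4 and RegWidth == 128, while a
// single index such as "[5]" yields Num == 5 and RegWidth == 32.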
2902
2903unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2904 unsigned &RegNum, unsigned &RegWidth,
2905 SmallVectorImpl<AsmToken> &Tokens) {
2906 assert(isToken(AsmToken::Identifier));
2907 unsigned Reg = getSpecialRegForName(getTokenStr());
2908 if (Reg) {
2909 RegNum = 0;
2910 RegWidth = 32;
2911 RegKind = IS_SPECIAL;
2912 Tokens.push_back(getToken());
2913 lex(); // skip register name
2914 }
2915 return Reg;
2916}
2917
2918unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2919 unsigned &RegNum, unsigned &RegWidth,
2920 SmallVectorImpl<AsmToken> &Tokens) {
2921 assert(isToken(AsmToken::Identifier));
2922 StringRef RegName = getTokenStr();
2923 auto Loc = getLoc();
2924
2925 const RegInfo *RI = getRegularRegInfo(RegName);
2926 if (!RI) {
2927 Error(Loc, "invalid register name");
2928 return AMDGPU::NoRegister;
2929 }
2930
2931 Tokens.push_back(getToken());
2932 lex(); // skip register name
2933
2934 RegKind = RI->Kind;
2935 StringRef RegSuffix = RegName.substr(RI->Name.size());
2936 unsigned SubReg = NoSubRegister;
2937 if (!RegSuffix.empty()) {
2938 // We don't know the opcode until we are done parsing, so we don't know
2939 // whether registers should be 16- or 32-bit. It is therefore mandatory to
2940 // put .l or .h to correctly specify 16-bit registers. We also can't tell
2941 // class VGPR_16_Lo128 from VGPR_16, so always parse them as VGPR_16.
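    // For example, "v1.l" selects the low 16-bit half of v1 (SubReg == lo16)
    // and "v1.h" the high half (SubReg == hi16); plain "v1" leaves SubReg unset.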
2942 if (RegSuffix.consume_back(".l"))
2943 SubReg = AMDGPU::lo16;
2944 else if (RegSuffix.consume_back(".h"))
2945 SubReg = AMDGPU::hi16;
2946
2947 // Single 32-bit register: vXX.
2948 if (!getRegNum(RegSuffix, RegNum)) {
2949 Error(Loc, "invalid register index");
2950 return AMDGPU::NoRegister;
2951 }
2952 RegWidth = 32;
2953 } else {
2954 // Range of registers: v[XX:YY]. ":YY" is optional.
2955 if (!ParseRegRange(RegNum, RegWidth))
2956 return AMDGPU::NoRegister;
2957 }
2958
2959 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
2960}
2961
2962unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2963 unsigned &RegWidth,
2964 SmallVectorImpl<AsmToken> &Tokens) {
2965 unsigned Reg = AMDGPU::NoRegister;
2966 auto ListLoc = getLoc();
2967
2968 if (!skipToken(AsmToken::LBrac,
2969 "expected a register or a list of registers")) {
2970 return AMDGPU::NoRegister;
2971 }
2972
2973 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2974
2975 auto Loc = getLoc();
2976 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2977 return AMDGPU::NoRegister;
2978 if (RegWidth != 32) {
2979 Error(Loc, "expected a single 32-bit register");
2980 return AMDGPU::NoRegister;
2981 }
2982
2983 for (; trySkipToken(AsmToken::Comma); ) {
2984 RegisterKind NextRegKind;
2985 unsigned NextReg, NextRegNum, NextRegWidth;
2986 Loc = getLoc();
2987
2988 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2989 NextRegNum, NextRegWidth,
2990 Tokens)) {
2991 return AMDGPU::NoRegister;
2992 }
2993 if (NextRegWidth != 32) {
2994 Error(Loc, "expected a single 32-bit register");
2995 return AMDGPU::NoRegister;
2996 }
2997 if (NextRegKind != RegKind) {
2998 Error(Loc, "registers in a list must be of the same kind");
2999 return AMDGPU::NoRegister;
3000 }
3001 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
3002 return AMDGPU::NoRegister;
3003 }
3004
3005 if (!skipToken(AsmToken::RBrac,
3006 "expected a comma or a closing square bracket")) {
3007 return AMDGPU::NoRegister;
3008 }
3009
3010 if (isRegularReg(RegKind))
3011 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3012
3013 return Reg;
3014}
3015
3016bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3017 unsigned &RegNum, unsigned &RegWidth,
3018 SmallVectorImpl<AsmToken> &Tokens) {
3019 auto Loc = getLoc();
3020 Reg = AMDGPU::NoRegister;
3021
3022 if (isToken(AsmToken::Identifier)) {
3023 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3024 if (Reg == AMDGPU::NoRegister)
3025 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3026 } else {
3027 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3028 }
3029
3030 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3031 if (Reg == AMDGPU::NoRegister) {
3032 assert(Parser.hasPendingError());
3033 return false;
3034 }
3035
3036 if (!subtargetHasRegister(*TRI, Reg)) {
3037 if (Reg == AMDGPU::SGPR_NULL) {
3038 Error(Loc, "'null' operand is not supported on this GPU");
3039 } else {
3040 Error(Loc, "register not available on this GPU");
3041 }
3042 return false;
3043 }
3044
3045 return true;
3046}
3047
3048bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3049 unsigned &RegNum, unsigned &RegWidth,
3050 bool RestoreOnFailure /*=false*/) {
3051 Reg = AMDGPU::NoRegister;
3052
3054 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3055 if (RestoreOnFailure) {
3056 while (!Tokens.empty()) {
3057 getLexer().UnLex(Tokens.pop_back_val());
3058 }
3059 }
3060 return true;
3061 }
3062 return false;
3063}
3064
3065std::optional<StringRef>
3066AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3067 switch (RegKind) {
3068 case IS_VGPR:
3069 return StringRef(".amdgcn.next_free_vgpr");
3070 case IS_SGPR:
3071 return StringRef(".amdgcn.next_free_sgpr");
3072 default:
3073 return std::nullopt;
3074 }
3075}
3076
3077void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3078 auto SymbolName = getGprCountSymbolName(RegKind);
3079 assert(SymbolName && "initializing invalid register kind");
3080 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3081 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
3082}
3083
3084bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3085 unsigned DwordRegIndex,
3086 unsigned RegWidth) {
3087 // Symbols are only defined for GCN targets
3088 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3089 return true;
3090
3091 auto SymbolName = getGprCountSymbolName(RegKind);
3092 if (!SymbolName)
3093 return true;
3094 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3095
3096 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3097 int64_t OldCount;
3098
3099 if (!Sym->isVariable())
3100 return !Error(getLoc(),
3101 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3102 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
3103 return !Error(
3104 getLoc(),
3105 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3106
3107 if (OldCount <= NewMax)
3108 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
3109
3110 return true;
3111}
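// For example, a use of v[8:11] calls this with DwordRegIndex == 8 and
// RegWidth == 128, so .amdgcn.next_free_vgpr is raised to at least 12.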
3112
3113std::unique_ptr<AMDGPUOperand>
3114AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3115 const auto &Tok = getToken();
3116 SMLoc StartLoc = Tok.getLoc();
3117 SMLoc EndLoc = Tok.getEndLoc();
3118 RegisterKind RegKind;
3119 unsigned Reg, RegNum, RegWidth;
3120
3121 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3122 return nullptr;
3123 }
3124 if (isHsaAbi(getSTI())) {
3125 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3126 return nullptr;
3127 } else
3128 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3129 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3130}
3131
3132ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3133 bool HasSP3AbsModifier, bool HasLit) {
3134 // TODO: add syntactic sugar for 1/(2*PI)
3135
3136 if (isRegister())
3137 return ParseStatus::NoMatch;
3138 assert(!isModifier());
3139
3140 if (!HasLit) {
3141 HasLit = trySkipId("lit");
3142 if (HasLit) {
3143 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3144 return ParseStatus::Failure;
3145 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
3146 if (S.isSuccess() &&
3147 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3148 return ParseStatus::Failure;
3149 return S;
3150 }
3151 }
3152
3153 const auto& Tok = getToken();
3154 const auto& NextTok = peekToken();
3155 bool IsReal = Tok.is(AsmToken::Real);
3156 SMLoc S = getLoc();
3157 bool Negate = false;
3158
3159 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3160 lex();
3161 IsReal = true;
3162 Negate = true;
3163 }
3164
3165 AMDGPUOperand::Modifiers Mods;
3166 Mods.Lit = HasLit;
3167
3168 if (IsReal) {
3169 // Floating-point expressions are not supported.
3170 // Can only allow floating-point literals with an
3171 // optional sign.
3172
3173 StringRef Num = getTokenStr();
3174 lex();
3175
3176 APFloat RealVal(APFloat::IEEEdouble());
3177 auto roundMode = APFloat::rmNearestTiesToEven;
3178 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3179 return ParseStatus::Failure;
3180 if (Negate)
3181 RealVal.changeSign();
3182
3183 Operands.push_back(
3184 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3185 AMDGPUOperand::ImmTyNone, true));
3186 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3187 Op.setModifiers(Mods);
3188
3189 return ParseStatus::Success;
3190
3191 } else {
3192 int64_t IntVal;
3193 const MCExpr *Expr;
3194 SMLoc S = getLoc();
3195
3196 if (HasSP3AbsModifier) {
3197 // This is a workaround for handling expressions
3198 // as arguments of SP3 'abs' modifier, for example:
3199 // |1.0|
3200 // |-1|
3201 // |1+x|
3202 // This syntax is not compatible with the syntax of standard
3203 // MC expressions (due to the trailing '|').
3204 SMLoc EndLoc;
3205 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3206 return ParseStatus::Failure;
3207 } else {
3208 if (Parser.parseExpression(Expr))
3209 return ParseStatus::Failure;
3210 }
3211
3212 if (Expr->evaluateAsAbsolute(IntVal)) {
3213 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3214 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3215 Op.setModifiers(Mods);
3216 } else {
3217 if (HasLit)
3218 return ParseStatus::NoMatch;
3219 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3220 }
3221
3222 return ParseStatus::Success;
3223 }
3224
3225 return ParseStatus::NoMatch;
3226}
3227
3228ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3229 if (!isRegister())
3230 return ParseStatus::NoMatch;
3231
3232 if (auto R = parseRegister()) {
3233 assert(R->isReg());
3234 Operands.push_back(std::move(R));
3235 return ParseStatus::Success;
3236 }
3237 return ParseStatus::Failure;
3238}
3239
3240ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3241 bool HasSP3AbsMod, bool HasLit) {
3242 ParseStatus Res = parseReg(Operands);
3243 if (!Res.isNoMatch())
3244 return Res;
3245 if (isModifier())
3246 return ParseStatus::NoMatch;
3247 return parseImm(Operands, HasSP3AbsMod, HasLit);
3248}
3249
3250bool
3251AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3252 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3253 const auto &str = Token.getString();
3254 return str == "abs" || str == "neg" || str == "sext";
3255 }
3256 return false;
3257}
3258
3259bool
3260AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3261 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3262}
3263
3264bool
3265AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3266 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3267}
3268
3269bool
3270AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3271 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3272}
3273
3274// Check if this is an operand modifier or an opcode modifier
3275// which may look like an expression but is not. We should
3276// avoid parsing these modifiers as expressions. Currently
3277// recognized sequences are:
3278// |...|
3279// abs(...)
3280// neg(...)
3281// sext(...)
3282// -reg
3283// -|...|
3284// -abs(...)
3285// name:...
3286//
3287bool
3288AMDGPUAsmParser::isModifier() {
3289
3290 AsmToken Tok = getToken();
3291 AsmToken NextToken[2];
3292 peekTokens(NextToken);
3293
3294 return isOperandModifier(Tok, NextToken[0]) ||
3295 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3296 isOpcodeModifierWithVal(Tok, NextToken[0]);
3297}
3298
3299// Check if the current token is an SP3 'neg' modifier.
3300// Currently this modifier is allowed in the following context:
3301//
3302// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3303// 2. Before an 'abs' modifier: -abs(...)
3304// 3. Before an SP3 'abs' modifier: -|...|
3305//
3306// In all other cases "-" is handled as a part
3307// of an expression that follows the sign.
3308//
3309// Note: When "-" is followed by an integer literal,
3310// it is interpreted as integer negation rather than
3311// a floating-point NEG modifier applied to the literal.
3312// Besides being counter-intuitive, such a use of the
3313// floating-point NEG modifier would result in a different
3314// meaning of integer literals used with VOP1/2/C and VOP3,
3315// for example:
3316// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3317// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3318// Negative fp literals with preceding "-" are
3319// handled likewise for uniformity
3320//
3321bool
3322AMDGPUAsmParser::parseSP3NegModifier() {
3323
3324 AsmToken NextToken[2];
3325 peekTokens(NextToken);
3326
3327 if (isToken(AsmToken::Minus) &&
3328 (isRegister(NextToken[0], NextToken[1]) ||
3329 NextToken[0].is(AsmToken::Pipe) ||
3330 isId(NextToken[0], "abs"))) {
3331 lex();
3332 return true;
3333 }
3334
3335 return false;
3336}
3337
3338ParseStatus
3339AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3340 bool AllowImm) {
3341 bool Neg, SP3Neg;
3342 bool Abs, SP3Abs;
3343 bool Lit;
3344 SMLoc Loc;
3345
3346 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3347 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3348 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3349
3350 SP3Neg = parseSP3NegModifier();
3351
3352 Loc = getLoc();
3353 Neg = trySkipId("neg");
3354 if (Neg && SP3Neg)
3355 return Error(Loc, "expected register or immediate");
3356 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3357 return ParseStatus::Failure;
3358
3359 Abs = trySkipId("abs");
3360 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3361 return ParseStatus::Failure;
3362
3363 Lit = trySkipId("lit");
3364 if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3365 return ParseStatus::Failure;
3366
3367 Loc = getLoc();
3368 SP3Abs = trySkipToken(AsmToken::Pipe);
3369 if (Abs && SP3Abs)
3370 return Error(Loc, "expected register or immediate");
3371
3372 ParseStatus Res;
3373 if (AllowImm) {
3374 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3375 } else {
3376 Res = parseReg(Operands);
3377 }
3378 if (!Res.isSuccess())
3379 return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
3380
3381 if (Lit && !Operands.back()->isImm())
3382 Error(Loc, "expected immediate with lit modifier");
3383
3384 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3385 return ParseStatus::Failure;
3386 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3387 return ParseStatus::Failure;
3388 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3389 return ParseStatus::Failure;
3390 if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3391 return ParseStatus::Failure;
3392
3393 AMDGPUOperand::Modifiers Mods;
3394 Mods.Abs = Abs || SP3Abs;
3395 Mods.Neg = Neg || SP3Neg;
3396 Mods.Lit = Lit;
3397
3398 if (Mods.hasFPModifiers() || Lit) {
3399 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3400 if (Op.isExpr())
3401 return Error(Op.getStartLoc(), "expected an absolute expression");
3402 Op.setModifiers(Mods);
3403 }
3404 return ParseStatus::Success;
3405}
3406
3407ParseStatus
3408AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3409 bool AllowImm) {
3410 bool Sext = trySkipId("sext");
3411 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3412 return ParseStatus::Failure;
3413
3414 ParseStatus Res;
3415 if (AllowImm) {
3416 Res = parseRegOrImm(Operands);
3417 } else {
3418 Res = parseReg(Operands);
3419 }
3420 if (!Res.isSuccess())
3421 return Sext ? ParseStatus::Failure : Res;
3422
3423 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3424 return ParseStatus::Failure;
3425
3426 AMDGPUOperand::Modifiers Mods;
3427 Mods.Sext = Sext;
3428
3429 if (Mods.hasIntModifiers()) {
3430 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3431 if (Op.isExpr())
3432 return Error(Op.getStartLoc(), "expected an absolute expression");
3433 Op.setModifiers(Mods);
3434 }
3435
3436 return ParseStatus::Success;
3437}
3438
3439ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3440 return parseRegOrImmWithFPInputMods(Operands, false);
3441}
3442
3443ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3444 return parseRegOrImmWithIntInputMods(Operands, false);
3445}
3446
3447ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3448 auto Loc = getLoc();
3449 if (trySkipId("off")) {
3450 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3451 AMDGPUOperand::ImmTyOff, false));
3452 return ParseStatus::Success;
3453 }
3454
3455 if (!isRegister())
3456 return ParseStatus::NoMatch;
3457
3458 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3459 if (Reg) {
3460 Operands.push_back(std::move(Reg));
3461 return ParseStatus::Success;
3462 }
3463
3464 return ParseStatus::Failure;
3465}
3466
3467unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3468 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3469
3470 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3471 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3472 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3473 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3474 return Match_InvalidOperand;
3475
3476 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3477 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3478 // v_mac_f32/16 allow only dst_sel == DWORD;
3479 auto OpNum =
3480 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3481 const auto &Op = Inst.getOperand(OpNum);
3482 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3483 return Match_InvalidOperand;
3484 }
3485 }
3486
3487 return Match_Success;
3488}
3489
3490static ArrayRef<unsigned> getAllVariants() {
3491 static const unsigned Variants[] = {
3495 };
3496
3497 return ArrayRef(Variants);
3498}
3499
3500// What asm variants we should check
3501ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3502 if (isForcedDPP() && isForcedVOP3()) {
3503 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3504 return ArrayRef(Variants);
3505 }
3506 if (getForcedEncodingSize() == 32) {
3507 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3508 return ArrayRef(Variants);
3509 }
3510
3511 if (isForcedVOP3()) {
3512 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3513 return ArrayRef(Variants);
3514 }
3515
3516 if (isForcedSDWA()) {
3517 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3518 AMDGPUAsmVariants::SDWA9};
3519 return ArrayRef(Variants);
3520 }
3521
3522 if (isForcedDPP()) {
3523 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3524 return ArrayRef(Variants);
3525 }
3526
3527 return getAllVariants();
3528}
3529
3530StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3531 if (isForcedDPP() && isForcedVOP3())
3532 return "e64_dpp";
3533
3534 if (getForcedEncodingSize() == 32)
3535 return "e32";
3536
3537 if (isForcedVOP3())
3538 return "e64";
3539
3540 if (isForcedSDWA())
3541 return "sdwa";
3542
3543 if (isForcedDPP())
3544 return "dpp";
3545
3546 return "";
3547}
3548
3549unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3550 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3551 for (MCPhysReg Reg : Desc.implicit_uses()) {
3552 switch (Reg) {
3553 case AMDGPU::FLAT_SCR:
3554 case AMDGPU::VCC:
3555 case AMDGPU::VCC_LO:
3556 case AMDGPU::VCC_HI:
3557 case AMDGPU::M0:
3558 return Reg;
3559 default:
3560 break;
3561 }
3562 }
3563 return AMDGPU::NoRegister;
3564}
3565
3566// NB: This code is correct only when used to check constant
3567// bus limitations because GFX7 supports no f16 inline constants.
3568// Note that there are no cases when a GFX7 opcode violates
3569// constant bus limitations due to the use of an f16 constant.
3570bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3571 unsigned OpIdx) const {
3572 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3573
3574 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3575 AMDGPU::isKImmOperand(Desc, OpIdx)) {
3576 return false;
3577 }
3578
3579 const MCOperand &MO = Inst.getOperand(OpIdx);
3580
3581 int64_t Val = MO.getImm();
3582 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3583
3584 switch (OpSize) { // expected operand size
3585 case 8:
3586 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3587 case 4:
3588 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3589 case 2: {
3590 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3594 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3595
3600
3605
3610
3615 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3616
3621 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3622
3623 llvm_unreachable("invalid operand type");
3624 }
3625 default:
3626 llvm_unreachable("invalid operand size");
3627 }
3628}
3629
3630unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3631 if (!isGFX10Plus())
3632 return 1;
3633
3634 switch (Opcode) {
3635 // 64-bit shift instructions can use only one scalar value input
3636 case AMDGPU::V_LSHLREV_B64_e64:
3637 case AMDGPU::V_LSHLREV_B64_gfx10:
3638 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3639 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3640 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3641 case AMDGPU::V_LSHRREV_B64_e64:
3642 case AMDGPU::V_LSHRREV_B64_gfx10:
3643 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3644 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3645 case AMDGPU::V_ASHRREV_I64_e64:
3646 case AMDGPU::V_ASHRREV_I64_gfx10:
3647 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3648 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3649 case AMDGPU::V_LSHL_B64_e64:
3650 case AMDGPU::V_LSHR_B64_e64:
3651 case AMDGPU::V_ASHR_I64_e64:
3652 return 1;
3653 default:
3654 return 2;
3655 }
3656}
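// For example, on GFX10+ the 64-bit shifts listed above (such as
// v_lshlrev_b64) may read only one scalar value, while most other VOP
// instructions may read two; on earlier targets the limit is always one.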
3657
3658constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3659using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3660
3661// Get regular operand indices in the same order as specified
3662// in the instruction (but append mandatory literals to the end).
3663static OperandIndices getSrcOperandIndices(unsigned Opcode,
3664 bool AddMandatoryLiterals = false) {
3665
3666 int16_t ImmIdx =
3667 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3668
3669 if (isVOPD(Opcode)) {
3670 int16_t ImmDeferredIdx =
3671 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3672 : -1;
3673
3674 return {getNamedOperandIdx(Opcode, OpName::src0X),
3675 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3676 getNamedOperandIdx(Opcode, OpName::src0Y),
3677 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3678 ImmDeferredIdx,
3679 ImmIdx};
3680 }
3681
3682 return {getNamedOperandIdx(Opcode, OpName::src0),
3683 getNamedOperandIdx(Opcode, OpName::src1),
3684 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3685}
3686
3687bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3688 const MCOperand &MO = Inst.getOperand(OpIdx);
3689 if (MO.isImm()) {
3690 return !isInlineConstant(Inst, OpIdx);
3691 } else if (MO.isReg()) {
3692 auto Reg = MO.getReg();
3693 if (!Reg) {
3694 return false;
3695 }
3696 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3697 auto PReg = mc2PseudoReg(Reg);
3698 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3699 } else {
3700 return true;
3701 }
3702}
3703
3704// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3705// Writelane is special in that it can use SGPR and M0 (which would normally
3706// count as using the constant bus twice - but in this case it is allowed since
3707// the lane selector doesn't count as a use of the constant bus). However, it is
3708// still required to abide by the 1 SGPR rule.
3709static bool checkWriteLane(const MCInst &Inst) {
3710 const unsigned Opcode = Inst.getOpcode();
3711 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3712 return false;
3713 const MCOperand &LaneSelOp = Inst.getOperand(2);
3714 if (!LaneSelOp.isReg())
3715 return false;
3716 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3717 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3718}
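// For example, "v_writelane_b32 v1, s2, m0" names both an SGPR and M0; when
// the lane selector is M0, the constant bus check below is skipped entirely.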
3719
3720bool AMDGPUAsmParser::validateConstantBusLimitations(
3721 const MCInst &Inst, const OperandVector &Operands) {
3722 const unsigned Opcode = Inst.getOpcode();
3723 const MCInstrDesc &Desc = MII.get(Opcode);
3724 unsigned LastSGPR = AMDGPU::NoRegister;
3725 unsigned ConstantBusUseCount = 0;
3726 unsigned NumLiterals = 0;
3727 unsigned LiteralSize;
3728
3729 if (!(Desc.TSFlags &
3732 !isVOPD(Opcode))
3733 return true;
3734
3735 if (checkWriteLane(Inst))
3736 return true;
3737
3738 // Check special imm operands (used by madmk, etc)
3739 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3740 ++NumLiterals;
3741 LiteralSize = 4;
3742 }
3743
3744 SmallDenseSet<unsigned> SGPRsUsed;
3745 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3746 if (SGPRUsed != AMDGPU::NoRegister) {
3747 SGPRsUsed.insert(SGPRUsed);
3748 ++ConstantBusUseCount;
3749 }
3750
3751 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3752
3753 for (int OpIdx : OpIndices) {
3754 if (OpIdx == -1)
3755 continue;
3756
3757 const MCOperand &MO = Inst.getOperand(OpIdx);
3758 if (usesConstantBus(Inst, OpIdx)) {
3759 if (MO.isReg()) {
3760 LastSGPR = mc2PseudoReg(MO.getReg());
3761 // Pairs of registers with a partial intersection like these
3762 // s0, s[0:1]
3763 // flat_scratch_lo, flat_scratch
3764 // flat_scratch_lo, flat_scratch_hi
3765 // are theoretically valid but they are disabled anyway.
3766 // Note that this code mimics SIInstrInfo::verifyInstruction
3767 if (SGPRsUsed.insert(LastSGPR).second) {
3768 ++ConstantBusUseCount;
3769 }
3770 } else { // Expression or a literal
3771
3772 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3773 continue; // special operand like VINTERP attr_chan
3774
3775 // An instruction may use only one literal.
3776 // This has been validated on the previous step.
3777 // See validateVOPLiteral.
3778 // This literal may be used as more than one operand.
3779 // If all these operands are of the same size,
3780 // this literal counts as one scalar value.
3781 // Otherwise it counts as 2 scalar values.
3782 // See "GFX10 Shader Programming", section 3.6.2.3.
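 // Hedged illustration (not part of the original source): if the same
 // 32-bit literal feeds two 32-bit source operands, it is counted once
 // below; if the operands consuming it have different sizes (e.g. a
 // 16-bit and a 32-bit source), NumLiterals is bumped to 2 and the
 // literal occupies two scalar slots.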
3783
3784 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3785 if (Size < 4)
3786 Size = 4;
3787
3788 if (NumLiterals == 0) {
3789 NumLiterals = 1;
3790 LiteralSize = Size;
3791 } else if (LiteralSize != Size) {
3792 NumLiterals = 2;
3793 }
3794 }
3795 }
3796 }
3797 ConstantBusUseCount += NumLiterals;
3798
3799 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3800 return true;
3801
3802 SMLoc LitLoc = getLitLoc(Operands);
3803 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3804 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3805 Error(Loc, "invalid operand (violates constant bus restrictions)");
3806 return false;
3807}
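// Hedged example (not from the original source; approximate syntax): on a
// target whose constant bus limit is 1, a VOP3 form such as
// "v_add_f32_e64 v0, s0, s1" reads two different SGPRs and is rejected with
// the error above, while reusing the same SGPR for both sources stays
// within the limit.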
3808
3809bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3810 const MCInst &Inst, const OperandVector &Operands) {
3811
3812 const unsigned Opcode = Inst.getOpcode();
3813 if (!isVOPD(Opcode))
3814 return true;
3815
3816 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3817
3818 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3819 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3820 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3821 ? Opr.getReg()
3822 : MCRegister::NoRegister;
3823 };
3824
3825 // On GFX12, if both OpX and OpY are V_MOV_B32 then OpY uses the SRC2 source-cache.
3826 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3827
3828 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3829 auto InvalidCompOprIdx =
3830 InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3831 if (!InvalidCompOprIdx)
3832 return true;
3833
3834 auto CompOprIdx = *InvalidCompOprIdx;
3835 auto ParsedIdx =
3836 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3837 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3838 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3839
3840 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3841 if (CompOprIdx == VOPD::Component::DST) {
3842 Error(Loc, "one dst register must be even and the other odd");
3843 } else {
3844 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3845 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3846 " operands must use different VGPR banks");
3847 }
3848
3849 return false;
3850}
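// Hedged example (not from the original source; approximate syntax): the two
// VOPD destinations must be one even and one odd VGPR, so a pair like
// "v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5" satisfies the dst rule,
// whereas using v0 and v2 as the two destinations is diagnosed above, as are
// srcN operands that fall into the same VGPR bank.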
3851
3852bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3853
3854 const unsigned Opc = Inst.getOpcode();
3855 const MCInstrDesc &Desc = MII.get(Opc);
3856
3857 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3858 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3859 assert(ClampIdx != -1);
3860 return Inst.getOperand(ClampIdx).getImm() == 0;
3861 }
3862
3863 return true;
3864}
3865
3866constexpr uint64_t MIMGFlags =
3867 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
3868
3869bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3870 const SMLoc &IDLoc) {
3871
3872 const unsigned Opc = Inst.getOpcode();
3873 const MCInstrDesc &Desc = MII.get(Opc);
3874
3875 if ((Desc.TSFlags & MIMGFlags) == 0)
3876 return true;
3877
3878 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3879 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3880 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3881
3882 assert(VDataIdx != -1);
3883
3884 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3885 return true;
3886
3887 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3888 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3889 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3890 if (DMask == 0)
3891 DMask = 1;
3892
3893 bool IsPackedD16 = false;
3894 unsigned DataSize =
3895 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3896 if (hasPackedD16()) {
3897 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3898 IsPackedD16 = D16Idx >= 0;
3899 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3900 DataSize = (DataSize + 1) / 2;
3901 }
3902
3903 if ((VDataSize / 4) == DataSize + TFESize)
3904 return true;
3905
3906 StringRef Modifiers;
3907 if (isGFX90A())
3908 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3909 else
3910 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3911
3912 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3913 return false;
3914}
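// Hedged example (not from the original source): with dmask:0x7 a plain
// image_load returns three components, so vdata is expected to be a
// 3-register tuple (one more register if tfe is set); with packed d16 the
// expectation is halved to ceil(3/2) = 2 registers.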
3915
3916bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3917 const SMLoc &IDLoc) {
3918 const unsigned Opc = Inst.getOpcode();
3919 const MCInstrDesc &Desc = MII.get(Opc);
3920
3921 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
3922 return true;
3923
3924 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3925
3926 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3927 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3928 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3929 int RSrcOpName = Desc.TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc
3930 : AMDGPU::OpName::rsrc;
3931 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
3932 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3933 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3934
3935 assert(VAddr0Idx != -1);
3936 assert(SrsrcIdx != -1);
3937 assert(SrsrcIdx > VAddr0Idx);
3938
3939 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3940 if (BaseOpcode->BVH) {
3941 if (IsA16 == BaseOpcode->A16)
3942 return true;
3943 Error(IDLoc, "image address size does not match a16");
3944 return false;
3945 }
3946
3947 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3948 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3949 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3950 unsigned ActualAddrSize =
3951 IsNSA ? SrsrcIdx - VAddr0Idx
3952 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3953
3954 unsigned ExpectedAddrSize =
3955 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3956
3957 if (IsNSA) {
3958 if (hasPartialNSAEncoding() &&
3959 ExpectedAddrSize >
3960 getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
3961 int VAddrLastIdx = SrsrcIdx - 1;
3962 unsigned VAddrLastSize =
3963 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3964
3965 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3966 }
3967 } else {
3968 if (ExpectedAddrSize > 12)
3969 ExpectedAddrSize = 16;
3970
3971 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3972 // This provides backward compatibility for assembly created
3973 // before 160b/192b/224b types were directly supported.
3974 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3975 return true;
3976 }
3977
3978 if (ActualAddrSize == ExpectedAddrSize)
3979 return true;
3980
3981 Error(IDLoc, "image address size does not match dim and a16");
3982 return false;
3983}
3984
3985bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3986
3987 const unsigned Opc = Inst.getOpcode();
3988 const MCInstrDesc &Desc = MII.get(Opc);
3989
3990 if ((Desc.TSFlags & MIMGFlags) == 0)
3991 return true;
3992 if (!Desc.mayLoad() || !Desc.mayStore())
3993 return true; // Not atomic
3994
3995 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3996 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3997
3998 // This is an incomplete check because image_atomic_cmpswap
3999 // may only use 0x3 and 0xf while other atomic operations
4000 // may use 0x1 and 0x3. However these limitations are
4001 // verified when we check that dmask matches dst size.
4002 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4003}
4004
4005bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4006
4007 const unsigned Opc = Inst.getOpcode();
4008 const MCInstrDesc &Desc = MII.get(Opc);
4009
4010 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4011 return true;
4012
4013 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4014 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4015
4016 // GATHER4 instructions use dmask in a different fashion compared to
4017 // other MIMG instructions. The only useful DMASK values are
4018 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4019 // (red,red,red,red) etc.) The ISA document doesn't mention
4020 // this.
4021 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4022}
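// Hedged example (not from the original source; approximate syntax):
// "image_gather4 ... dmask:0x1" gathers the red channel into the four result
// registers; a dmask such as 0x3 selects more than one channel and is
// rejected by the check above.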
4023
4024bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4025 const unsigned Opc = Inst.getOpcode();
4026 const MCInstrDesc &Desc = MII.get(Opc);
4027
4028 if ((Desc.TSFlags & MIMGFlags) == 0)
4029 return true;
4030
4031 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4032 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4033 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4034
4035 if (!BaseOpcode->MSAA)
4036 return true;
4037
4038 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4039 assert(DimIdx != -1);
4040
4041 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4042 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4043
4044 return DimInfo->MSAA;
4045}
4046
4047static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4048{
4049 switch (Opcode) {
4050 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4051 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4052 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4053 return true;
4054 default:
4055 return false;
4056 }
4057}
4058
4059// movrels* opcodes should only allow VGPRs as src0.
4060// This is specified in .td description for vop1/vop3,
4061// but sdwa is handled differently. See isSDWAOperand.
4062bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4063 const OperandVector &Operands) {
4064
4065 const unsigned Opc = Inst.getOpcode();
4066 const MCInstrDesc &Desc = MII.get(Opc);
4067
4068 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4069 return true;
4070
4071 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4072 assert(Src0Idx != -1);
4073
4074 SMLoc ErrLoc;
4075 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4076 if (Src0.isReg()) {
4077 auto Reg = mc2PseudoReg(Src0.getReg());
4078 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4079 if (!isSGPR(Reg, TRI))
4080 return true;
4081 ErrLoc = getRegLoc(Reg, Operands);
4082 } else {
4083 ErrLoc = getConstLoc(Operands);
4084 }
4085
4086 Error(ErrLoc, "source operand must be a VGPR");
4087 return false;
4088}
4089
4090bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4091 const OperandVector &Operands) {
4092
4093 const unsigned Opc = Inst.getOpcode();
4094
4095 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4096 return true;
4097
4098 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4099 assert(Src0Idx != -1);
4100
4101 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4102 if (!Src0.isReg())
4103 return true;
4104
4105 auto Reg = mc2PseudoReg(Src0.getReg());
4106 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4107 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4108 Error(getRegLoc(Reg, Operands),
4109 "source operand must be either a VGPR or an inline constant");
4110 return false;
4111 }
4112
4113 return true;
4114}
4115
4116bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4117 const OperandVector &Operands) {
4118 unsigned Opcode = Inst.getOpcode();
4119 const MCInstrDesc &Desc = MII.get(Opcode);
4120
4121 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4122 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4123 return true;
4124
4125 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4126 if (Src2Idx == -1)
4127 return true;
4128
4129 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4130 Error(getConstLoc(Operands),
4131 "inline constants are not allowed for this operand");
4132 return false;
4133 }
4134
4135 return true;
4136}
4137
4138bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4139 const OperandVector &Operands) {
4140 const unsigned Opc = Inst.getOpcode();
4141 const MCInstrDesc &Desc = MII.get(Opc);
4142
4143 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4144 return true;
4145
4146 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4147 if (Src2Idx == -1)
4148 return true;
4149
4150 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4151 if (!Src2.isReg())
4152 return true;
4153
4154 MCRegister Src2Reg = Src2.getReg();
4155 MCRegister DstReg = Inst.getOperand(0).getReg();
4156 if (Src2Reg == DstReg)
4157 return true;
4158
4159 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4160 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4161 return true;
4162
4163 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4164 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
4165 "source 2 operand must not partially overlap with dst");
4166 return false;
4167 }
4168
4169 return true;
4170}
4171
4172bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4173 switch (Inst.getOpcode()) {
4174 default:
4175 return true;
4176 case V_DIV_SCALE_F32_gfx6_gfx7:
4177 case V_DIV_SCALE_F32_vi:
4178 case V_DIV_SCALE_F32_gfx10:
4179 case V_DIV_SCALE_F64_gfx6_gfx7:
4180 case V_DIV_SCALE_F64_vi:
4181 case V_DIV_SCALE_F64_gfx10:
4182 break;
4183 }
4184
4185 // TODO: Check that src0 = src1 or src2.
4186
4187 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4188 AMDGPU::OpName::src1_modifiers,
4189 AMDGPU::OpName::src2_modifiers}) {
4190 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4191 .getImm() &
4192 SISrcMods::ABS) {
4193 return false;
4194 }
4195 }
4196
4197 return true;
4198}
4199
4200bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4201
4202 const unsigned Opc = Inst.getOpcode();
4203 const MCInstrDesc &Desc = MII.get(Opc);
4204
4205 if ((Desc.TSFlags & MIMGFlags) == 0)
4206 return true;
4207
4208 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4209 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4210 if (isCI() || isSI())
4211 return false;
4212 }
4213
4214 return true;
4215}
4216
4217static bool IsRevOpcode(const unsigned Opcode)
4218{
4219 switch (Opcode) {
4220 case AMDGPU::V_SUBREV_F32_e32:
4221 case AMDGPU::V_SUBREV_F32_e64:
4222 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4223 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4224 case AMDGPU::V_SUBREV_F32_e32_vi:
4225 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4226 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4227 case AMDGPU::V_SUBREV_F32_e64_vi:
4228
4229 case AMDGPU::V_SUBREV_CO_U32_e32:
4230 case AMDGPU::V_SUBREV_CO_U32_e64:
4231 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4232 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4233
4234 case AMDGPU::V_SUBBREV_U32_e32:
4235 case AMDGPU::V_SUBBREV_U32_e64:
4236 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4237 case AMDGPU::V_SUBBREV_U32_e32_vi:
4238 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4239 case AMDGPU::V_SUBBREV_U32_e64_vi:
4240
4241 case AMDGPU::V_SUBREV_U32_e32:
4242 case AMDGPU::V_SUBREV_U32_e64:
4243 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4244 case AMDGPU::V_SUBREV_U32_e32_vi:
4245 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4246 case AMDGPU::V_SUBREV_U32_e64_vi:
4247
4248 case AMDGPU::V_SUBREV_F16_e32:
4249 case AMDGPU::V_SUBREV_F16_e64:
4250 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4251 case AMDGPU::V_SUBREV_F16_e32_vi:
4252 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4253 case AMDGPU::V_SUBREV_F16_e64_vi:
4254
4255 case AMDGPU::V_SUBREV_U16_e32:
4256 case AMDGPU::V_SUBREV_U16_e64:
4257 case AMDGPU::V_SUBREV_U16_e32_vi:
4258 case AMDGPU::V_SUBREV_U16_e64_vi:
4259
4260 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4261 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4262 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4263
4264 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4265 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4266
4267 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4268 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4269
4270 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4271 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4272
4273 case AMDGPU::V_LSHRREV_B32_e32:
4274 case AMDGPU::V_LSHRREV_B32_e64:
4275 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4276 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4277 case AMDGPU::V_LSHRREV_B32_e32_vi:
4278 case AMDGPU::V_LSHRREV_B32_e64_vi:
4279 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4280 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4281
4282 case AMDGPU::V_ASHRREV_I32_e32:
4283 case AMDGPU::V_ASHRREV_I32_e64:
4284 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4285 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4286 case AMDGPU::V_ASHRREV_I32_e32_vi:
4287 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4288 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4289 case AMDGPU::V_ASHRREV_I32_e64_vi:
4290
4291 case AMDGPU::V_LSHLREV_B32_e32:
4292 case AMDGPU::V_LSHLREV_B32_e64:
4293 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4294 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4295 case AMDGPU::V_LSHLREV_B32_e32_vi:
4296 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4297 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4298 case AMDGPU::V_LSHLREV_B32_e64_vi:
4299
4300 case AMDGPU::V_LSHLREV_B16_e32:
4301 case AMDGPU::V_LSHLREV_B16_e64:
4302 case AMDGPU::V_LSHLREV_B16_e32_vi:
4303 case AMDGPU::V_LSHLREV_B16_e64_vi:
4304 case AMDGPU::V_LSHLREV_B16_gfx10:
4305
4306 case AMDGPU::V_LSHRREV_B16_e32:
4307 case AMDGPU::V_LSHRREV_B16_e64:
4308 case AMDGPU::V_LSHRREV_B16_e32_vi:
4309 case AMDGPU::V_LSHRREV_B16_e64_vi:
4310 case AMDGPU::V_LSHRREV_B16_gfx10:
4311
4312 case AMDGPU::V_ASHRREV_I16_e32:
4313 case AMDGPU::V_ASHRREV_I16_e64:
4314 case AMDGPU::V_ASHRREV_I16_e32_vi:
4315 case AMDGPU::V_ASHRREV_I16_e64_vi:
4316 case AMDGPU::V_ASHRREV_I16_gfx10:
4317
4318 case AMDGPU::V_LSHLREV_B64_e64:
4319 case AMDGPU::V_LSHLREV_B64_gfx10:
4320 case AMDGPU::V_LSHLREV_B64_vi:
4321
4322 case AMDGPU::V_LSHRREV_B64_e64:
4323 case AMDGPU::V_LSHRREV_B64_gfx10:
4324 case AMDGPU::V_LSHRREV_B64_vi:
4325
4326 case AMDGPU::V_ASHRREV_I64_e64:
4327 case AMDGPU::V_ASHRREV_I64_gfx10:
4328 case AMDGPU::V_ASHRREV_I64_vi:
4329
4330 case AMDGPU::V_PK_LSHLREV_B16:
4331 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4332 case AMDGPU::V_PK_LSHLREV_B16_vi:
4333
4334 case AMDGPU::V_PK_LSHRREV_B16:
4335 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4336 case AMDGPU::V_PK_LSHRREV_B16_vi:
4337 case AMDGPU::V_PK_ASHRREV_I16:
4338 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4339 case AMDGPU::V_PK_ASHRREV_I16_vi:
4340 return true;
4341 default:
4342 return false;
4343 }
4344}
4345
4346std::optional<StringRef>
4347AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4348
4349 using namespace SIInstrFlags;
4350 const unsigned Opcode = Inst.getOpcode();
4351 const MCInstrDesc &Desc = MII.get(Opcode);
4352
4353 // lds_direct register is defined so that it can be used
4354 // with 9-bit operands only. Ignore encodings which do not accept these.
4355 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4356 if ((Desc.TSFlags & Enc) == 0)
4357 return std::nullopt;
4358
4359 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4360 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4361 if (SrcIdx == -1)
4362 break;
4363 const auto &Src = Inst.getOperand(SrcIdx);
4364 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4365
4366 if (isGFX90A() || isGFX11Plus())
4367 return StringRef("lds_direct is not supported on this GPU");
4368
4369 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4370 return StringRef("lds_direct cannot be used with this instruction");
4371
4372 if (SrcName != OpName::src0)
4373 return StringRef("lds_direct may be used as src0 only");
4374 }
4375 }
4376
4377 return std::nullopt;
4378}
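// Hedged example (not from the original source; approximate syntax): on
// targets that still accept lds_direct, something like
// "v_mov_b32 v0, lds_direct" is legal because the operand appears as src0,
// whereas using lds_direct as src1/src2, with an SDWA form, or with a *rev
// opcode is diagnosed by the checks above.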
4379
4380SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4381 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4382 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4383 if (Op.isFlatOffset())
4384 return Op.getStartLoc();
4385 }
4386 return getLoc();
4387}
4388
4389bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4390 const OperandVector &Operands) {
4391 auto Opcode = Inst.getOpcode();
4392 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4393 if (OpNum == -1)
4394 return true;
4395
4396 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4397 if ((TSFlags & SIInstrFlags::FLAT))
4398 return validateFlatOffset(Inst, Operands);
4399
4400 if ((TSFlags & SIInstrFlags::SMRD))
4401 return validateSMEMOffset(Inst, Operands);
4402
4403 const auto &Op = Inst.getOperand(OpNum);
4404 if (isGFX12Plus() &&
4405 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4406 const unsigned OffsetSize = 24;
4407 if (!isIntN(OffsetSize, Op.getImm())) {
4408 Error(getFlatOffsetLoc(Operands),
4409 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4410 return false;
4411 }
4412 } else {
4413 const unsigned OffsetSize = 16;
4414 if (!isUIntN(OffsetSize, Op.getImm())) {
4415 Error(getFlatOffsetLoc(Operands),
4416 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4417 return false;
4418 }
4419 }
4420 return true;
4421}
4422
4423bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4424 const OperandVector &Operands) {
4425 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4426 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4427 return true;
4428
4429 auto Opcode = Inst.getOpcode();
4430 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4431 assert(OpNum != -1);
4432
4433 const auto &Op = Inst.getOperand(OpNum);
4434 if (!hasFlatOffsets() && Op.getImm() != 0) {
4435 Error(getFlatOffsetLoc(Operands),
4436 "flat offset modifier is not supported on this GPU");
4437 return false;
4438 }
4439
4440 // For pre-GFX12 FLAT instructions the offset must be positive;
4441 // MSB is ignored and forced to zero.
4442 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4443 bool AllowNegative =
4444 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4445 isGFX12Plus();
4446 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4447 Error(getFlatOffsetLoc(Operands),
4448 Twine("expected a ") +
4449 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4450 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4451 return false;
4452 }
4453
4454 return true;
4455}
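// Hedged example (not from the original source; approximate syntax): on a
// pre-GFX12 target the plain FLAT form rejects a negative offset, e.g.
// "flat_load_dword v0, v[0:1] offset:-8", while the global/scratch segment
// forms (and GFX12+) accept a signed offset of the advertised width.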
4456
4457SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4458 // Start with second operand because SMEM Offset cannot be dst or src0.
4459 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4460 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4461 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4462 return Op.getStartLoc();
4463 }
4464 return getLoc();
4465}
4466
4467bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4468 const OperandVector &Operands) {
4469 if (isCI() || isSI())
4470 return true;
4471
4472 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4473 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4474 return true;
4475
4476 auto Opcode = Inst.getOpcode();
4477 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4478 if (OpNum == -1)
4479 return true;
4480
4481 const auto &Op = Inst.getOperand(OpNum);
4482 if (!Op.isImm())
4483 return true;
4484
4485 uint64_t Offset = Op.getImm();
4486 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4487 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4488 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4489 return true;
4490
4491 Error(getSMEMOffsetLoc(Operands),
4492 isGFX12Plus() ? "expected a 24-bit signed offset"
4493 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4494 : "expected a 21-bit signed offset");
4495
4496 return false;
4497}
4498
4499bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4500 unsigned Opcode = Inst.getOpcode();
4501 const MCInstrDesc &Desc = MII.get(Opcode);
4502 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4503 return true;
4504
4505 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4506 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4507
4508 const int OpIndices[] = { Src0Idx, Src1Idx };
4509
4510 unsigned NumExprs = 0;
4511 unsigned NumLiterals = 0;
4512 uint32_t LiteralValue;
4513
4514 for (int OpIdx : OpIndices) {
4515 if (OpIdx == -1) break;
4516
4517 const MCOperand &MO = Inst.getOperand(OpIdx);
4518 // Exclude special imm operands (like the one used by s_set_gpr_idx_on)
4519 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4520 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4521 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4522 if (NumLiterals == 0 || LiteralValue != Value) {
4523 LiteralValue = Value;
4524 ++NumLiterals;
4525 }
4526 } else if (MO.isExpr()) {
4527 ++NumExprs;
4528 }
4529 }
4530 }
4531
4532 return NumLiterals + NumExprs <= 1;
4533}
4534
4535bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4536 const unsigned Opc = Inst.getOpcode();
4537 if (isPermlane16(Opc)) {
4538 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4539 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4540
4541 if (OpSel & ~3)
4542 return false;
4543 }
4544
4545 uint64_t TSFlags = MII.get(Opc).TSFlags;
4546
4547 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4548 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4549 if (OpSelIdx != -1) {
4550 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4551 return false;
4552 }
4553 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4554 if (OpSelHiIdx != -1) {
4555 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4556 return false;
4557 }
4558 }
4559
4560 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4561 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4562 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4563 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4564 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4565 if (OpSel & 3)
4566 return false;
4567 }
4568
4569 return true;
4570}
4571
4572bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) {
4573 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4574
4575 const unsigned Opc = Inst.getOpcode();
4576 uint64_t TSFlags = MII.get(Opc).TSFlags;
4577
4578 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4579 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4580 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4581 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4582 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4583 !(TSFlags & SIInstrFlags::IsSWMMAC))
4584 return true;
4585
4586 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4587 if (NegIdx == -1)
4588 return true;
4589
4590 unsigned Neg = Inst.getOperand(NegIdx).getImm();
4591
4592 // Some instructions have a neg_lo or neg_hi operand, but the neg modifier
4593 // is allowed on some src operands and not on others.
4594 // Conveniently, such instructions don't have a src_modifiers operand for
4595 // the src operands that don't allow neg, because they also don't allow opsel.
4596
4597 int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4598 AMDGPU::OpName::src1_modifiers,
4599 AMDGPU::OpName::src2_modifiers};
4600
4601 for (unsigned i = 0; i < 3; ++i) {
4602 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4603 if (Neg & (1 << i))
4604 return false;
4605 }
4606 }
4607
4608 return true;
4609}
4610
4611bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4612 const OperandVector &Operands) {
4613 const unsigned Opc = Inst.getOpcode();
4614 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4615 if (DppCtrlIdx >= 0) {
4616 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4617
4618 if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
4619 AMDGPU::isDPALU_DPP(MII.get(Opc))) {
4620 // DP ALU DPP is supported for row_newbcast only on GFX9*
4621 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4622 Error(S, "DP ALU dpp only supports row_newbcast");
4623 return false;
4624 }
4625 }
4626
4627 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
4628 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4629
4630 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
4631 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4632 if (Src1Idx >= 0) {
4633 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4634 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4635 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
4636 auto Reg = mc2PseudoReg(Inst.getOperand(Src1Idx).getReg());
4637 SMLoc S = getRegLoc(Reg, Operands);
4638 Error(S, "invalid operand for instruction");
4639 return false;
4640 }
4641 if (Src1.isImm()) {
4642 Error(getInstLoc(Operands),
4643 "src1 immediate operand invalid for instruction");
4644 return false;
4645 }
4646 }
4647 }
4648
4649 return true;
4650}
4651
4652// Check if VCC register matches wavefront size
4653bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4654 auto FB = getFeatureBits();
4655 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4656 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4657}
4658
4659// Only one unique literal may be used. A VOP3 literal is only allowed on GFX10+.
4660bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4661 const OperandVector &Operands) {
4662 unsigned Opcode = Inst.getOpcode();
4663 const MCInstrDesc &Desc = MII.get(Opcode);
4664 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4665 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4666 !HasMandatoryLiteral && !isVOPD(Opcode))
4667 return true;
4668
4669 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4670
4671 unsigned NumExprs = 0;
4672 unsigned NumLiterals = 0;
4673 uint64_t LiteralValue;
4674
4675 for (int OpIdx : OpIndices) {
4676 if (OpIdx == -1)
4677 continue;
4678
4679 const MCOperand &MO = Inst.getOperand(OpIdx);
4680 if (!MO.isImm() && !MO.isExpr())
4681 continue;
4682 if (!isSISrcOperand(Desc, OpIdx))
4683 continue;
4684
4685 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4686 uint64_t Value = static_cast<uint64_t>(MO.getImm());
4687 bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
4688 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
4689 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
4690
4691 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
4692 Error(getLitLoc(Operands), "invalid operand for instruction");
4693 return false;
4694 }
4695
4696 if (IsFP64 && IsValid32Op)
4697 Value = Hi_32(Value);
4698
4699 if (NumLiterals == 0 || LiteralValue != Value) {
4700 LiteralValue = Value;
4701 ++NumLiterals;
4702 }
4703 } else if (MO.isExpr()) {
4704 ++NumExprs;
4705 }
4706 }
4707 NumLiterals += NumExprs;
4708
4709 if (!NumLiterals)
4710 return true;
4711
4712 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4713 Error(getLitLoc(Operands), "literal operands are not supported");
4714 return false;
4715 }
4716
4717 if (NumLiterals > 1) {
4718 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4719 return false;
4720 }
4721
4722 return true;
4723}
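// Hedged example (not from the original source; approximate syntax): on a
// GFX10+ target with FeatureVOP3Literal, "v_fma_f32 v0, 0x4000, v1, 0x4000"
// reuses a single unique literal (assumed here not to be an inline constant)
// and passes, whereas mixing two different literal values in one instruction
// triggers the "only one unique literal" error above.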
4724
4725// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4726static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4727 const MCRegisterInfo *MRI) {
4728 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4729 if (OpIdx < 0)
4730 return -1;
4731
4732 const MCOperand &Op = Inst.getOperand(OpIdx);
4733 if (!Op.isReg())
4734 return -1;
4735
4736 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4737 auto Reg = Sub ? Sub : Op.getReg();
4738 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4739 return AGPR32.contains(Reg) ? 1 : 0;
4740}
4741
4742bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4743 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4744 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4745 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4746 SIInstrFlags::DS)) == 0)
4747 return true;
4748
4749 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4750 : AMDGPU::OpName::vdata;
4751
4752 const MCRegisterInfo *MRI = getMRI();
4753 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4754 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4755
4756 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4757 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4758 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4759 return false;
4760 }
4761
4762 auto FB = getFeatureBits();
4763 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4764 if (DataAreg < 0 || DstAreg < 0)
4765 return true;
4766 return DstAreg == DataAreg;
4767 }
4768
4769 return DstAreg < 1 && DataAreg < 1;
4770}
4771
4772bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4773 auto FB = getFeatureBits();
4774 if (!FB[AMDGPU::FeatureGFX90AInsts])
4775 return true;
4776
4777 const MCRegisterInfo *MRI = getMRI();
4778 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4779 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4780 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4781 const MCOperand &Op = Inst.getOperand(I);
4782 if (!Op.isReg())
4783 continue;
4784
4785 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4786 if (!Sub)
4787 continue;
4788
4789 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4790 return false;
4791 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4792 return false;
4793 }
4794
4795 return true;
4796}
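// Hedged example (not from the original source): with gfx90a instructions
// enabled, a 64-bit (or wider) VGPR tuple must start on an even register, so
// v[2:3] is accepted while v[3:4] fails the alignment check above.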
4797
4798SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4799 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4800 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4801 if (Op.isBLGP())
4802 return Op.getStartLoc();
4803 }
4804 return SMLoc();
4805}
4806
4807bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4808 const OperandVector &Operands) {
4809 unsigned Opc = Inst.getOpcode();
4810 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4811 if (BlgpIdx == -1)
4812 return true;
4813 SMLoc BLGPLoc = getBLGPLoc(Operands);
4814 if (!BLGPLoc.isValid())
4815 return true;
4816 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
4817 auto FB = getFeatureBits();
4818 bool UsesNeg = false;
4819 if (FB[AMDGPU::FeatureGFX940Insts]) {
4820 switch (Opc) {
4821 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4822 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4823 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4824 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4825 UsesNeg = true;
4826 }
4827 }
4828
4829 if (IsNeg == UsesNeg)
4830 return true;
4831
4832 Error(BLGPLoc,
4833 UsesNeg ? "invalid modifier: blgp is not supported"
4834 : "invalid modifier: neg is not supported");
4835
4836 return false;
4837}
4838
4839bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4840 const OperandVector &Operands) {
4841 if (!isGFX11Plus())
4842 return true;
4843
4844 unsigned Opc = Inst.getOpcode();
4845 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4846 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4847 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4848 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4849 return true;
4850
4851 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4852 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4853 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4854 if (Reg == AMDGPU::SGPR_NULL)
4855 return true;
4856
4857 SMLoc RegLoc = getRegLoc(Reg, Operands);
4858 Error(RegLoc, "src0 must be null");
4859 return false;
4860}
4861
4862bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
4863 const OperandVector &Operands) {
4864 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4865 if ((TSFlags & SIInstrFlags::DS) == 0)
4866 return true;
4867 if (TSFlags & SIInstrFlags::GWS)
4868 return validateGWS(Inst, Operands);
4869 // Only validate GDS for non-GWS instructions.
4870 if (hasGDS())
4871 return true;
4872 int GDSIdx =
4873 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
4874 if (GDSIdx < 0)
4875 return true;
4876 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
4877 if (GDS) {
4878 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
4879 Error(S, "gds modifier is not supported on this GPU");
4880 return false;
4881 }
4882 return true;
4883}
4884
4885// gfx90a has an undocumented limitation:
4886// DS_GWS opcodes must use even aligned registers.
4887bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4888 const OperandVector &Operands) {
4889 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4890 return true;
4891
4892 int Opc = Inst.getOpcode();
4893 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4894 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4895 return true;
4896
4897 const MCRegisterInfo *MRI = getMRI();
4898 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4899 int Data0Pos =
4900 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4901 assert(Data0Pos != -1);
4902 auto Reg = Inst.getOperand(Data0Pos).getReg();
4903 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4904 if (RegIdx & 1) {
4905 SMLoc RegLoc = getRegLoc(Reg, Operands);
4906 Error(RegLoc, "vgpr must be even aligned");
4907 return false;
4908 }
4909
4910 return true;
4911}
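// Hedged example (not from the original source): on gfx90a, a ds_gws_init
// whose data0 register is v2 uses an even-aligned register and passes, while
// using v3 as data0 is rejected with "vgpr must be even aligned".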
4912
4913bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4914 const OperandVector &Operands,
4915 const SMLoc &IDLoc) {
4916 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4917 AMDGPU::OpName::cpol);
4918 if (CPolPos == -1)
4919 return true;
4920
4921 unsigned CPol = Inst.getOperand(CPolPos).getImm();
4922
4923 if (isGFX12Plus())
4924 return validateTHAndScopeBits(Inst, Operands, CPol);
4925
4926 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4927 if (TSFlags & SIInstrFlags::SMRD) {
4928 if (CPol && (isSI() || isCI())) {
4929 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4930 Error(S, "cache policy is not supported for SMRD instructions");
4931 return false;
4932 }
4933 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4934 Error(IDLoc, "invalid cache policy for SMEM instruction");
4935 return false;
4936 }
4937 }
4938
4939 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4941 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
4942 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | SIInstrFlags::FLAT;
4943 if (!(TSFlags & AllowSCCModifier)) {
4944 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4945 StringRef CStr(S.getPointer());
4946 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4947 Error(S,
4948 "scc modifier is not supported for this instruction on this GPU");
4949 return false;
4950 }
4951 }
4952
4954 return true;
4955
4956 if (TSFlags & SIInstrFlags::IsAtomicRet) {
4957 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4958 Error(IDLoc, isGFX940() ? "instruction must use sc0"
4959 : "instruction must use glc");
4960 return false;
4961 }
4962 } else {
4963 if (CPol & CPol::GLC) {
4964 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4965 StringRef CStr(S.getPointer());
4966 S = SMLoc::getFromPointer(
4967 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4968 Error(S, isGFX940() ? "instruction must not use sc0"
4969 : "instruction must not use glc");
4970 return false;
4971 }
4972 }
4973
4974 return true;
4975}
4976
4977bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
4978 const OperandVector &Operands,
4979 const unsigned CPol) {
4980 const unsigned TH = CPol & AMDGPU::CPol::TH;
4981 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
4982
4983 const unsigned Opcode = Inst.getOpcode();
4984 const MCInstrDesc &TID = MII.get(Opcode);
4985
4986 auto PrintError = [&](StringRef Msg) {
4987 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4988 Error(S, Msg);
4989 return false;
4990 };
4991
4992 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
4995 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
4996
4997 if (TH == 0)
4998 return true;
4999
5000 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5001 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5002 (TH == AMDGPU::CPol::TH_NT_HT)))
5003 return PrintError("invalid th value for SMEM instruction");
5004
5005 if (TH == AMDGPU::CPol::TH_BYPASS) {
5006 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5007 CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
5008 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5009 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
5010 return PrintError("scope and th combination is not valid");
5011 }
5012
5013 bool IsStore = TID.mayStore();
5014 bool IsAtomic =
5015 TID.TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet);
5016
5017 if (IsAtomic) {
5018 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5019 return PrintError("invalid th value for atomic instructions");
5020 } else if (IsStore) {
5021 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5022 return PrintError("invalid th value for store instructions");
5023 } else {
5024 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5025 return PrintError("invalid th value for load instructions");
5026 }
5027
5028 return true;
5029}
5030
5031bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
5032 if (!isGFX11Plus())
5033 return true;
5034 for (auto &Operand : Operands) {
5035 if (!Operand->isReg())
5036 continue;
5037 unsigned Reg = Operand->getReg();
5038 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
5039 Error(getRegLoc(Reg, Operands),
5040 "execz and vccz are not supported on this GPU");
5041 return false;
5042 }
5043 }
5044 return true;
5045}
5046
5047bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5048 const OperandVector &Operands) {
5049 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5050 if (Desc.mayStore() &&
5051 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5052 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5053 if (Loc != getInstLoc(Operands)) {
5054 Error(Loc, "TFE modifier has no meaning for store instructions");
5055 return false;
5056 }
5057 }
5058
5059 return true;
5060}
5061
5062bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
5063 const SMLoc &IDLoc,
5064 const OperandVector &Operands) {
5065 if (auto ErrMsg = validateLdsDirect(Inst)) {
5066 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
5067 return false;
5068 }
5069 if (!validateSOPLiteral(Inst)) {
5070 Error(getLitLoc(Operands),
5071 "only one unique literal operand is allowed");
5072 return false;
5073 }
5074 if (!validateVOPLiteral(Inst, Operands)) {
5075 return false;
5076 }
5077 if (!validateConstantBusLimitations(Inst, Operands)) {
5078 return false;
5079 }
5080 if (!validateVOPDRegBankConstraints(Inst, Operands)) {
5081 return false;
5082 }
5083 if (!validateIntClampSupported(Inst)) {
5084 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
5085 "integer clamping is not supported on this GPU");
5086 return false;
5087 }
5088 if (!validateOpSel(Inst)) {
5089 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5090 "invalid op_sel operand");
5091 return false;
5092 }
5093 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5094 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5095 "invalid neg_lo operand");
5096 return false;
5097 }
5098 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5099 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5100 "invalid neg_hi operand");
5101 return false;
5102 }
5103 if (!validateDPP(Inst, Operands)) {
5104 return false;
5105 }
5106 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5107 if (!validateMIMGD16(Inst)) {
5108 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5109 "d16 modifier is not supported on this GPU");
5110 return false;
5111 }
5112 if (!validateMIMGMSAA(Inst)) {
5113 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5114 "invalid dim; must be MSAA type");
5115 return false;
5116 }
5117 if (!validateMIMGDataSize(Inst, IDLoc)) {
5118 return false;
5119 }
5120 if (!validateMIMGAddrSize(Inst, IDLoc))
5121 return false;
5122 if (!validateMIMGAtomicDMask(Inst)) {
5123 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5124 "invalid atomic image dmask");
5125 return false;
5126 }
5127 if (!validateMIMGGatherDMask(Inst)) {
5128 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5129 "invalid image_gather dmask: only one bit must be set");
5130 return false;
5131 }
5132 if (!validateMovrels(Inst, Operands)) {
5133 return false;
5134 }
5135 if (!validateOffset(Inst, Operands)) {
5136 return false;
5137 }
5138 if (!validateMAIAccWrite(Inst, Operands)) {
5139 return false;
5140 }
5141 if (!validateMAISrc2(Inst, Operands)) {
5142 return false;
5143 }
5144 if (!validateMFMA(Inst, Operands)) {
5145 return false;
5146 }
5147 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5148 return false;
5149 }
5150
5151 if (!validateAGPRLdSt(Inst)) {
5152 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5153 ? "invalid register class: data and dst should be all VGPR or AGPR"
5154 : "invalid register class: agpr loads and stores not supported on this GPU"
5155 );
5156 return false;
5157 }
5158 if (!validateVGPRAlign(Inst)) {
5159 Error(IDLoc,
5160 "invalid register class: vgpr tuples must be 64 bit aligned");
5161 return false;
5162 }
5163 if (!validateDS(Inst, Operands)) {
5164 return false;
5165 }
5166
5167 if (!validateBLGP(Inst, Operands)) {
5168 return false;
5169 }
5170
5171 if (!validateDivScale(Inst)) {
5172 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5173 return false;
5174 }
5175 if (!validateWaitCnt(Inst, Operands)) {
5176 return false;
5177 }
5178 if (!validateExeczVcczOperands(Operands)) {
5179 return false;
5180 }
5181 if (!validateTFE(Inst, Operands)) {
5182 return false;
5183 }
5184
5185 return true;
5186}
5187
5188static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5189 const FeatureBitset &FBS,
5190 unsigned VariantID = 0);
5191
5192static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5193 const FeatureBitset &AvailableFeatures,
5194 unsigned VariantID);
5195
5196bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5197 const FeatureBitset &FBS) {
5198 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5199}
5200
5201bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5202 const FeatureBitset &FBS,
5203 ArrayRef<unsigned> Variants) {
5204 for (auto Variant : Variants) {
5205 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5206 return true;
5207 }
5208
5209 return false;
5210}
5211
5212bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5213 const SMLoc &IDLoc) {
5214 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5215
5216 // Check if requested instruction variant is supported.
5217 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5218 return false;
5219
5220 // This instruction is not supported.
5221 // Clear any other pending errors because they are no longer relevant.
5222 getParser().clearPendingErrors();
5223
5224 // Requested instruction variant is not supported.
5225 // Check if any other variants are supported.
5226 StringRef VariantName = getMatchedVariantName();
5227 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5228 return Error(IDLoc,
5229 Twine(VariantName,
5230 " variant of this instruction is not supported"));
5231 }
5232
5233 // Check if this instruction may be used with a different wavesize.
5234 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5235 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5236
5237 FeatureBitset FeaturesWS32 = getFeatureBits();
5238 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5239 .flip(AMDGPU::FeatureWavefrontSize32);
5240 FeatureBitset AvailableFeaturesWS32 =
5241 ComputeAvailableFeatures(FeaturesWS32);
5242
5243 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5244 return Error(IDLoc, "instruction requires wavesize=32");
5245 }
5246
5247 // Finally check if this instruction is supported on any other GPU.
5248 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5249 return Error(IDLoc, "instruction not supported on this GPU");
5250 }
5251
5252 // Instruction not supported on any GPU. Probably a typo.
5253 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5254 return Error(IDLoc, "invalid instruction" + Suggestion);
5255}
5256
5257static bool isInvalidVOPDY(const OperandVector &Operands,
5258 uint64_t InvalidOprIdx) {
5259 assert(InvalidOprIdx < Operands.size());
5260 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5261 if (Op.isToken() && InvalidOprIdx > 1) {
5262 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5263 return PrevOp.isToken() && PrevOp.getToken() == "::";
5264 }
5265 return false;
5266}
5267
5268bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5269 OperandVector &Operands,
5270 MCStreamer &Out,
5271 uint64_t &ErrorInfo,
5272 bool MatchingInlineAsm) {
5273 MCInst Inst;
5274 unsigned Result = Match_Success;
5275 for (auto Variant : getMatchedVariants()) {
5276 uint64_t EI;
5277 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5278 Variant);
5279 // We order match statuses from least to most specific and use the most
5280 // specific status as the result:
5281 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
5282 if ((R == Match_Success) ||
5283 (R == Match_PreferE32) ||
5284 (R == Match_MissingFeature && Result != Match_PreferE32) ||
5285 (R == Match_InvalidOperand && Result != Match_MissingFeature
5286 && Result != Match_PreferE32) ||
5287 (R == Match_MnemonicFail && Result != Match_InvalidOperand
5288 && Result != Match_MissingFeature
5289 && Result != Match_PreferE32)) {
5290 Result = R;
5291 ErrorInfo = EI;
5292 }
5293 if (R == Match_Success)
5294 break;
5295 }
5296
5297 if (Result == Match_Success) {
5298 if (!validateInstruction(Inst, IDLoc, Operands)) {
5299 return true;
5300 }
5301 Inst.setLoc(IDLoc);
5302 Out.emitInstruction(Inst, getSTI());
5303 return false;
5304 }
5305
5306 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5307 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5308 return true;
5309 }
5310
5311 switch (Result) {
5312 default: break;
5313 case Match_MissingFeature:
5314 // It has been verified that the specified instruction
5315 // mnemonic is valid. A match was found but it requires
5316 // features which are not supported on this GPU.
5317 return Error(IDLoc, "operands are not valid for this GPU or mode");
5318
5319 case Match_InvalidOperand: {
5320 SMLoc ErrorLoc = IDLoc;
5321 if (ErrorInfo != ~0ULL) {
5322 if (ErrorInfo >= Operands.size()) {
5323 return Error(IDLoc, "too few operands for instruction");
5324 }
5325 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5326 if (ErrorLoc == SMLoc())
5327 ErrorLoc = IDLoc;
5328
5329 if (isInvalidVOPDY(Operands, ErrorInfo)) {
5330 return Error(ErrorLoc, "invalid VOPDY instruction");
5331 }
5332 return Error(ErrorLoc, "invalid operand for instruction");
5333 }
5334
5335 case Match_PreferE32:
5336 return Error(IDLoc, "internal error: instruction without _e64 suffix "
5337 "should be encoded as e32");
5338 case Match_MnemonicFail:
5339 llvm_unreachable("Invalid instructions should have been handled already");
5340 }
5341 llvm_unreachable("Implement any new match types added!");
5342}
5343
5344bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5345 int64_t Tmp = -1;
5346 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5347 return true;
5348 }
5349 if (getParser().parseAbsoluteExpression(Tmp)) {
5350 return true;
5351 }
5352 Ret = static_cast<uint32_t>(Tmp);
5353 return false;
5354}
5355
5356bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5357 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5358 return TokError("directive only supported for amdgcn architecture");
5359
5360 std::string TargetIDDirective;
5361 SMLoc TargetStart = getTok().getLoc();
5362 if (getParser().parseEscapedString(TargetIDDirective))
5363 return true;
5364
5365 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5366 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5367 return getParser().Error(TargetRange.Start,
5368 (Twine(".amdgcn_target directive's target id ") +
5369 Twine(TargetIDDirective) +
5370 Twine(" does not match the specified target id ") +
5371 Twine(getTargetStreamer().getTargetID()->toString())).str());
5372
5373 return false;
5374}
5375
5376bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5377 return Error(Range.Start, "value out of range", Range);
5378}
5379
5380bool AMDGPUAsmParser::calculateGPRBlocks(
5381 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
5382 bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
5383 unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
5384 SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
5385 // TODO(scott.linder): These calculations are duplicated from
5386 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5387 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5388
5389 unsigned NumVGPRs = NextFreeVGPR;
5390 unsigned NumSGPRs = NextFreeSGPR;
5391
5392 if (Version.Major >= 10)
5393 NumSGPRs = 0;
5394 else {
5395 unsigned MaxAddressableNumSGPRs =
5396 IsaInfo::getAddressableNumSGPRs(&getSTI());
5397
5398 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
5399 NumSGPRs > MaxAddressableNumSGPRs)
5400 return OutOfRangeError(SGPRRange);
5401
5402 NumSGPRs +=
5403 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
5404
5405 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5406 NumSGPRs > MaxAddressableNumSGPRs)
5407 return OutOfRangeError(SGPRRange);
5408
5409 if (Features.test(FeatureSGPRInitBug))
5410 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
5411 }
5412
5413 VGPRBlocks = IsaInfo::getEncodedNumVGPRBlocks(&getSTI(), NumVGPRs,
5414 EnableWavefrontSize32);
5415 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
5416
5417 return false;
5418}
5419
5420bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5421 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5422 return TokError("directive only supported for amdgcn architecture");
5423
5424 if (!isHsaAbi(getSTI()))
5425 return TokError("directive only supported for amdhsa OS");
5426
5427 StringRef KernelName;
5428 if (getParser().parseIdentifier(KernelName))
5429 return true;
5430
5431 AMDGPU::MCKernelDescriptor KD =
5432 AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
5433 &getSTI(), getContext());
5434
5435 StringSet<> Seen;
5436
5437 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5438
5439 SMRange VGPRRange;
5440 uint64_t NextFreeVGPR = 0;
5441 uint64_t AccumOffset = 0;
5442 uint64_t SharedVGPRCount = 0;
5443 uint64_t PreloadLength = 0;
5444 uint64_t PreloadOffset = 0;
5445 SMRange SGPRRange;
5446 uint64_t NextFreeSGPR = 0;
5447
5448 // Count the number of user SGPRs implied from the enabled feature bits.
5449 unsigned ImpliedUserSGPRCount = 0;
5450
5451 // Track if the asm explicitly contains the directive for the user SGPR
5452 // count.
5453 std::optional<unsigned> ExplicitUserSGPRCount;
5454 bool ReserveVCC = true;
5455 bool ReserveFlatScr = true;
5456 std::optional<bool> EnableWavefrontSize32;
5457
5458 while (true) {
5459 while (trySkipToken(AsmToken::EndOfStatement));
5460
5461 StringRef ID;
5462 SMRange IDRange = getTok().getLocRange();
5463 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5464 return true;
5465
5466 if (ID == ".end_amdhsa_kernel")
5467 break;
5468
5469 if (!Seen.insert(ID).second)
5470 return TokError(".amdhsa_ directives cannot be repeated");
5471
5472 SMLoc ValStart = getLoc();
5473 const MCExpr *ExprVal;
5474 if (getParser().parseExpression(ExprVal))
5475 return true;
5476 SMLoc ValEnd = getLoc();
5477 SMRange ValRange = SMRange(ValStart, ValEnd);
5478
5479 int64_t IVal = 0;
5480 uint64_t Val = IVal;
5481 bool EvaluatableExpr;
5482 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
5483 if (IVal < 0)
5484 return OutOfRangeError(ValRange);
5485 Val = IVal;
5486 }
5487
5488#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5489 if (!isUInt<ENTRY##_WIDTH>(Val)) \
5490 return OutOfRangeError(RANGE); \
5491 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
5492 getContext());
5493
5494// Some fields use the parsed value immediately which requires the expression to
5495// be solvable.
5496#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
5497 if (!(RESOLVED)) \
5498 return Error(IDRange.Start, "directive should have resolvable expression", \
5499 IDRange);
5500
5501 if (ID == ".amdhsa_group_segment_fixed_size") {
5502 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
5503 CHAR_BIT>(Val))
5504 return OutOfRangeError(ValRange);
5505 KD.group_segment_fixed_size = ExprVal;
5506 } else if (ID == ".amdhsa_private_segment_fixed_size") {
5507 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
5508 CHAR_BIT>(Val))
5509 return OutOfRangeError(ValRange);
5510 KD.private_segment_fixed_size = ExprVal;
5511 } else if (ID == ".amdhsa_kernarg_size") {
5512 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
5513 return OutOfRangeError(ValRange);
5514 KD.kernarg_size = ExprVal;
5515 } else if (ID == ".amdhsa_user_sgpr_count") {
5516 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5517 ExplicitUserSGPRCount = Val;
5518 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5519 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5520 if (hasArchitectedFlatScratch())
5521 return Error(IDRange.Start,
5522 "directive is not supported with architected flat scratch",
5523 IDRange);
5524 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5525 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5526 ExprVal, ValRange);
5527 if (Val)
5528 ImpliedUserSGPRCount += 4;
5529 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
5530 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5531 if (!hasKernargPreload())
5532 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5533
5534 if (Val > getMaxNumUserSGPRs())
5535 return OutOfRangeError(ValRange);
5536 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
5537 ValRange);
5538 if (Val) {
5539 ImpliedUserSGPRCount += Val;
5540 PreloadLength = Val;
5541 }
5542 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
5543 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5544 if (!hasKernargPreload())
5545 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5546
5547 if (Val >= 1024)
5548 return OutOfRangeError(ValRange);
5549 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
5550 ValRange);
5551 if (Val)
5552 PreloadOffset = Val;
5553 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5554 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5555 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5556 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
5557 ValRange);
5558 if (Val)
5559 ImpliedUserSGPRCount += 2;
5560 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5561 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5562 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5563 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
5564 ValRange);
5565 if (Val)
5566 ImpliedUserSGPRCount += 2;
5567 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5568 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5569 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5570 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5571 ExprVal, ValRange);
5572 if (Val)
5573 ImpliedUserSGPRCount += 2;
5574 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5575 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5576 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5577 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
5578 ValRange);
5579 if (Val)
5580 ImpliedUserSGPRCount += 2;
5581 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5582 if (hasArchitectedFlatScratch())
5583 return Error(IDRange.Start,
5584 "directive is not supported with architected flat scratch",
5585 IDRange);
5586 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5587 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5588 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
5589 ExprVal, ValRange);
5590 if (Val)
5591 ImpliedUserSGPRCount += 2;
5592 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5593 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5594 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5595 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5596 ExprVal, ValRange);
5597 if (Val)
5598 ImpliedUserSGPRCount += 1;
5599 } else if (ID == ".amdhsa_wavefront_size32") {
5600 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5601 if (IVersion.Major < 10)
5602 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5603 EnableWavefrontSize32 = Val;
5604 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5605 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
5606 ValRange);
5607 } else if (ID == ".amdhsa_uses_dynamic_stack") {
5608 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5609 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
5610 ValRange);
5611 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5612 if (hasArchitectedFlatScratch())
5613 return Error(IDRange.Start,
5614 "directive is not supported with architected flat scratch",
5615 IDRange);
5616 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5617 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5618 ValRange);
5619 } else if (ID == ".amdhsa_enable_private_segment") {
5620 if (!hasArchitectedFlatScratch())
5621 return Error(
5622 IDRange.Start,
5623 "directive is not supported without architected flat scratch",
5624 IDRange);
5625 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5626 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5627 ValRange);
5628 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5629 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5630 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
5631 ValRange);
5632 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5633 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5634 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
5635 ValRange);
5636 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5637 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5638 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
5639 ValRange);
5640 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5641 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5642 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
5643 ValRange);
5644 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5645 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5646 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
5647 ValRange);
5648 } else if (ID == ".amdhsa_next_free_vgpr") {
5649 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5650 VGPRRange = ValRange;
5651 NextFreeVGPR = Val;
5652 } else if (ID == ".amdhsa_next_free_sgpr") {
5653 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5654 SGPRRange = ValRange;
5655 NextFreeSGPR = Val;
5656 } else if (ID == ".amdhsa_accum_offset") {
5657 if (!isGFX90A())
5658 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5659 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5660 AccumOffset = Val;
5661 } else if (ID == ".amdhsa_reserve_vcc") {
5662 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5663 if (!isUInt<1>(Val))
5664 return OutOfRangeError(ValRange);
5665 ReserveVCC = Val;
5666 } else if (ID == ".amdhsa_reserve_flat_scratch") {
5667 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5668 if (IVersion.Major < 7)
5669 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5670 if (hasArchitectedFlatScratch())
5671 return Error(IDRange.Start,
5672 "directive is not supported with architected flat scratch",
5673 IDRange);
5674 if (!isUInt<1>(Val))
5675 return OutOfRangeError(ValRange);
5676 ReserveFlatScr = Val;
5677 } else if (ID == ".amdhsa_reserve_xnack_mask") {
5678 if (IVersion.Major < 8)
5679 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5680 if (!isUInt<1>(Val))
5681 return OutOfRangeError(ValRange);
5682 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5683 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5684 IDRange);
5685 } else if (ID == ".amdhsa_float_round_mode_32") {
5686 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5687 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
5688 ValRange);
5689 } else if (ID == ".amdhsa_float_round_mode_16_64") {
5690 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5691 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
5692 ValRange);
5693 } else if (ID == ".amdhsa_float_denorm_mode_32") {
5694 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5695 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
5696 ValRange);
5697 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5698 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5699 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
5700 ValRange);
5701 } else if (ID == ".amdhsa_dx10_clamp") {
5702 if (IVersion.Major >= 12)
5703 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5704 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5705 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
5706 ValRange);
5707 } else if (ID == ".amdhsa_ieee_mode") {
5708 if (IVersion.Major >= 12)
5709 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5710 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5711 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
5712 ValRange);
5713 } else if (ID == ".amdhsa_fp16_overflow") {
5714 if (IVersion.Major < 9)
5715 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5716 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5717 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
5718 ValRange);
5719 } else if (ID == ".amdhsa_tg_split") {
5720 if (!isGFX90A())
5721 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5722 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
5723 ExprVal, ValRange);
5724 } else if (ID == ".amdhsa_workgroup_processor_mode") {
5725 if (IVersion.Major < 10)
5726 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5727 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5728 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
5729 ValRange);
5730 } else if (ID == ".amdhsa_memory_ordered") {
5731 if (IVersion.Major < 10)
5732 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5733 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5734 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
5735 ValRange);
5736 } else if (ID == ".amdhsa_forward_progress") {
5737 if (IVersion.Major < 10)
5738 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5739 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5740 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
5741 ValRange);
5742 } else if (ID == ".amdhsa_shared_vgpr_count") {
5743 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5744 if (IVersion.Major < 10 || IVersion.Major >= 12)
5745 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
5746 IDRange);
5747 SharedVGPRCount = Val;
5748 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5749 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
5750 ValRange);
5751 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5752 PARSE_BITS_ENTRY(
5753 KD.compute_pgm_rsrc2,
5754 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
5755 ExprVal, ValRange);
5756 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5757 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5758 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5759 ExprVal, ValRange);
5760 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5761 PARSE_BITS_ENTRY(
5762 KD.compute_pgm_rsrc2,
5763 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
5764 ExprVal, ValRange);
5765 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5766 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5767 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5768 ExprVal, ValRange);
5769 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5770 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5771 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5772 ExprVal, ValRange);
5773 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5774 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5775 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5776 ExprVal, ValRange);
5777 } else if (ID == ".amdhsa_exception_int_div_zero") {
5778 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5779 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5780 ExprVal, ValRange);
5781 } else if (ID == ".amdhsa_round_robin_scheduling") {
5782 if (IVersion.Major < 12)
5783 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
5784 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5785 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
5786 ValRange);
5787 } else {
5788 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5789 }
5790
5791#undef PARSE_BITS_ENTRY
5792 }
5793
5794 if (!Seen.contains(".amdhsa_next_free_vgpr"))
5795 return TokError(".amdhsa_next_free_vgpr directive is required");
5796
5797 if (!Seen.contains(".amdhsa_next_free_sgpr"))
5798 return TokError(".amdhsa_next_free_sgpr directive is required");
5799
5800 unsigned VGPRBlocks;
5801 unsigned SGPRBlocks;
5802 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5803 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5804 EnableWavefrontSize32, NextFreeVGPR,
5805 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5806 SGPRBlocks))
5807 return true;
5808
5809 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5810 VGPRBlocks))
5811 return OutOfRangeError(VGPRRange);
5812 AMDGPU::MCKernelDescriptor::bits_set(
5813 KD.compute_pgm_rsrc1, MCConstantExpr::create(VGPRBlocks, getContext()),
5814 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
5815 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
5816
5817 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5818 SGPRBlocks))
5819 return OutOfRangeError(SGPRRange);
5820 AMDGPU::MCKernelDescriptor::bits_set(
5821 KD.compute_pgm_rsrc1, MCConstantExpr::create(SGPRBlocks, getContext()),
5822 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
5823 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
5824
5825 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5826 return TokError("amdgpu_user_sgpr_count smaller than implied by "
5827 "enabled user SGPRs");
5828
5829 unsigned UserSGPRCount =
5830 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5831
5832 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5833 return TokError("too many user SGPRs enabled");
5834 AMDGPU::MCKernelDescriptor::bits_set(
5835 KD.compute_pgm_rsrc2, MCConstantExpr::create(UserSGPRCount, getContext()),
5836 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
5837 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
5838
5839 int64_t IVal = 0;
5840 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
5841 return TokError("Kernarg size should be resolvable");
5842 uint64_t kernarg_size = IVal;
5843 if (PreloadLength && kernarg_size &&
5844 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
5845 return TokError("Kernarg preload length + offset is larger than the "
5846 "kernarg segment size");
5847
5848 if (isGFX90A()) {
5849 if (!Seen.contains(".amdhsa_accum_offset"))
5850 return TokError(".amdhsa_accum_offset directive is required");
5851 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5852 return TokError("accum_offset should be in range [4..256] in "
5853 "increments of 4");
5854 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5855 return TokError("accum_offset exceeds total VGPR allocation");
5856 AMDGPU::MCKernelDescriptor::bits_set(
5857 KD.compute_pgm_rsrc3,
5858 MCConstantExpr::create(AccumOffset / 4 - 1, getContext()),
5859 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
5860 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
5861 }
5862
5863 if (IVersion.Major >= 10 && IVersion.Major < 12) {
5864 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
5865 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5866 return TokError("shared_vgpr_count directive not valid on "
5867 "wavefront size 32");
5868 }
5869 if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5870 return TokError("shared_vgpr_count*2 + "
5871 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5872 "exceed 63\n");
5873 }
5874 }
5875
5876 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
5877 NextFreeVGPR, NextFreeSGPR,
5878 ReserveVCC, ReserveFlatScr);
5879 return false;
5880}
5881
5882bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
5883 uint32_t Version;
5884 if (ParseAsAbsoluteExpression(Version))
5885 return true;
5886
5887 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
5888 return false;
5889}
5890
5891bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5892 amd_kernel_code_t &Header) {
5893 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5894 // assembly for backwards compatibility.
5895 if (ID == "max_scratch_backing_memory_byte_size") {
5896 Parser.eatToEndOfStatement();
5897 return false;
5898 }
5899
5900 SmallString<40> ErrStr;
5901 raw_svector_ostream Err(ErrStr);
5902 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5903 return TokError(Err.str());
5904 }
5905 Lex();
5906
5907 if (ID == "enable_dx10_clamp") {
5908 if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) &&
5909 isGFX12Plus())
5910 return TokError("enable_dx10_clamp=1 is not allowed on GFX12+");
5911 }
5912
5913 if (ID == "enable_ieee_mode") {
5914 if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) &&
5915 isGFX12Plus())
5916 return TokError("enable_ieee_mode=1 is not allowed on GFX12+");
5917 }
5918
5919 if (ID == "enable_wavefront_size32") {
5920 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5921 if (!isGFX10Plus())
5922 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5923 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5924 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5925 } else {
5926 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5927 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5928 }
5929 }
5930
5931 if (ID == "wavefront_size") {
5932 if (Header.wavefront_size == 5) {
5933 if (!isGFX10Plus())
5934 return TokError("wavefront_size=5 is only allowed on GFX10+");
5935 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5936 return TokError("wavefront_size=5 requires +WavefrontSize32");
5937 } else if (Header.wavefront_size == 6) {
5938 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5939 return TokError("wavefront_size=6 requires +WavefrontSize64");
5940 }
5941 }
5942
5943 if (ID == "enable_wgp_mode") {
5944 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5945 !isGFX10Plus())
5946 return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5947 }
5948
5949 if (ID == "enable_mem_ordered") {
5950 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5951 !isGFX10Plus())
5952 return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5953 }
5954
5955 if (ID == "enable_fwd_progress") {
5956 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5957 !isGFX10Plus())
5958 return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5959 }
5960
5961 return false;
5962}
5963
5964bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5965 amd_kernel_code_t Header;
5966 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5967
5968 while (true) {
5969 // Lex EndOfStatement. This is in a while loop, because lexing a comment
5970 // will set the current token to EndOfStatement.
5971 while(trySkipToken(AsmToken::EndOfStatement));
5972
5973 StringRef ID;
5974 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5975 return true;
5976
5977 if (ID == ".end_amd_kernel_code_t")
5978 break;
5979
5980 if (ParseAMDKernelCodeTValue(ID, Header))
5981 return true;
5982 }
5983
5984 getTargetStreamer().EmitAMDKernelCodeT(Header);
5985
5986 return false;
5987}
5988
5989bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5990 StringRef KernelName;
5991 if (!parseId(KernelName, "expected symbol name"))
5992 return true;
5993
5994 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5995 ELF::STT_AMDGPU_HSA_KERNEL);
5996
5997 KernelScope.initialize(getContext());
5998 return false;
5999}
6000
6001bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6002 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
6003 return Error(getLoc(),
6004 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6005 "architectures");
6006 }
6007
6008 auto TargetIDDirective = getLexer().getTok().getStringContents();
6009 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6010 return Error(getParser().getTok().getLoc(), "target id must match options");
6011
6012 getTargetStreamer().EmitISAVersion();
6013 Lex();
6014
6015 return false;
6016}
6017
6018bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6019 assert(isHsaAbi(getSTI()));
6020
6021 std::string HSAMetadataString;
6022 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6023 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6024 return true;
6025
6026 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6027 return Error(getLoc(), "invalid HSA metadata");
6028
6029 return false;
6030}
6031
6032/// Common code to parse out a block of text (typically YAML) between start and
6033/// end directives.
6034bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6035 const char *AssemblerDirectiveEnd,
6036 std::string &CollectString) {
6037
6038 raw_string_ostream CollectStream(CollectString);
6039
6040 getLexer().setSkipSpace(false);
6041
6042 bool FoundEnd = false;
6043 while (!isToken(AsmToken::Eof)) {
6044 while (isToken(AsmToken::Space)) {
6045 CollectStream << getTokenStr();
6046 Lex();
6047 }
6048
6049 if (trySkipId(AssemblerDirectiveEnd)) {
6050 FoundEnd = true;
6051 break;
6052 }
6053
6054 CollectStream << Parser.parseStringToEndOfStatement()
6055 << getContext().getAsmInfo()->getSeparatorString();
6056
6057 Parser.eatToEndOfStatement();
6058 }
6059
6060 getLexer().setSkipSpace(true);
6061
6062 if (isToken(AsmToken::Eof) && !FoundEnd) {
6063 return TokError(Twine("expected directive ") +
6064 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6065 }
6066
6067 CollectStream.flush();
6068 return false;
6069}
6070
6071/// Parse the assembler directive for new MsgPack-format PAL metadata.
6072bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6073 std::string String;
6074 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6075 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
6076 return true;
6077
6078 auto PALMetadata = getTargetStreamer().getPALMetadata();
6079 if (!PALMetadata->setFromString(String))
6080 return Error(getLoc(), "invalid PAL metadata");
6081 return false;
6082}
6083
6084/// Parse the assembler directive for old linear-format PAL metadata.
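/// The legacy form is a flat list of comma-separated register/value pairs,
/// e.g. (values illustrative only): key0, value0, key1, value1, ...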
6085bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6086 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6087 return Error(getLoc(),
6088 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6089 "not available on non-amdpal OSes")).str());
6090 }
6091
6092 auto PALMetadata = getTargetStreamer().getPALMetadata();
6093 PALMetadata->setLegacy();
6094 for (;;) {
6095 uint32_t Key, Value;
6096 if (ParseAsAbsoluteExpression(Key)) {
6097 return TokError(Twine("invalid value in ") +
6098 Twine(PALMD::AssemblerDirective));
6099 }
6100 if (!trySkipToken(AsmToken::Comma)) {
6101 return TokError(Twine("expected an even number of values in ") +
6102 Twine(PALMD::AssemblerDirective));
6103 }
6104 if (ParseAsAbsoluteExpression(Value)) {
6105 return TokError(Twine("invalid value in ") +
6106 Twine(PALMD::AssemblerDirective));
6107 }
6108 PALMetadata->setRegister(Key, Value);
6109 if (!trySkipToken(AsmToken::Comma))
6110 break;
6111 }
6112 return false;
6113}
6114
6115/// ParseDirectiveAMDGPULDS
6116/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
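/// e.g. ".amdgpu_lds my_lds_sym, 1024, 16" (symbol name and sizes are
/// illustrative only); the alignment defaults to 4 when omitted.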
6117bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6118 if (getParser().checkForValidSection())
6119 return true;
6120
6121 StringRef Name;
6122 SMLoc NameLoc = getLoc();
6123 if (getParser().parseIdentifier(Name))
6124 return TokError("expected identifier in directive");
6125
6126 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6127 if (getParser().parseComma())
6128 return true;
6129
6130 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6131
6132 int64_t Size;
6133 SMLoc SizeLoc = getLoc();
6134 if (getParser().parseAbsoluteExpression(Size))
6135 return true;
6136 if (Size < 0)
6137 return Error(SizeLoc, "size must be non-negative");
6138 if (Size > LocalMemorySize)
6139 return Error(SizeLoc, "size is too large");
6140
6141 int64_t Alignment = 4;
6142 if (trySkipToken(AsmToken::Comma)) {
6143 SMLoc AlignLoc = getLoc();
6144 if (getParser().parseAbsoluteExpression(Alignment))
6145 return true;
6146 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6147 return Error(AlignLoc, "alignment must be a power of two");
6148
6149 // Alignment larger than the size of LDS is possible in theory, as long
6150 // as the linker manages to place the symbol at address 0, but we do want
6151 // to make sure the alignment fits nicely into a 32-bit integer.
6152 if (Alignment >= 1u << 31)
6153 return Error(AlignLoc, "alignment is too large");
6154 }
6155
6156 if (parseEOL())
6157 return true;
6158
6159 Symbol->redefineIfPossible();
6160 if (!Symbol->isUndefined())
6161 return Error(NameLoc, "invalid symbol redefinition");
6162
6163 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6164 return false;
6165}
6166
6167bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6168 StringRef IDVal = DirectiveID.getString();
6169
6170 if (isHsaAbi(getSTI())) {
6171 if (IDVal == ".amdhsa_kernel")
6172 return ParseDirectiveAMDHSAKernel();
6173
6174 if (IDVal == ".amdhsa_code_object_version")
6175 return ParseDirectiveAMDHSACodeObjectVersion();
6176
6177 // TODO: Restructure/combine with PAL metadata directive.
6178 if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
6179 return ParseDirectiveHSAMetadata();
6180 } else {
6181 if (IDVal == ".amd_kernel_code_t")
6182 return ParseDirectiveAMDKernelCodeT();
6183
6184 if (IDVal == ".amdgpu_hsa_kernel")
6185 return ParseDirectiveAMDGPUHsaKernel();
6186
6187 if (IDVal == ".amd_amdgpu_isa")
6188 return ParseDirectiveISAVersion();
6189
6190 if (IDVal == HSAMD::AssemblerDirectiveBegin) {
6191 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6192 Twine(" directive is "
6193 "not available on non-amdhsa OSes"))
6194 .str());
6195 }
6196 }
6197
6198 if (IDVal == ".amdgcn_target")
6199 return ParseDirectiveAMDGCNTarget();
6200
6201 if (IDVal == ".amdgpu_lds")
6202 return ParseDirectiveAMDGPULDS();
6203
6204 if (IDVal == PALMD::AssemblerDirectiveBegin)
6205 return ParseDirectivePALMetadataBegin();
6206
6207 if (IDVal == PALMD::AssemblerDirective)
6208 return ParseDirectivePALMetadata();
6209
6210 return true;
6211}
6212
6213bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6214 unsigned RegNo) {
6215
6216 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
6217 return isGFX9Plus();
6218
6219 // GFX10+ has 2 more SGPRs 104 and 105.
6220 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
6221 return hasSGPR104_SGPR105();
6222
6223 switch (RegNo) {
6224 case AMDGPU::SRC_SHARED_BASE_LO:
6225 case AMDGPU::SRC_SHARED_BASE:
6226 case AMDGPU::SRC_SHARED_LIMIT_LO:
6227 case AMDGPU::SRC_SHARED_LIMIT:
6228 case AMDGPU::SRC_PRIVATE_BASE_LO:
6229 case AMDGPU::SRC_PRIVATE_BASE:
6230 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
6231 case AMDGPU::SRC_PRIVATE_LIMIT:
6232 return isGFX9Plus();
6233 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
6234 return isGFX9Plus() && !isGFX11Plus();
6235 case AMDGPU::TBA:
6236 case AMDGPU::TBA_LO:
6237 case AMDGPU::TBA_HI:
6238 case AMDGPU::TMA:
6239 case AMDGPU::TMA_LO:
6240 case AMDGPU::TMA_HI:
6241 return !isGFX9Plus();
6242 case AMDGPU::XNACK_MASK:
6243 case AMDGPU::XNACK_MASK_LO:
6244 case AMDGPU::XNACK_MASK_HI:
6245 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6246 case AMDGPU::SGPR_NULL:
6247 return isGFX10Plus();
6248 default:
6249 break;
6250 }
6251
6252 if (isCI())
6253 return true;
6254
6255 if (isSI() || isGFX10Plus()) {
6256 // No flat_scr on SI.
6257 // On GFX10Plus flat scratch is not a valid register operand and can only be
6258 // accessed with s_setreg/s_getreg.
6259 switch (RegNo) {
6260 case AMDGPU::FLAT_SCR:
6261 case AMDGPU::FLAT_SCR_LO:
6262 case AMDGPU::FLAT_SCR_HI:
6263 return false;
6264 default:
6265 return true;
6266 }
6267 }
6268
6269 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6270 // SI/CI have.
6271 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
6272 return hasSGPR102_SGPR103();
6273
6274 return true;
6275}
6276
6277ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6278 StringRef Mnemonic,
6279 OperandMode Mode) {
6280 ParseStatus Res = parseVOPD(Operands);
6281 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6282 return Res;
6283
6284 // Try to parse with a custom parser
6285 Res = MatchOperandParserImpl(Operands, Mnemonic);
6286
6287 // If we successfully parsed the operand or if there was an error parsing,
6288 // we are done.
6289 //
6290 // If we are parsing after we reach EndOfStatement then this means we
6291 // are appending default values to the Operands list. This is only done
6292 // by custom parser, so we shouldn't continue on to the generic parsing.
6293 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6294 return Res;
6295
6296 SMLoc RBraceLoc;
6297 SMLoc LBraceLoc = getLoc();
6298 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6299 unsigned Prefix = Operands.size();
6300
6301 for (;;) {
6302 auto Loc = getLoc();
6303 Res = parseReg(Operands);
6304 if (Res.isNoMatch())
6305 Error(Loc, "expected a register");
6306 if (!Res.isSuccess())
6307 return ParseStatus::Failure;
6308
6309 RBraceLoc = getLoc();
6310 if (trySkipToken(AsmToken::RBrac))
6311 break;
6312
6313 if (!skipToken(AsmToken::Comma,
6314 "expected a comma or a closing square bracket"))
6315 return ParseStatus::Failure;
6316 }
6317
6318 if (Operands.size() - Prefix > 1) {
6319 Operands.insert(Operands.begin() + Prefix,
6320 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6321 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6322 }
6323
6324 return ParseStatus::Success;
6325 }
6326
6327 return parseRegOrImm(Operands);
6328}
6329
6330StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6331 // Clear any forced encodings from the previous instruction.
6332 setForcedEncodingSize(0);
6333 setForcedDPP(false);
6334 setForcedSDWA(false);
6335
6336 if (Name.ends_with("_e64_dpp")) {
6337 setForcedDPP(true);
6338 setForcedEncodingSize(64);
6339 return Name.substr(0, Name.size() - 8);
6340 } else if (Name.ends_with("_e64")) {
6341 setForcedEncodingSize(64);
6342 return Name.substr(0, Name.size() - 4);
6343 } else if (Name.ends_with("_e32")) {
6344 setForcedEncodingSize(32);
6345 return Name.substr(0, Name.size() - 4);
6346 } else if (Name.ends_with("_dpp")) {
6347 setForcedDPP(true);
6348 return Name.substr(0, Name.size() - 4);
6349 } else if (Name.ends_with("_sdwa")) {
6350 setForcedSDWA(true);
6351 return Name.substr(0, Name.size() - 5);
6352 }
6353 return Name;
6354}
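// For example (illustrative), "v_add_f32_e64" is parsed as mnemonic
// "v_add_f32" with a forced 64-bit encoding, while a trailing "_sdwa" or
// "_dpp" forces the corresponding encoding instead.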
6355
6356static void applyMnemonicAliases(StringRef &Mnemonic,
6357 const FeatureBitset &Features,
6358 unsigned VariantID);
6359
6360bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
6361 StringRef Name,
6362 SMLoc NameLoc, OperandVector &Operands) {
6363 // Add the instruction mnemonic
6364 Name = parseMnemonicSuffix(Name);
6365
6366 // If the target architecture uses MnemonicAlias, call it here to parse
6367 // operands correctly.
6368 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6369
6370 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6371
6372 bool IsMIMG = Name.starts_with("image_");
6373
6374 while (!trySkipToken(AsmToken::EndOfStatement)) {
6375 OperandMode Mode = OperandMode_Default;
6376 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6377 Mode = OperandMode_NSA;
6378 ParseStatus Res = parseOperand(Operands, Name, Mode);
6379
6380 if (!Res.isSuccess()) {
6381 checkUnsupportedInstruction(Name, NameLoc);
6382 if (!Parser.hasPendingError()) {
6383 // FIXME: use real operand location rather than the current location.
6384 StringRef Msg = Res.isFailure() ? "failed parsing operand."
6385 : "not a valid operand.";
6386 Error(getLoc(), Msg);
6387 }
6388 while (!trySkipToken(AsmToken::EndOfStatement)) {
6389 lex();
6390 }
6391 return true;
6392 }
6393
6394 // Eat the comma or space if there is one.
6395 trySkipToken(AsmToken::Comma);
6396 }
6397
6398 return false;
6399}
6400
6401//===----------------------------------------------------------------------===//
6402// Utility functions
6403//===----------------------------------------------------------------------===//
6404
6405ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6406 OperandVector &Operands) {
6407 SMLoc S = getLoc();
6408 if (!trySkipId(Name))
6409 return ParseStatus::NoMatch;
6410
6411 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6412 return ParseStatus::Success;
6413}
6414
6415ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6416 int64_t &IntVal) {
6417
6418 if (!trySkipId(Prefix, AsmToken::Colon))
6419 return ParseStatus::NoMatch;
6420
6421 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6422}
6423
6424ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6425 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6426 std::function<bool(int64_t &)> ConvertResult) {
6427 SMLoc S = getLoc();
6428 int64_t Value = 0;
6429
6430 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6431 if (!Res.isSuccess())
6432 return Res;
6433
6434 if (ConvertResult && !ConvertResult(Value)) {
6435 Error(S, "invalid " + StringRef(Prefix) + " value.");
6436 }
6437
6438 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6439 return ParseStatus::Success;
6440}
6441
6442ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6443 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6444 bool (*ConvertResult)(int64_t &)) {
6445 SMLoc S = getLoc();
6446 if (!trySkipId(Prefix, AsmToken::Colon))
6447 return ParseStatus::NoMatch;
6448
6449 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
6450 return ParseStatus::Failure;
6451
6452 unsigned Val = 0;
6453 const unsigned MaxSize = 4;
6454
6455 // FIXME: How to verify the number of elements matches the number of src
6456 // operands?
6457 for (int I = 0; ; ++I) {
6458 int64_t Op;
6459 SMLoc Loc = getLoc();
6460 if (!parseExpr(Op))
6461 return ParseStatus::Failure;
6462
6463 if (Op != 0 && Op != 1)
6464 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
6465
6466 Val |= (Op << I);
6467
6468 if (trySkipToken(AsmToken::RBrac))
6469 break;
6470
6471 if (I + 1 == MaxSize)
6472 return Error(getLoc(), "expected a closing square bracket");
6473
6474 if (!skipToken(AsmToken::Comma, "expected a comma"))
6475 return ParseStatus::Failure;
6476 }
6477
6478 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
6479 return ParseStatus::Success;
6480}
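// Accepts a bracketed list of up to four 0/1 values, e.g. (illustrative)
// "neg:[0,1,0]", and packs them into a single bitmask immediate.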
6481
6482ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
6483 OperandVector &Operands,
6484 AMDGPUOperand::ImmTy ImmTy) {
6485 int64_t Bit;
6486 SMLoc S = getLoc();
6487
6488 if (trySkipId(Name)) {
6489 Bit = 1;
6490 } else if (trySkipId("no", Name)) {
6491 Bit = 0;
6492 } else {
6493 return ParseStatus::NoMatch;
6494 }
6495
6496 if (Name == "r128" && !hasMIMG_R128())
6497 return Error(S, "r128 modifier is not supported on this GPU");
6498 if (Name == "a16" && !hasA16())
6499 return Error(S, "a16 modifier is not supported on this GPU");
6500
6501 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6502 ImmTy = AMDGPUOperand::ImmTyR128A16;
6503
6504 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
6505 return ParseStatus::Success;
6506}
6507
6508unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
6509 bool &Disabling) const {
6510 Disabling = Id.consume_front("no");
6511
6512 if (isGFX940() && !Mnemo.starts_with("s_")) {
6513 return StringSwitch<unsigned>(Id)
6514 .Case("nt", AMDGPU::CPol::NT)
6515 .Case("sc0", AMDGPU::CPol::SC0)
6516 .Case("sc1", AMDGPU::CPol::SC1)
6517 .Default(0);
6518 }
6519
6520 return StringSwitch<unsigned>(Id)
6521 .Case("dlc", AMDGPU::CPol::DLC)
6522 .Case("glc", AMDGPU::CPol::GLC)
6523 .Case("scc", AMDGPU::CPol::SCC)
6524 .Case("slc", AMDGPU::CPol::SLC)
6525 .Default(0);
6526}
6527
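// Cache-policy modifiers are plain tokens on the instruction, e.g.
// (illustrative) "glc slc" on a pre-GFX12 buffer load; prefixing a modifier
// with "no" explicitly disables it.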
6528ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
6529 if (isGFX12Plus()) {
6530 SMLoc StringLoc = getLoc();
6531
6532 int64_t CPolVal = 0;
6533 ParseStatus ResTH = ParseStatus::NoMatch;
6534 ParseStatus ResScope = ParseStatus::NoMatch;
6535
6536 for (;;) {
6537 if (ResTH.isNoMatch()) {
6538 int64_t TH;
6539 ResTH = parseTH(Operands, TH);
6540 if (ResTH.isFailure())
6541 return ResTH;
6542 if (ResTH.isSuccess()) {
6543 CPolVal |= TH;
6544 continue;
6545 }
6546 }
6547
6548 if (ResScope.isNoMatch()) {
6549 int64_t Scope;
6550 ResScope = parseScope(Operands, Scope);
6551 if (ResScope.isFailure())
6552 return ResScope;
6553 if (ResScope.isSuccess()) {
6554 CPolVal |= Scope;
6555 continue;
6556 }
6557 }
6558
6559 break;
6560 }
6561
6562 if (ResTH.isNoMatch() && ResScope.isNoMatch())
6563 return ParseStatus::NoMatch;
6564
6565 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
6566 AMDGPUOperand::ImmTyCPol));
6567 return ParseStatus::Success;
6568 }
6569
6570 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
6571 SMLoc OpLoc = getLoc();
6572 unsigned Enabled = 0, Seen = 0;
6573 for (;;) {
6574 SMLoc S = getLoc();
6575 bool Disabling;
6576 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
6577 if (!CPol)
6578 break;
6579
6580 lex();
6581
6582 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
6583 return Error(S, "dlc modifier is not supported on this GPU");
6584
6585 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
6586 return Error(S, "scc modifier is not supported on this GPU");
6587
6588 if (Seen & CPol)
6589 return Error(S, "duplicate cache policy modifier");
6590
6591 if (!Disabling)
6592 Enabled |= CPol;
6593
6594 Seen |= CPol;
6595 }
6596
6597 if (!Seen)
6598 return ParseStatus::NoMatch;
6599
6600 Operands.push_back(
6601 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
6602 return ParseStatus::Success;
6603}
6604
6605ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
6606 int64_t &Scope) {
6607 Scope = AMDGPU::CPol::SCOPE_CU; // default;
6608
6609 StringRef Value;
6610 SMLoc StringLoc;
6611 ParseStatus Res;
6612
6613 Res = parseStringWithPrefix("scope", Value, StringLoc);
6614 if (!Res.isSuccess())
6615 return Res;
6616
6618 .Case("SCOPE_CU", AMDGPU::CPol::SCOPE_CU)
6619 .Case("SCOPE_SE", AMDGPU::CPol::SCOPE_SE)
6620 .Case("SCOPE_DEV", AMDGPU::CPol::SCOPE_DEV)
6621 .Case("SCOPE_SYS", AMDGPU::CPol::SCOPE_SYS)
6622 .Default(0xffffffff);
6623
6624 if (Scope == 0xffffffff)
6625 return Error(StringLoc, "invalid scope value");
6626
6627 return ParseStatus::Success;
6628}
6629
6630ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
6631 TH = AMDGPU::CPol::TH_RT; // default
6632
6633 StringRef Value;
6634 SMLoc StringLoc;
6635 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
6636 if (!Res.isSuccess())
6637 return Res;
6638
6639 if (Value == "TH_DEFAULT")
6641 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" ||
6642 Value == "TH_LOAD_NT_WB") {
6643 return Error(StringLoc, "invalid th value");
6644 } else if (Value.consume_front("TH_ATOMIC_")) {
6646 } else if (Value.consume_front("TH_LOAD_")) {
6648 } else if (Value.consume_front("TH_STORE_")) {
6650 } else {
6651 return Error(StringLoc, "invalid th value");
6652 }
6653
6654 if (Value == "BYPASS")
6655 TH |= AMDGPU::CPol::TH_REAL_BYPASS;
6656
6657 if (TH != 0) {
6664 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
6667 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
6669 .Default(0xffffffff);
6670 else
6676 .Case("RT_WB", AMDGPU::CPol::TH_RT_WB)
6677 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
6678 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
6679 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
6680 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
6681 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
6682 .Default(0xffffffff);
6683 }
6684
6685 if (TH == 0xffffffff)
6686 return Error(StringLoc, "invalid th value");
6687
6688 return ParseStatus::Success;
6689}
6690
6691static void addOptionalImmOperand(
6692 MCInst& Inst, const OperandVector& Operands,
6693 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6694 AMDGPUOperand::ImmTy ImmT,
6695 int64_t Default = 0) {
6696 auto i = OptionalIdx.find(ImmT);
6697 if (i != OptionalIdx.end()) {
6698 unsigned Idx = i->second;
6699 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
6700 } else {
6701 Inst.addOperand(MCOperand::createImm(Default));
6702 }
6703}
6704
6705ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6706 StringRef &Value,
6707 SMLoc &StringLoc) {
6708 if (!trySkipId(Prefix, AsmToken::Colon))
6709 return ParseStatus::NoMatch;
6710
6711 StringLoc = getLoc();
6712 return parseId(Value, "expected an identifier") ? ParseStatus::Success
6713 : ParseStatus::Failure;
6714}
6715
6716//===----------------------------------------------------------------------===//
6717// MTBUF format
6718//===----------------------------------------------------------------------===//
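// A tbuffer format can be given numerically, e.g. (illustrative) "format:22",
// split as "dfmt:..., nfmt:...", or symbolically as "format:[...]"; the
// symbolic names are resolved by the helpers below.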
6719
6720bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6721 int64_t MaxVal,
6722 int64_t &Fmt) {
6723 int64_t Val;
6724 SMLoc Loc = getLoc();
6725
6726 auto Res = parseIntWithPrefix(Pref, Val);
6727 if (Res.isFailure())
6728 return false;
6729 if (Res.isNoMatch())
6730 return true;
6731
6732 if (Val < 0 || Val > MaxVal) {
6733 Error(Loc, Twine("out of range ", StringRef(Pref)));
6734 return false;
6735 }
6736
6737 Fmt = Val;
6738 return true;
6739}
6740
6741ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
6742 AMDGPUOperand::ImmTy ImmTy) {
6743 const char *Pref = "index_key";
6744 int64_t ImmVal = 0;
6745 SMLoc Loc = getLoc();
6746 auto Res = parseIntWithPrefix(Pref, ImmVal);
6747 if (!Res.isSuccess())
6748 return Res;
6749
6750 if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
6751 return Error(Loc, Twine("out of range ", StringRef(Pref)));
6752
6753 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
6754 return Error(Loc, Twine("out of range ", StringRef(Pref)));
6755
6756 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
6757 return ParseStatus::Success;
6758}
6759
6760ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
6761 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
6762}
6763
6764ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
6765 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
6766}
6767
6768// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6769// values to live in a joint format operand in the MCInst encoding.
6770ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6771 using namespace llvm::AMDGPU::MTBUFFormat;
6772
6773 int64_t Dfmt = DFMT_UNDEF;
6774 int64_t Nfmt = NFMT_UNDEF;
6775
6776 // dfmt and nfmt can appear in either order, and each is optional.
6777 for (int I = 0; I < 2; ++I) {
6778 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6779 return ParseStatus::Failure;
6780
6781 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
6782 return ParseStatus::Failure;
6783
6784 // Skip optional comma between dfmt/nfmt
6785 // but guard against 2 commas following each other.
6786 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6787 !peekToken().is(AsmToken::Comma)) {
6788 trySkipToken(AsmToken::Comma);
6789 }
6790 }
6791
6792 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6793 return ParseStatus::NoMatch;
6794
6795 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6796 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6797
6798 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6799 return ParseStatus::Success;
6800}
6801
6802ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6803 using namespace llvm::AMDGPU::MTBUFFormat;
6804
6805 int64_t Fmt = UFMT_UNDEF;
6806
6807 if (!tryParseFmt("format", UFMT_MAX, Fmt))
6808 return ParseStatus::Failure;
6809
6810 if (Fmt == UFMT_UNDEF)
6811 return ParseStatus::NoMatch;
6812
6813 Format = Fmt;
6814 return ParseStatus::Success;
6815}
6816
6817bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6818 int64_t &Nfmt,
6819 StringRef FormatStr,
6820 SMLoc Loc) {
6821 using namespace llvm::AMDGPU::MTBUFFormat;
6822 int64_t Format;
6823
6824 Format = getDfmt(FormatStr);
6825 if (Format != DFMT_UNDEF) {
6826 Dfmt = Format;
6827 return true;
6828 }
6829
6830 Format = getNfmt(FormatStr, getSTI());
6831 if (Format != NFMT_UNDEF) {
6832 Nfmt = Format;
6833 return true;
6834 }
6835
6836 Error(Loc, "unsupported format");
6837 return false;
6838}
6839
6840ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6841 SMLoc FormatLoc,
6842 int64_t &Format) {
6843 using namespace llvm::AMDGPU::MTBUFFormat;
6844
6845 int64_t Dfmt = DFMT_UNDEF;
6846 int64_t Nfmt = NFMT_UNDEF;
6847 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6848 return ParseStatus::Failure;
6849
6850 if (trySkipToken(AsmToken::Comma)) {
6851 StringRef Str;
6852 SMLoc Loc = getLoc();
6853 if (!parseId(Str, "expected a format string") ||
6854 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
6855 return ParseStatus::Failure;
6856 if (Dfmt == DFMT_UNDEF)
6857 return Error(Loc, "duplicate numeric format");
6858 if (Nfmt == NFMT_UNDEF)
6859 return Error(Loc, "duplicate data format");
6860 }
6861
6862 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6863 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6864
6865 if (isGFX10Plus()) {
6866 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6867 if (Ufmt == UFMT_UNDEF)
6868 return Error(FormatLoc, "unsupported format");
6869 Format = Ufmt;
6870 } else {
6871 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6872 }
6873
6874 return ParseStatus::Success;
6875}
6876
6877ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6878 SMLoc Loc,
6879 int64_t &Format) {
6880 using namespace llvm::AMDGPU::MTBUFFormat;
6881
6882 auto Id = getUnifiedFormat(FormatStr, getSTI());
6883 if (Id == UFMT_UNDEF)
6884 return ParseStatus::NoMatch;
6885
6886 if (!isGFX10Plus())
6887 return Error(Loc, "unified format is not supported on this GPU");
6888
6889 Format = Id;
6890 return ParseStatus::Success;
6891}
6892
6893ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6894 using namespace llvm::AMDGPU::MTBUFFormat;
6895 SMLoc Loc = getLoc();
6896
6897 if (!parseExpr(Format))
6898 return ParseStatus::Failure;
6899 if (!isValidFormatEncoding(Format, getSTI()))
6900 return Error(Loc, "out of range format");
6901
6902 return ParseStatus::Success;
6903}
6904
6905ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6906 using namespace llvm::AMDGPU::MTBUFFormat;
6907
6908 if (!trySkipId("format", AsmToken::Colon))
6909 return ParseStatus::NoMatch;
6910
6911 if (trySkipToken(AsmToken::LBrac)) {
6912 StringRef FormatStr;
6913 SMLoc Loc = getLoc();
6914 if (!parseId(FormatStr, "expected a format string"))
6915 return ParseStatus::Failure;
6916
6917 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6918 if (Res.isNoMatch())
6919 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6920 if (!Res.isSuccess())
6921 return Res;
6922
6923 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6924 return ParseStatus::Failure;
6925
6926 return ParseStatus::Success;
6927 }
6928
6929 return parseNumericFormat(Format);
6930}
6931
6932ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6933 using namespace llvm::AMDGPU::MTBUFFormat;
6934
6935 int64_t Format = getDefaultFormatEncoding(getSTI());
6936 ParseStatus Res;
6937 SMLoc Loc = getLoc();
6938
6939 // Parse legacy format syntax.
6940 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6941 if (Res.isFailure())
6942 return Res;
6943
6944 bool FormatFound = Res.isSuccess();
6945
6946 Operands.push_back(
6947 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6948
6949 if (FormatFound)
6950 trySkipToken(AsmToken::Comma);
6951
6952 if (isToken(AsmToken::EndOfStatement)) {
6953 // We are expecting an soffset operand,
6954 // but let matcher handle the error.
6955 return ParseStatus::Success;
6956 }
6957
6958 // Parse soffset.
6959 Res = parseRegOrImm(Operands);
6960 if (!Res.isSuccess())
6961 return Res;
6962
6963 trySkipToken(AsmToken::Comma);
6964
6965 if (!FormatFound) {
6966 Res = parseSymbolicOrNumericFormat(Format);
6967 if (Res.isFailure())
6968 return Res;
6969 if (Res.isSuccess()) {
6970 auto Size = Operands.size();
6971 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6972 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6973 Op.setImm(Format);
6974 }
6975 return ParseStatus::Success;
6976 }
6977
6978 if (isId("format") && peekToken().is(AsmToken::Colon))
6979 return Error(getLoc(), "duplicate format");
6980 return ParseStatus::Success;
6981}
6982
6983ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
6984 ParseStatus Res =
6985 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
6986 if (Res.isNoMatch()) {
6987 Res = parseIntWithPrefix("inst_offset", Operands,
6988 AMDGPUOperand::ImmTyInstOffset);
6989 }
6990 return Res;
6991}
6992
6993ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
6994 ParseStatus Res =
6995 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
6996 if (Res.isNoMatch())
6997 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
6998 return Res;
6999}
7000
7001ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7002 ParseStatus Res =
7003 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7004 if (Res.isNoMatch()) {
7005 Res =
7006 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7007 }
7008 return Res;
7009}
7010
7011//===----------------------------------------------------------------------===//
7012// Exp
7013//===----------------------------------------------------------------------===//
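// Converts an export instruction such as (illustrative)
// "exp mrt0 v0, v1, v2, v3 done" into an MCInst, deriving the enable mask
// from which sources are registers rather than "off".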
7014
7015void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7016 OptionalImmIndexMap OptionalIdx;
7017
7018 unsigned OperandIdx[4];
7019 unsigned EnMask = 0;
7020 int SrcIdx = 0;
7021
7022 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7023 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7024
7025 // Add the register arguments
7026 if (Op.isReg()) {
7027 assert(SrcIdx < 4);
7028 OperandIdx[SrcIdx] = Inst.size();
7029 Op.addRegOperands(Inst, 1);
7030 ++SrcIdx;
7031 continue;
7032 }
7033
7034 if (Op.isOff()) {
7035 assert(SrcIdx < 4);
7036 OperandIdx[SrcIdx] = Inst.size();
7037 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
7038 ++SrcIdx;
7039 continue;
7040 }
7041
7042 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7043 Op.addImmOperands(Inst, 1);
7044 continue;
7045 }
7046
7047 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7048 continue;
7049
7050 // Handle optional arguments
7051 OptionalIdx[Op.getImmTy()] = i;
7052 }
7053
7054 assert(SrcIdx == 4);
7055
7056 bool Compr = false;
7057 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7058 Compr = true;
7059 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7060 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
7061 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
7062 }
7063
7064 for (auto i = 0; i < SrcIdx; ++i) {
7065 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
7066 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7067 }
7068 }
7069
7070 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7071 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7072
7073 Inst.addOperand(MCOperand::createImm(EnMask));
7074}
7075
7076//===----------------------------------------------------------------------===//
7077// s_waitcnt
7078//===----------------------------------------------------------------------===//
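// Accepts either a raw immediate or named counters, e.g. (illustrative)
// "s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)"; a "_sat" suffix clamps an
// out-of-range value instead of reporting an error.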
7079
7080static bool
7081encodeCnt(
7082 const AMDGPU::IsaVersion ISA,
7083 int64_t &IntVal,
7084 int64_t CntVal,
7085 bool Saturate,
7086 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7087 unsigned (*decode)(const IsaVersion &Version, unsigned))
7088{
7089 bool Failed = false;
7090
7091 IntVal = encode(ISA, IntVal, CntVal);
7092 if (CntVal != decode(ISA, IntVal)) {
7093 if (Saturate) {
7094 IntVal = encode(ISA, IntVal, -1);
7095 } else {
7096 Failed = true;
7097 }
7098 }
7099 return Failed;
7100}
7101
7102bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7103
7104 SMLoc CntLoc = getLoc();
7105 StringRef CntName = getTokenStr();
7106
7107 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7108 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7109 return false;
7110
7111 int64_t CntVal;
7112 SMLoc ValLoc = getLoc();
7113 if (!parseExpr(CntVal))
7114 return false;
7115
7116 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7117
7118 bool Failed = true;
7119 bool Sat = CntName.ends_with("_sat");
7120
7121 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7122 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7123 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7124 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7125 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7126 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7127 } else {
7128 Error(CntLoc, "invalid counter name " + CntName);
7129 return false;
7130 }
7131
7132 if (Failed) {
7133 Error(ValLoc, "too large value for " + CntName);
7134 return false;
7135 }
7136
7137 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7138 return false;
7139
7140 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7141 if (isToken(AsmToken::EndOfStatement)) {
7142 Error(getLoc(), "expected a counter name");
7143 return false;
7144 }
7145 }
7146
7147 return true;
7148}
7149
7150ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7151 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7152 int64_t Waitcnt = getWaitcntBitMask(ISA);
7153 SMLoc S = getLoc();
7154
7155 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7156 while (!isToken(AsmToken::EndOfStatement)) {
7157 if (!parseCnt(Waitcnt))
7158 return ParseStatus::Failure;
7159 }
7160 } else {
7161 if (!parseExpr(Waitcnt))
7162 return ParseStatus::Failure;
7163 }
7164
7165 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7166 return ParseStatus::Success;
7167}
7168
7169bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7170 SMLoc FieldLoc = getLoc();
7171 StringRef FieldName = getTokenStr();
7172 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7173 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7174 return false;
7175
7176 SMLoc ValueLoc = getLoc();
7177 StringRef ValueName = getTokenStr();
7178 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7179 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7180 return false;
7181
7182 unsigned Shift;
7183 if (FieldName == "instid0") {
7184 Shift = 0;
7185 } else if (FieldName == "instskip") {
7186 Shift = 4;
7187 } else if (FieldName == "instid1") {
7188 Shift = 7;
7189 } else {
7190 Error(FieldLoc, "invalid field name " + FieldName);
7191 return false;
7192 }
7193
7194 int Value;
7195 if (Shift == 4) {
7196 // Parse values for instskip.
7198 .Case("SAME", 0)
7199 .Case("NEXT", 1)
7200 .Case("SKIP_1", 2)
7201 .Case("SKIP_2", 3)
7202 .Case("SKIP_3", 4)
7203 .Case("SKIP_4", 5)
7204 .Default(-1);
7205 } else {
7206 // Parse values for instid0 and instid1.
7208 .Case("NO_DEP", 0)
7209 .Case("VALU_DEP_1", 1)
7210 .Case("VALU_DEP_2", 2)
7211 .Case("VALU_DEP_3", 3)
7212 .Case("VALU_DEP_4", 4)
7213 .Case("TRANS32_DEP_1", 5)
7214 .Case("TRANS32_DEP_2", 6)
7215 .Case("TRANS32_DEP_3", 7)
7216 .Case("FMA_ACCUM_CYCLE_1", 8)
7217 .Case("SALU_CYCLE_1", 9)
7218 .Case("SALU_CYCLE_2", 10)
7219 .Case("SALU_CYCLE_3", 11)
7220 .Default(-1);
7221 }
7222 if (Value < 0) {
7223 Error(ValueLoc, "invalid value name " + ValueName);
7224 return false;
7225 }
7226
7227 Delay |= Value << Shift;
7228 return true;
7229}
7230
7231ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7232 int64_t Delay = 0;
7233 SMLoc S = getLoc();
7234
7235 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7236 do {
7237 if (!parseDelay(Delay))
7238 return ParseStatus::Failure;
7239 } while (trySkipToken(AsmToken::Pipe));
7240 } else {
7241 if (!parseExpr(Delay))
7242 return ParseStatus::Failure;
7243 }
7244
7245 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7246 return ParseStatus::Success;
7247}
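// e.g. (illustrative) "s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) |
// instid1(SALU_CYCLE_1)"; each field is OR-ed into a single immediate.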
7248
7249bool
7250AMDGPUOperand::isSWaitCnt() const {
7251 return isImm();
7252}
7253
7254bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7255
7256//===----------------------------------------------------------------------===//
7257// DepCtr
7258//===----------------------------------------------------------------------===//
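// Parsed either as a raw immediate or as a list of named counters of the
// form "counter_name(value)" joined by '&' or ',', e.g. (counter name
// illustrative) "s_waitcnt_depctr depctr_va_vdst(0)".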
7259
7260void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7261 StringRef DepCtrName) {
7262 switch (ErrorId) {
7263 case OPR_ID_UNKNOWN:
7264 Error(Loc, Twine("invalid counter name ", DepCtrName));
7265 return;
7266 case OPR_ID_UNSUPPORTED:
7267 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7268 return;
7269 case OPR_ID_DUPLICATE:
7270 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7271 return;
7272 case OPR_VAL_INVALID:
7273 Error(Loc, Twine("invalid value for ", DepCtrName));
7274 return;
7275 default:
7276 assert(false);
7277 }
7278}
7279
7280bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7281
7282 using namespace llvm::AMDGPU::DepCtr;
7283
7284 SMLoc DepCtrLoc = getLoc();
7285 StringRef DepCtrName = getTokenStr();
7286
7287 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7288 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7289 return false;
7290
7291 int64_t ExprVal;
7292 if (!parseExpr(ExprVal))
7293 return false;
7294
7295 unsigned PrevOprMask = UsedOprMask;
7296 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7297
7298 if (CntVal < 0) {
7299 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7300 return false;
7301 }
7302
7303 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7304 return false;
7305
7306 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7307 if (isToken(AsmToken::EndOfStatement)) {
7308 Error(getLoc(), "expected a counter name");
7309 return false;
7310 }
7311 }
7312
7313 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7314 DepCtr = (DepCtr & ~CntValMask) | CntVal;
7315 return true;
7316}
7317
7318ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7319 using namespace llvm::AMDGPU::DepCtr;
7320
7321 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
7322 SMLoc Loc = getLoc();
7323
7324 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7325 unsigned UsedOprMask = 0;
7326 while (!isToken(AsmToken::EndOfStatement)) {
7327 if (!parseDepCtr(DepCtr, UsedOprMask))
7328 return ParseStatus::Failure;
7329 }
7330 } else {
7331 if (!parseExpr(DepCtr))
7332 return ParseStatus::Failure;
7333 }
7334
7335 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
7336 return ParseStatus::Success;
7337}
7338
7339bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
7340
7341//===----------------------------------------------------------------------===//
7342// hwreg
7343//===----------------------------------------------------------------------===//
7344
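// Note (illustrative): the hwreg operand accepts hwreg(<name or id>[, <bit
// offset>, <bitfield width>]), the structured form {id: <v>, offset: <v>,
// size: <v>}, or a raw 16-bit immediate.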
7345ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
7346 OperandInfoTy &Offset,
7347 OperandInfoTy &Width) {
7348 using namespace llvm::AMDGPU::Hwreg;
7349
7350 if (!trySkipId("hwreg", AsmToken::LParen))
7351 return ParseStatus::NoMatch;
7352
7353 // The register may be specified by name or using a numeric code
7354 HwReg.Loc = getLoc();
7355 if (isToken(AsmToken::Identifier) &&
7356 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7357 HwReg.IsSymbolic = true;
7358 lex(); // skip register name
7359 } else if (!parseExpr(HwReg.Val, "a register name")) {
7360 return ParseStatus::Failure;
7361 }
7362
7363 if (trySkipToken(AsmToken::RParen))
7364 return ParseStatus::Success;
7365
7366 // parse optional params
7367 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
7368 return ParseStatus::Failure;
7369
7370 Offset.Loc = getLoc();
7371 if (!parseExpr(Offset.Val))
7372 return ParseStatus::Failure;
7373
7374 if (!skipToken(AsmToken::Comma, "expected a comma"))
7375 return ParseStatus::Failure;
7376
7377 Width.Loc = getLoc();
7378 if (!parseExpr(Width.Val) ||
7379 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7380 return ParseStatus::Failure;
7381
7382 return ParseStatus::Success;
7383}
7384
7385ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
7386 using namespace llvm::AMDGPU::Hwreg;
7387
7388 int64_t ImmVal = 0;
7389 SMLoc Loc = getLoc();
7390
7391 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
7392 HwregId::Default);
7393 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
7394 HwregOffset::Default);
7395 struct : StructuredOpField {
7396 using StructuredOpField::StructuredOpField;
7397 bool validate(AMDGPUAsmParser &Parser) const override {
7398 if (!isUIntN(Width, Val - 1))
7399 return Error(Parser, "only values from 1 to 32 are legal");
7400 return true;
7401 }
7402 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
7403 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
7404
7405 if (Res.isNoMatch())
7406 Res = parseHwregFunc(HwReg, Offset, Width);
7407
7408 if (Res.isSuccess()) {
7409 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
7410 return ParseStatus::Failure;
7411 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
7412 }
7413
7414 if (Res.isNoMatch() &&
7415 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
7416 Res = ParseStatus::Success;
7417
7418 if (!Res.isSuccess())
7419 return ParseStatus::Failure;
7420
7421 if (!isUInt<16>(ImmVal))
7422 return Error(Loc, "invalid immediate: only 16-bit values are legal");
7423 Operands.push_back(
7424 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
7425 return ParseStatus::Success;
7426}
7427
7428bool AMDGPUOperand::isHwreg() const {
7429 return isImmTy(ImmTyHwreg);
7430}
7431
7432//===----------------------------------------------------------------------===//
7433// sendmsg
7434//===----------------------------------------------------------------------===//
7435
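// Note (illustrative): the sendmsg operand accepts
// sendmsg(<message>[, <operation>[, <stream>]]) or a raw 16-bit immediate;
// symbolic message names get stricter validation than numeric ids.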
7436bool
7437AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
7438 OperandInfoTy &Op,
7439 OperandInfoTy &Stream) {
7440 using namespace llvm::AMDGPU::SendMsg;
7441
7442 Msg.Loc = getLoc();
7443 if (isToken(AsmToken::Identifier) &&
7444 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7445 Msg.IsSymbolic = true;
7446 lex(); // skip message name
7447 } else if (!parseExpr(Msg.Val, "a message name")) {
7448 return false;
7449 }
7450
7451 if (trySkipToken(AsmToken::Comma)) {
7452 Op.IsDefined = true;
7453 Op.Loc = getLoc();
7454 if (isToken(AsmToken::Identifier) &&
7455 (Op.Val = getMsgOpId(Msg.Val, getTokenStr())) >= 0) {
7456 lex(); // skip operation name
7457 } else if (!parseExpr(Op.Val, "an operation name")) {
7458 return false;
7459 }
7460
7461 if (trySkipToken(AsmToken::Comma)) {
7462 Stream.IsDefined = true;
7463 Stream.Loc = getLoc();
7464 if (!parseExpr(Stream.Val))
7465 return false;
7466 }
7467 }
7468
7469 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
7470}
7471
7472bool
7473AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
7474 const OperandInfoTy &Op,
7475 const OperandInfoTy &Stream) {
7476 using namespace llvm::AMDGPU::SendMsg;
7477
7478 // Validation strictness depends on whether the message is specified
7479 // in a symbolic or in a numeric form. In the latter case,
7480 // we only check that the value can be encoded.
7481 bool Strict = Msg.IsSymbolic;
7482
7483 if (Strict) {
7484 if (Msg.Val == OPR_ID_UNSUPPORTED) {
7485 Error(Msg.Loc, "specified message id is not supported on this GPU");
7486 return false;
7487 }
7488 } else {
7489 if (!isValidMsgId(Msg.Val, getSTI())) {
7490 Error(Msg.Loc, "invalid message id");
7491 return false;
7492 }
7493 }
7494 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
7495 if (Op.IsDefined) {
7496 Error(Op.Loc, "message does not support operations");
7497 } else {
7498 Error(Msg.Loc, "missing message operation");
7499 }
7500 return false;
7501 }
7502 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
7503 Error(Op.Loc, "invalid operation id");
7504 return false;
7505 }
7506 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
7507 Stream.IsDefined) {
7508 Error(Stream.Loc, "message operation does not support streams");
7509 return false;
7510 }
7511 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
7512 Error(Stream.Loc, "invalid message stream id");
7513 return false;
7514 }
7515 return true;
7516}
7517
7518ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
7519 using namespace llvm::AMDGPU::SendMsg;
7520
7521 int64_t ImmVal = 0;
7522 SMLoc Loc = getLoc();
7523
7524 if (trySkipId("sendmsg", AsmToken::LParen)) {
7525 OperandInfoTy Msg(OPR_ID_UNKNOWN);
7526 OperandInfoTy Op(OP_NONE_);
7527 OperandInfoTy Stream(STREAM_ID_NONE_);
7528 if (parseSendMsgBody(Msg, Op, Stream) &&
7529 validateSendMsg(Msg, Op, Stream)) {
7530 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
7531 } else {
7532 return ParseStatus::Failure;
7533 }
7534 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
7535 if (ImmVal < 0 || !isUInt<16>(ImmVal))
7536 return Error(Loc, "invalid immediate: only 16-bit values are legal");
7537 } else {
7538 return ParseStatus::Failure;
7539 }
7540
7541 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
7542 return ParseStatus::Success;
7543}
7544
7545bool AMDGPUOperand::isSendMsg() const {
7546 return isImmTy(ImmTySendMsg);
7547}
7548
7549//===----------------------------------------------------------------------===//
7550// v_interp
7551//===----------------------------------------------------------------------===//
7552
7553ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
7554 StringRef Str;
7555 SMLoc S = getLoc();
7556
7557 if (!parseId(Str))
7558 return ParseStatus::NoMatch;
7559
7560 int Slot = StringSwitch<int>(Str)
7561 .Case("p10", 0)
7562 .Case("p20", 1)
7563 .Case("p0", 2)
7564 .Default(-1);
7565
7566 if (Slot == -1)
7567 return Error(S, "invalid interpolation slot");
7568
7569 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
7570 AMDGPUOperand::ImmTyInterpSlot));
7571 return ParseStatus::Success;
7572}
7573
7574ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
7575 StringRef Str;
7576 SMLoc S = getLoc();
7577
7578 if (!parseId(Str))
7579 return ParseStatus::NoMatch;
7580
7581 if (!Str.starts_with("attr"))
7582 return Error(S, "invalid interpolation attribute");
7583
7584 StringRef Chan = Str.take_back(2);
7585 int AttrChan = StringSwitch<int>(Chan)
7586 .Case(".x", 0)
7587 .Case(".y", 1)
7588 .Case(".z", 2)
7589 .Case(".w", 3)
7590 .Default(-1);
7591 if (AttrChan == -1)
7592 return Error(S, "invalid or missing interpolation attribute channel");
7593
7594 Str = Str.drop_back(2).drop_front(4);
7595
7596 uint8_t Attr;
7597 if (Str.getAsInteger(10, Attr))
7598 return Error(S, "invalid or missing interpolation attribute number");
7599
7600 if (Attr > 32)
7601 return Error(S, "out of bounds interpolation attribute number");
7602
7603 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
7604
7605 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
7606 AMDGPUOperand::ImmTyInterpAttr));
7607 Operands.push_back(AMDGPUOperand::CreateImm(
7608 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
7609 return ParseStatus::Success;
7610}
7611
7612//===----------------------------------------------------------------------===//
7613// exp
7614//===----------------------------------------------------------------------===//
7615
7616ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
7617 using namespace llvm::AMDGPU::Exp;
7618
7619 StringRef Str;
7620 SMLoc S = getLoc();
7621
7622 if (!parseId(Str))
7623 return ParseStatus::NoMatch;
7624
7625 unsigned Id = getTgtId(Str);
7626 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
7627 return Error(S, (Id == ET_INVALID)
7628 ? "invalid exp target"
7629 : "exp target is not supported on this GPU");
7630
7631 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
7632 AMDGPUOperand::ImmTyExpTgt));
7633 return ParseStatus::Success;
7634}
7635
7636//===----------------------------------------------------------------------===//
7637// parser helpers
7638//===----------------------------------------------------------------------===//
7639
7640bool
7641AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
7642 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
7643}
7644
7645bool
7646AMDGPUAsmParser::isId(const StringRef Id) const {
7647 return isId(getToken(), Id);
7648}
7649
7650bool
7651AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
7652 return getTokenKind() == Kind;
7653}
7654
7655StringRef AMDGPUAsmParser::getId() const {
7656 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
7657}
7658
7659bool
7660AMDGPUAsmParser::trySkipId(const StringRef Id) {
7661 if (isId(Id)) {
7662 lex();
7663 return true;
7664 }
7665 return false;
7666}
7667
7668bool
7669AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7670 if (isToken(AsmToken::Identifier)) {
7671 StringRef Tok = getTokenStr();
7672 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
7673 lex();
7674 return true;
7675 }
7676 }
7677 return false;
7678}
7679
7680bool
7681AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7682 if (isId(Id) && peekToken().is(Kind)) {
7683 lex();
7684 lex();
7685 return true;
7686 }
7687 return false;
7688}
7689
7690bool
7691AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7692 if (isToken(Kind)) {
7693 lex();
7694 return true;
7695 }
7696 return false;
7697}
7698
7699bool
7700AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7701 const StringRef ErrMsg) {
7702 if (!trySkipToken(Kind)) {
7703 Error(getLoc(), ErrMsg);
7704 return false;
7705 }
7706 return true;
7707}
7708
7709bool
7710AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7711 SMLoc S = getLoc();
7712
7713 const MCExpr *Expr;
7714 if (Parser.parseExpression(Expr))
7715 return false;
7716
7717 if (Expr->evaluateAsAbsolute(Imm))
7718 return true;
7719
7720 if (Expected.empty()) {
7721 Error(S, "expected absolute expression");
7722 } else {
7723 Error(S, Twine("expected ", Expected) +
7724 Twine(" or an absolute expression"));
7725 }
7726 return false;
7727}
7728
7729bool
7730AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7731 SMLoc S = getLoc();
7732
7733 const MCExpr *Expr;
7734 if (Parser.parseExpression(Expr))
7735 return false;
7736
7737 int64_t IntVal;
7738 if (Expr->evaluateAsAbsolute(IntVal)) {
7739 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7740 } else {
7741 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7742 }
7743 return true;
7744}
7745
7746bool
7747AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7748 if (isToken(AsmToken::String)) {
7749 Val = getToken().getStringContents();
7750 lex();
7751 return true;
7752 } else {
7753 Error(getLoc(), ErrMsg);
7754 return false;
7755 }
7756}
7757
7758bool
7759AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7760 if (isToken(AsmToken::Identifier)) {
7761 Val = getTokenStr();
7762 lex();
7763 return true;
7764 } else {
7765 if (!ErrMsg.empty())
7766 Error(getLoc(), ErrMsg);
7767 return false;
7768 }
7769}
7770
7771AsmToken
7772AMDGPUAsmParser::getToken() const {
7773 return Parser.getTok();
7774}
7775
7776AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7777 return isToken(AsmToken::EndOfStatement)
7778 ? getToken()
7779 : getLexer().peekTok(ShouldSkipSpace);
7780}
7781
7782void
7783AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7784 auto TokCount = getLexer().peekTokens(Tokens);
7785
7786 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7787 Tokens[Idx] = AsmToken(AsmToken::Error, "");
7788}
7789
7790AsmToken::TokenKind
7791AMDGPUAsmParser::getTokenKind() const {
7792 return getLexer().getKind();
7793}
7794
7795SMLoc
7796AMDGPUAsmParser::getLoc() const {
7797 return getToken().getLoc();
7798}
7799
7800StringRef
7801AMDGPUAsmParser::getTokenStr() const {
7802 return getToken().getString();
7803}
7804
7805void
7806AMDGPUAsmParser::lex() {
7807 Parser.Lex();
7808}
7809
7810SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
7811 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7812}
7813
7814SMLoc
7815AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7816 const OperandVector &Operands) const {
7817 for (unsigned i = Operands.size() - 1; i > 0; --i) {
7818 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7819 if (Test(Op))
7820 return Op.getStartLoc();
7821 }
7822 return getInstLoc(Operands);
7823}
7824
7825SMLoc
7826AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7827 const OperandVector &Operands) const {
7828 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7829 return getOperandLoc(Test, Operands);
7830}
7831
7832SMLoc
7833AMDGPUAsmParser::getRegLoc(unsigned Reg,
7834 const OperandVector &Operands) const {
7835 auto Test = [=](const AMDGPUOperand& Op) {
7836 return Op.isRegKind() && Op.getReg() == Reg;
7837 };
7838 return getOperandLoc(Test, Operands);
7839}
7840
7841SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
7842 bool SearchMandatoryLiterals) const {
7843 auto Test = [](const AMDGPUOperand& Op) {
7844 return Op.IsImmKindLiteral() || Op.isExpr();
7845 };
7846 SMLoc Loc = getOperandLoc(Test, Operands);
7847 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
7848 Loc = getMandatoryLitLoc(Operands);
7849 return Loc;
7850}
7851
7852SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
7853 auto Test = [](const AMDGPUOperand &Op) {
7854 return Op.IsImmKindMandatoryLiteral();
7855 };
7856 return getOperandLoc(Test, Operands);
7857}
7858
7859SMLoc
7860AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7861 auto Test = [](const AMDGPUOperand& Op) {
7862 return Op.isImmKindConst();
7863 };
7864 return getOperandLoc(Test, Operands);
7865}
7866
7867ParseStatus
7868AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
7869 if (!trySkipToken(AsmToken::LCurly))
7870 return ParseStatus::NoMatch;
7871
7872 bool First = true;
7873 while (!trySkipToken(AsmToken::RCurly)) {
7874 if (!First &&
7875 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
7876 return ParseStatus::Failure;
7877
7878 StringRef Id = getTokenStr();
7879 SMLoc IdLoc = getLoc();
7880 if (!skipToken(AsmToken::Identifier, "field name expected") ||
7881 !skipToken(AsmToken::Colon, "colon expected"))
7882 return ParseStatus::Failure;
7883
7884 auto I =
7885 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
7886 if (I == Fields.end())
7887 return Error(IdLoc, "unknown field");
7888 if ((*I)->IsDefined)
7889 return Error(IdLoc, "duplicate field");
7890
7891 // TODO: Support symbolic values.
7892 (*I)->Loc = getLoc();
7893 if (!parseExpr((*I)->Val))
7894 return ParseStatus::Failure;
7895 (*I)->IsDefined = true;
7896
7897 First = false;
7898 }
7899 return ParseStatus::Success;
7900}
7901
7902bool AMDGPUAsmParser::validateStructuredOpFields(
7903 ArrayRef<const StructuredOpField *> Fields) {
7904 return all_of(Fields, [this](const StructuredOpField *F) {
7905 return F->validate(*this);
7906 });
7907}
7908
7909//===----------------------------------------------------------------------===//
7910// swizzle
7911//===----------------------------------------------------------------------===//
7912
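// Note (illustrative): the swizzle operand accepts offset:<16-bit imm> or
// offset:swizzle(<mode>, ...), e.g. swizzle(QUAD_PERM, 0, 1, 2, 3),
// swizzle(BITMASK_PERM, "00p1i"), swizzle(BROADCAST, 2, 0),
// swizzle(SWAP, 8) or swizzle(REVERSE, 4); mode names are taken from
// IdSymbolic.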
7914static unsigned
7915encodeBitmaskPerm(const unsigned AndMask,
7916 const unsigned OrMask,
7917 const unsigned XorMask) {
7918 using namespace llvm::AMDGPU::Swizzle;
7919
7920 return BITMASK_PERM_ENC |
7921 (AndMask << BITMASK_AND_SHIFT) |
7922 (OrMask << BITMASK_OR_SHIFT) |
7923 (XorMask << BITMASK_XOR_SHIFT);
7924}
7925
7926bool
7927AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7928 const unsigned MinVal,
7929 const unsigned MaxVal,
7930 const StringRef ErrMsg,
7931 SMLoc &Loc) {
7932 if (!skipToken(AsmToken::Comma, "expected a comma")) {
7933 return false;
7934 }
7935 Loc = getLoc();
7936 if (!parseExpr(Op)) {
7937 return false;
7938 }
7939 if (Op < MinVal || Op > MaxVal) {
7940 Error(Loc, ErrMsg);
7941 return false;
7942 }
7943
7944 return true;
7945}
7946
7947bool
7948AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7949 const unsigned MinVal,
7950 const unsigned MaxVal,
7951 const StringRef ErrMsg) {
7952 SMLoc Loc;
7953 for (unsigned i = 0; i < OpNum; ++i) {
7954 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7955 return false;
7956 }
7957
7958 return true;
7959}
7960
7961bool
7962AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7963 using namespace llvm::AMDGPU::Swizzle;
7964
7965 int64_t Lane[LANE_NUM];
7966 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7967 "expected a 2-bit lane id")) {
7968 Imm = QUAD_PERM_ENC;
7969 for (unsigned I = 0; I < LANE_NUM; ++I) {
7970 Imm |= Lane[I] << (LANE_SHIFT * I);
7971 }
7972 return true;
7973 }
7974 return false;
7975}
7976
7977bool
7978AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7979 using namespace llvm::AMDGPU::Swizzle;
7980
7981 SMLoc Loc;
7982 int64_t GroupSize;
7983 int64_t LaneIdx;
7984
7985 if (!parseSwizzleOperand(GroupSize,
7986 2, 32,
7987 "group size must be in the interval [2,32]",
7988 Loc)) {
7989 return false;
7990 }
7991 if (!isPowerOf2_64(GroupSize)) {
7992 Error(Loc, "group size must be a power of two");
7993 return false;
7994 }
7995 if (parseSwizzleOperand(LaneIdx,
7996 0, GroupSize - 1,
7997 "lane id must be in the interval [0,group size - 1]",
7998 Loc)) {
7999 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8000 return true;
8001 }
8002 return false;
8003}
8004
8005bool
8006AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8007 using namespace llvm::AMDGPU::Swizzle;
8008
8009 SMLoc Loc;
8010 int64_t GroupSize;
8011
8012 if (!parseSwizzleOperand(GroupSize,
8013 2, 32,
8014 "group size must be in the interval [2,32]",
8015 Loc)) {
8016 return false;
8017 }
8018 if (!isPowerOf2_64(GroupSize)) {
8019 Error(Loc, "group size must be a power of two");
8020 return false;
8021 }
8022
8023 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8024 return true;
8025}
8026
8027bool
8028AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8029 using namespace llvm::AMDGPU::Swizzle;
8030
8031 SMLoc Loc;
8032 int64_t GroupSize;
8033
8034 if (!parseSwizzleOperand(GroupSize,
8035 1, 16,
8036 "group size must be in the interval [1,16]",
8037 Loc)) {
8038 return false;
8039 }
8040 if (!isPowerOf2_64(GroupSize)) {
8041 Error(Loc, "group size must be a power of two");
8042 return false;
8043 }
8044
8045 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8046 return true;
8047}
8048
8049bool
8050AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8051 using namespace llvm::AMDGPU::Swizzle;
8052
8053 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8054 return false;
8055 }
8056
8057 StringRef Ctl;
8058 SMLoc StrLoc = getLoc();
8059 if (!parseString(Ctl)) {
8060 return false;
8061 }
8062 if (Ctl.size() != BITMASK_WIDTH) {
8063 Error(StrLoc, "expected a 5-character mask");
8064 return false;
8065 }
8066
8067 unsigned AndMask = 0;
8068 unsigned OrMask = 0;
8069 unsigned XorMask = 0;
8070
8071 for (size_t i = 0; i < Ctl.size(); ++i) {
8072 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8073 switch(Ctl[i]) {
8074 default:
8075 Error(StrLoc, "invalid mask");
8076 return false;
8077 case '0':
8078 break;
8079 case '1':
8080 OrMask |= Mask;
8081 break;
8082 case 'p':
8083 AndMask |= Mask;
8084 break;
8085 case 'i':
8086 AndMask |= Mask;
8087 XorMask |= Mask;
8088 break;
8089 }
8090 }
8091
8092 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8093 return true;
8094}
8095
8096bool
8097AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8098
8099 SMLoc OffsetLoc = getLoc();
8100
8101 if (!parseExpr(Imm, "a swizzle macro")) {
8102 return false;
8103 }
8104 if (!isUInt<16>(Imm)) {
8105 Error(OffsetLoc, "expected a 16-bit offset");
8106 return false;
8107 }
8108 return true;
8109}
8110
8111bool
8112AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8113 using namespace llvm::AMDGPU::Swizzle;
8114
8115 if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
8116
8117 SMLoc ModeLoc = getLoc();
8118 bool Ok = false;
8119
8120 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8121 Ok = parseSwizzleQuadPerm(Imm);
8122 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8123 Ok = parseSwizzleBitmaskPerm(Imm);
8124 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8125 Ok = parseSwizzleBroadcast(Imm);
8126 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8127 Ok = parseSwizzleSwap(Imm);
8128 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8129 Ok = parseSwizzleReverse(Imm);
8130 } else {
8131 Error(ModeLoc, "expected a swizzle mode");
8132 }
8133
8134 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
8135 }
8136
8137 return false;
8138}
8139
8140ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8141 SMLoc S = getLoc();
8142 int64_t Imm = 0;
8143
8144 if (trySkipId("offset")) {
8145
8146 bool Ok = false;
8147 if (skipToken(AsmToken::Colon, "expected a colon")) {
8148 if (trySkipId("swizzle")) {
8149 Ok = parseSwizzleMacro(Imm);
8150 } else {
8151 Ok = parseSwizzleOffset(Imm);
8152 }
8153 }
8154
8155 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8156
8157 return Ok ? ParseStatus::Success : ParseStatus::Failure;
8158 }
8159 return ParseStatus::NoMatch;
8160}
8161
8162bool
8163AMDGPUOperand::isSwizzle() const {
8164 return isImmTy(ImmTySwizzle);
8165}
8166
8167//===----------------------------------------------------------------------===//
8168// VGPR Index Mode
8169//===----------------------------------------------------------------------===//
8170
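// Note (illustrative): the gpr_idx operand accepts gpr_idx(<mode>[, <mode>...])
// with mode names taken from IdSymbolic (e.g. SRC0 or DST), or a raw 4-bit
// immediate.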
8171int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8172
8173 using namespace llvm::AMDGPU::VGPRIndexMode;
8174
8175 if (trySkipToken(AsmToken::RParen)) {
8176 return OFF;
8177 }
8178
8179 int64_t Imm = 0;
8180
8181 while (true) {
8182 unsigned Mode = 0;
8183 SMLoc S = getLoc();
8184
8185 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8186 if (trySkipId(IdSymbolic[ModeId])) {
8187 Mode = 1 << ModeId;
8188 break;
8189 }
8190 }
8191
8192 if (Mode == 0) {
8193 Error(S, (Imm == 0)?
8194 "expected a VGPR index mode or a closing parenthesis" :
8195 "expected a VGPR index mode");
8196 return UNDEF;
8197 }
8198
8199 if (Imm & Mode) {
8200 Error(S, "duplicate VGPR index mode");
8201 return UNDEF;
8202 }
8203 Imm |= Mode;
8204
8205 if (trySkipToken(AsmToken::RParen))
8206 break;
8207 if (!skipToken(AsmToken::Comma,
8208 "expected a comma or a closing parenthesis"))
8209 return UNDEF;
8210 }
8211
8212 return Imm;
8213}
8214
8215ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8216
8217 using namespace llvm::AMDGPU::VGPRIndexMode;
8218
8219 int64_t Imm = 0;
8220 SMLoc S = getLoc();
8221
8222 if (trySkipId("gpr_idx", AsmToken::LParen)) {
8223 Imm = parseGPRIdxMacro();
8224 if (Imm == UNDEF)
8225 return ParseStatus::Failure;
8226 } else {
8227 if (getParser().parseAbsoluteExpression(Imm))
8228 return ParseStatus::Failure;
8229 if (Imm < 0 || !isUInt<4>(Imm))
8230 return Error(S, "invalid immediate: only 4-bit values are legal");
8231 }
8232
8233 Operands.push_back(
8234 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8235 return ParseStatus::Success;
8236}
8237
8238bool AMDGPUOperand::isGPRIdxMode() const {
8239 return isImmTy(ImmTyGprIdxMode);
8240}
8241
8242//===----------------------------------------------------------------------===//
8243// sopp branch targets
8244//===----------------------------------------------------------------------===//
8245
8246ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
8247
8248 // Make sure we are not parsing something
8249 // that looks like a label or an expression but is not.
8250 // This will improve error messages.
8251 if (isRegister() || isModifier())
8252 return ParseStatus::NoMatch;
8253
8254 if (!parseExpr(Operands))
8255 return ParseStatus::Failure;
8256
8257 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
8258 assert(Opr.isImm() || Opr.isExpr());
8259 SMLoc Loc = Opr.getStartLoc();
8260
8261 // Currently we do not support arbitrary expressions as branch targets.
8262 // Only labels and absolute expressions are accepted.
8263 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8264 Error(Loc, "expected an absolute expression or a label");
8265 } else if (Opr.isImm() && !Opr.isS16Imm()) {
8266 Error(Loc, "expected a 16-bit signed jump offset");
8267 }
8268
8269 return ParseStatus::Success;
8270}
8271
8272//===----------------------------------------------------------------------===//
8273// Boolean holding registers
8274//===----------------------------------------------------------------------===//
8275
8276ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
8277 return parseReg(Operands);
8278}
8279
8280//===----------------------------------------------------------------------===//
8281// mubuf
8282//===----------------------------------------------------------------------===//
8283
8284void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
8285 const OperandVector &Operands,
8286 bool IsAtomic) {
8287 OptionalImmIndexMap OptionalIdx;
8288 unsigned FirstOperandIdx = 1;
8289 bool IsAtomicReturn = false;
8290
8291 if (IsAtomic) {
8292 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
8293 SIInstrFlags::IsAtomicRet;
8294 }
8295
8296 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
8297 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8298
8299 // Add the register arguments
8300 if (Op.isReg()) {
8301 Op.addRegOperands(Inst, 1);
8302 // Insert a tied src for atomic return dst.
8303 // This cannot be postponed as subsequent calls to
8304 // addImmOperands rely on correct number of MC operands.
8305 if (IsAtomicReturn && i == FirstOperandIdx)
8306 Op.addRegOperands(Inst, 1);
8307 continue;
8308 }
8309
8310 // Handle the case where soffset is an immediate
8311 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
8312 Op.addImmOperands(Inst, 1);
8313 continue;
8314 }
8315
8316 // Handle tokens like 'offen' which are sometimes hard-coded into the
8317 // asm string. There are no MCInst operands for these.
8318 if (Op.isToken()) {
8319 continue;
8320 }
8321 assert(Op.isImm());
8322
8323 // Handle optional arguments
8324 OptionalIdx[Op.getImmTy()] = i;
8325 }
8326
8327 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
8328 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
8329}
8330
8331//===----------------------------------------------------------------------===//
8332// smrd
8333//===----------------------------------------------------------------------===//
8334
8335bool AMDGPUOperand::isSMRDOffset8() const {
8336 return isImmLiteral() && isUInt<8>(getImm());
8337}
8338
8339bool AMDGPUOperand::isSMEMOffset() const {
8340 // Offset range is checked later by validator.
8341 return isImmLiteral();
8342}
8343
8344bool AMDGPUOperand::isSMRDLiteralOffset() const {
8345 // 32-bit literals are only supported on CI and we only want to use them
8346 // when the offset is > 8-bits.
8347 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
8348}
8349
8350//===----------------------------------------------------------------------===//
8351// vop3
8352//===----------------------------------------------------------------------===//
8353
8354static bool ConvertOmodMul(int64_t &Mul) {
8355 if (Mul != 1 && Mul != 2 && Mul != 4)
8356 return false;
8357
8358 Mul >>= 1;
8359 return true;
8360}
8361
8362static bool ConvertOmodDiv(int64_t &Div) {
8363 if (Div == 1) {
8364 Div = 0;
8365 return true;
8366 }
8367
8368 if (Div == 2) {
8369 Div = 3;
8370 return true;
8371 }
8372
8373 return false;
8374}
8375
8376// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
8377// This is intentional and ensures compatibility with sp3.
8378// See bug 35397 for details.
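// For example, on pre-gfx11 targets both bound_ctrl:0 and bound_ctrl:1 yield
// an encoded value of 1, while gfx11+ keeps the written value unchanged.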
8379bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
8380 if (BoundCtrl == 0 || BoundCtrl == 1) {
8381 if (!isGFX11Plus())
8382 BoundCtrl = 1;
8383 return true;
8384 }
8385 return false;
8386}
8387
8388void AMDGPUAsmParser::onBeginOfFile() {
8389 if (!getParser().getStreamer().getTargetStreamer() ||
8390 getSTI().getTargetTriple().getArch() == Triple::r600)
8391 return;
8392
8393 if (!getTargetStreamer().getTargetID())
8394 getTargetStreamer().initializeTargetID(getSTI(),
8395 getSTI().getFeatureString());
8396
8397 if (isHsaAbi(getSTI()))
8398 getTargetStreamer().EmitDirectiveAMDGCNTarget();
8399}
8400
8401/// Parse AMDGPU specific expressions.
8402///
8403/// expr ::= or(expr, ...) |
8404/// max(expr, ...)
8405///
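/// For example, "max(sym_a, sym_b, 4)" is parsed into an AMDGPUVariadicMCExpr
/// with three sub-expressions (symbol names illustrative).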
8406bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
8407 using AGVK = AMDGPUVariadicMCExpr::VariadicKind;
8408
8409 if (isToken(AsmToken::Identifier)) {
8410 StringRef TokenId = getTokenStr();
8411 AGVK VK = StringSwitch<AGVK>(TokenId)
8412 .Case("max", AGVK::AGVK_Max)
8413 .Case("or", AGVK::AGVK_Or)
8414 .Default(AGVK::AGVK_None);
8415
8416 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
8417 SmallVector<const MCExpr *, 4> Exprs;
8418 uint64_t CommaCount = 0;
8419 lex(); // Eat 'max'/'or'
8420 lex(); // Eat '('
8421 while (true) {
8422 if (trySkipToken(AsmToken::RParen)) {
8423 if (Exprs.empty()) {
8424 Error(getToken().getLoc(),
8425 "empty " + Twine(TokenId) + " expression");
8426 return true;
8427 }
8428 if (CommaCount + 1 != Exprs.size()) {
8429 Error(getToken().getLoc(),
8430 "mismatch of commas in " + Twine(TokenId) + " expression");
8431 return true;
8432 }
8433 Res = AMDGPUVariadicMCExpr::create(VK, Exprs, getContext());
8434 return false;
8435 }
8436 const MCExpr *Expr;
8437 if (getParser().parseExpression(Expr, EndLoc))
8438 return true;
8439 Exprs.push_back(Expr);
8440 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
8441 if (LastTokenWasComma)
8442 CommaCount++;
8443 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
8444 Error(getToken().getLoc(),
8445 "unexpected token in " + Twine(TokenId) + " expression");
8446 return true;
8447 }
8448 }
8449 }
8450 }
8451 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
8452}
8453
8454ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
8455 StringRef Name = getTokenStr();
8456 if (Name == "mul") {
8457 return parseIntWithPrefix("mul", Operands,
8458 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8459 }
8460
8461 if (Name == "div") {
8462 return parseIntWithPrefix("div", Operands,
8463 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8464 }
8465
8466 return ParseStatus::NoMatch;
8467}
8468
8469// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8470// the number of src operands present, then copies that bit into src0_modifiers.
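// For example, with two source operands DST_OP_SEL is op_sel bit 2; with
// three it is bit 3 (see the (OpSel & (1 << SrcNum)) test below).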
8471static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
8472 int Opc = Inst.getOpcode();
8473 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8474 if (OpSelIdx == -1)
8475 return;
8476
8477 int SrcNum;
8478 const int Ops[] = { AMDGPU::OpName::src0,
8479 AMDGPU::OpName::src1,
8480 AMDGPU::OpName::src2 };
8481 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
8482 ++SrcNum)
8483 ;
8484 assert(SrcNum > 0);
8485
8486 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8487
8488 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
8489 if (DstIdx == -1)
8490 return;
8491
8492 const MCOperand &DstOp = Inst.getOperand(DstIdx);
8493 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8494 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8495 if (DstOp.isReg() &&
8496 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
8497 if (AMDGPU::isHi(DstOp.getReg(), MRI))
8498 ModVal |= SISrcMods::DST_OP_SEL;
8499 } else {
8500 if ((OpSel & (1 << SrcNum)) != 0)
8501 ModVal |= SISrcMods::DST_OP_SEL;
8502 }
8503 Inst.getOperand(ModIdx).setImm(ModVal);
8504}
8505
8506void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
8507 const OperandVector &Operands) {
8508 cvtVOP3P(Inst, Operands);
8509 cvtVOP3DstOpSelOnly(Inst, *getMRI());
8510}
8511
8512void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
8513 OptionalImmIndexMap &OptionalIdx) {
8514 cvtVOP3P(Inst, Operands, OptionalIdx);
8515 cvtVOP3DstOpSelOnly(Inst, *getMRI());
8516}
8517
8518static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8519 return
8520 // 1. This operand is input modifiers
8521 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
8522 // 2. This is not last operand
8523 && Desc.NumOperands > (OpNum + 1)
8524 // 3. Next operand is register class
8525 && Desc.operands()[OpNum + 1].RegClass != -1
8526 // 4. Next register is not tied to any other operand
8527 && Desc.getOperandConstraint(OpNum + 1,
8528 MCOI::OperandConstraint::TIED_TO) == -1;
8529}
8530
8531void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8532{
8533 OptionalImmIndexMap OptionalIdx;
8534 unsigned Opc = Inst.getOpcode();
8535
8536 unsigned I = 1;
8537 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8538 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8539 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8540 }
8541
8542 for (unsigned E = Operands.size(); I != E; ++I) {
8543 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8544 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8545 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8546 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
8547 Op.isInterpAttrChan()) {
8548 Inst.addOperand(MCOperand::createImm(Op.getImm()));
8549 } else if (Op.isImmModifier()) {
8550 OptionalIdx[Op.getImmTy()] = I;
8551 } else {
8552 llvm_unreachable("unhandled operand type");
8553 }
8554 }
8555
8556 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
8557 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8558 AMDGPUOperand::ImmTyHigh);
8559
8560 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8561 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8562 AMDGPUOperand::ImmTyClampSI);
8563
8564 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8565 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8566 AMDGPUOperand::ImmTyOModSI);
8567}
8568
8569void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8570{
8571 OptionalImmIndexMap OptionalIdx;
8572 unsigned Opc = Inst.getOpcode();
8573
8574 unsigned I = 1;
8575 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8576 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8577 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8578 }
8579
8580 for (unsigned E = Operands.size(); I != E; ++I) {
8581 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8582 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8583 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8584 } else if (Op.isImmModifier()) {
8585 OptionalIdx[Op.getImmTy()] = I;
8586 } else {
8587 llvm_unreachable("unhandled operand type");
8588 }
8589 }
8590
8591 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8592
8593 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8594 if (OpSelIdx != -1)
8595 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8596
8597 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8598
8599 if (OpSelIdx == -1)
8600 return;
8601
8602 const int Ops[] = { AMDGPU::OpName::src0,
8603 AMDGPU::OpName::src1,
8604 AMDGPU::OpName::src2 };
8605 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8606 AMDGPU::OpName::src1_modifiers,
8607 AMDGPU::OpName::src2_modifiers };
8608
8609 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8610
8611 for (int J = 0; J < 3; ++J) {
8612 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8613 if (OpIdx == -1)
8614 break;
8615
8616 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8617 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8618
8619 if ((OpSel & (1 << J)) != 0)
8620 ModVal |= SISrcMods::OP_SEL_0;
8621 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8622 (OpSel & (1 << 3)) != 0)
8623 ModVal |= SISrcMods::DST_OP_SEL;
8624
8625 Inst.getOperand(ModIdx).setImm(ModVal);
8626 }
8627}
8628
8629void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8630 OptionalImmIndexMap &OptionalIdx) {
8631 unsigned Opc = Inst.getOpcode();
8632
8633 unsigned I = 1;
8634 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8635 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8636 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8637 }
8638
8639 for (unsigned E = Operands.size(); I != E; ++I) {
8640 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8641 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8642 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8643 } else if (Op.isImmModifier()) {
8644 OptionalIdx[Op.getImmTy()] = I;
8645 } else if (Op.isRegOrImm()) {
8646 Op.addRegOrImmOperands(Inst, 1);
8647 } else {
8648 llvm_unreachable("unhandled operand type");
8649 }
8650 }
8651
8652 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
8653 assert(AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in));
8654 Inst.addOperand(Inst.getOperand(0));
8655 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8656 AMDGPUOperand::ImmTyByteSel);
8657 }
8658
8659 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8660 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8661 AMDGPUOperand::ImmTyClampSI);
8662
8663 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8664 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8665 AMDGPUOperand::ImmTyOModSI);
8666
8667 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8668 // they have a src2 register operand that is tied to the dst operand.
8669 // We do not allow modifiers for this operand in the assembler, so
8670 // src2_modifiers should be 0.
8671 if (isMAC(Opc)) {
8672 auto it = Inst.begin();
8673 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8674 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8675 ++it;
8676 // Copy the operand to ensure it's not invalidated when Inst grows.
8677 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8678 }
8679}
8680
8681void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8682 OptionalImmIndexMap OptionalIdx;
8683 cvtVOP3(Inst, Operands, OptionalIdx);
8684}
8685
8686void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8687 OptionalImmIndexMap &OptIdx) {
8688 const int Opc = Inst.getOpcode();
8689 const MCInstrDesc &Desc = MII.get(Opc);
8690
8691 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8692
8693 if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8694 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
8695 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
8696 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
8697 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
8698 Inst.addOperand(Inst.getOperand(0));
8699 }
8700
8701 // Adding vdst_in operand is already covered for these DPP instructions in
8702 // cvtVOP3DPP.
8703 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
8704 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
8705 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
8706 Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
8707 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12 ||
8708 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
8709 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
8710 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
8711 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) {
8712 assert(!IsPacked);
8713 Inst.addOperand(Inst.getOperand(0));
8714 }
8715
8716 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
8717 // instruction, and then figure out where to actually put the modifiers
8718
8719 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8720 if (OpSelIdx != -1) {
8721 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8722 }
8723
8724 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8725 if (OpSelHiIdx != -1) {
8726 int DefaultVal = IsPacked ? -1 : 0;
8727 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8728 DefaultVal);
8729 }
8730
8731 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8732 if (NegLoIdx != -1)
8733 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8734
8735 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8736 if (NegHiIdx != -1)
8737 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8738
8739 const int Ops[] = { AMDGPU::OpName::src0,
8740 AMDGPU::OpName::src1,
8741 AMDGPU::OpName::src2 };
8742 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8743 AMDGPU::OpName::src1_modifiers,
8744 AMDGPU::OpName::src2_modifiers };
8745
8746 unsigned OpSel = 0;
8747 unsigned OpSelHi = 0;
8748 unsigned NegLo = 0;
8749 unsigned NegHi = 0;
8750
8751 if (OpSelIdx != -1)
8752 OpSel = Inst.getOperand(OpSelIdx).getImm();
8753
8754 if (OpSelHiIdx != -1)
8755 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8756
8757 if (NegLoIdx != -1)
8758 NegLo = Inst.getOperand(NegLoIdx).getImm();
8759
8760 if (NegHiIdx != -1)
8761 NegHi = Inst.getOperand(NegHiIdx).getImm();
8762
8763 for (int J = 0; J < 3; ++J) {
8764 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8765 if (OpIdx == -1)
8766 break;
8767
8768 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8769
8770 if (ModIdx == -1)
8771 continue;
8772
8773 uint32_t ModVal = 0;
8774
8775 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
8776 if (SrcOp.isReg() && getMRI()
8777 ->getRegClass(AMDGPU::VGPR_16RegClassID)
8778 .contains(SrcOp.getReg())) {
8779 bool VGPRSuffixIsHi = AMDGPU::isHi(SrcOp.getReg(), *getMRI());
8780 if (VGPRSuffixIsHi)
8781 ModVal |= SISrcMods::OP_SEL_0;
8782 } else {
8783 if ((OpSel & (1 << J)) != 0)
8784 ModVal |= SISrcMods::OP_SEL_0;
8785 }
8786
8787 if ((OpSelHi & (1 << J)) != 0)
8788 ModVal |= SISrcMods::OP_SEL_1;
8789
8790 if ((NegLo & (1 << J)) != 0)
8791 ModVal |= SISrcMods::NEG;
8792
8793 if ((NegHi & (1 << J)) != 0)
8794 ModVal |= SISrcMods::NEG_HI;
8795
8796 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8797 }
8798}
8799
8800void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8801 OptionalImmIndexMap OptIdx;
8802 cvtVOP3(Inst, Operands, OptIdx);
8803 cvtVOP3P(Inst, Operands, OptIdx);
8804}
8805
8806static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
8807 unsigned i, unsigned Opc, unsigned OpName) {
8808 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
8809 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
8810 else
8811 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
8812}
8813
8814void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
8815 unsigned Opc = Inst.getOpcode();
8816
8817 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
8818 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
8819 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
8820 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
8821 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
8822
8823 OptionalImmIndexMap OptIdx;
8824 for (unsigned i = 5; i < Operands.size(); ++i) {
8825 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8826 OptIdx[Op.getImmTy()] = i;
8827 }
8828
8829 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
8830 addOptionalImmOperand(Inst, Operands, OptIdx,
8831 AMDGPUOperand::ImmTyIndexKey8bit);
8832
8833 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
8834 addOptionalImmOperand(Inst, Operands, OptIdx,
8835 AMDGPUOperand::ImmTyIndexKey16bit);
8836
8837 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8838 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClampSI);
8839
8840 cvtVOP3P(Inst, Operands, OptIdx);
8841}
8842
8843//===----------------------------------------------------------------------===//
8844// VOPD
8845//===----------------------------------------------------------------------===//
8846
8847ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
8848 if (!hasVOPD(getSTI()))
8849 return ParseStatus::NoMatch;
8850
8851 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8852 SMLoc S = getLoc();
8853 lex();
8854 lex();
8855 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8856 SMLoc OpYLoc = getLoc();
8857 StringRef OpYName;
8858 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
8859 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
8860 return ParseStatus::Success;
8861 }
8862 return Error(OpYLoc, "expected a VOPDY instruction after ::");
8863 }
8864 return ParseStatus::NoMatch;
8865}
8866
8867// Create VOPD MCInst operands using parsed assembler operands.
8868void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
8869 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
8870 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
8871 if (Op.isReg()) {
8872 Op.addRegOperands(Inst, 1);
8873 return;
8874 }
8875 if (Op.isImm()) {
8876 Op.addImmOperands(Inst, 1);
8877 return;
8878 }
8879 llvm_unreachable("Unhandled operand type in cvtVOPD");
8880 };
8881
8882 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
8883
8884 // MCInst operands are ordered as follows:
8885 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
8886
8887 for (auto CompIdx : VOPD::COMPONENTS) {
8888 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
8889 }
8890
8891 for (auto CompIdx : VOPD::COMPONENTS) {
8892 const auto &CInfo = InstInfo[CompIdx];
8893 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
8894 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
8895 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
8896 if (CInfo.hasSrc2Acc())
8897 addOp(CInfo.getIndexOfDstInParsedOperands());
8898 }
8899}
8900
8901//===----------------------------------------------------------------------===//
8902// dpp
8903//===----------------------------------------------------------------------===//
8904
8905bool AMDGPUOperand::isDPP8() const {
8906 return isImmTy(ImmTyDPP8);
8907}
8908
8909bool AMDGPUOperand::isDPPCtrl() const {
8910 using namespace AMDGPU::DPP;
8911
8912 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8913 if (result) {
8914 int64_t Imm = getImm();
8915 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8916 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8917 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8918 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8919 (Imm == DppCtrl::WAVE_SHL1) ||
8920 (Imm == DppCtrl::WAVE_ROL1) ||
8921 (Imm == DppCtrl::WAVE_SHR1) ||
8922 (Imm == DppCtrl::WAVE_ROR1) ||
8923 (Imm == DppCtrl::ROW_MIRROR) ||
8924 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8925 (Imm == DppCtrl::BCAST15) ||
8926 (Imm == DppCtrl::BCAST31) ||
8927 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8928 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8929 }
8930 return false;
8931}
8932
8933//===----------------------------------------------------------------------===//
8934// mAI
8935//===----------------------------------------------------------------------===//
8936
8937bool AMDGPUOperand::isBLGP() const {
8938 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8939}
8940
8941bool AMDGPUOperand::isCBSZ() const {
8942 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8943}
8944
8945bool AMDGPUOperand::isABID() const {
8946 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8947}
8948
8949bool AMDGPUOperand::isS16Imm() const {
8950 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8951}
8952
8953bool AMDGPUOperand::isU16Imm() const {
8954 return isImmLiteral() && isUInt<16>(getImm());
8955}
8956
8957//===----------------------------------------------------------------------===//
8958// dim
8959//===----------------------------------------------------------------------===//
8960
8961bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8962 // We want to allow "dim:1D" etc.,
8963 // but the initial 1 is tokenized as an integer.
8964 std::string Token;
8965 if (isToken(AsmToken::Integer)) {
8966 SMLoc Loc = getToken().getEndLoc();
8967 Token = std::string(getTokenStr());
8968 lex();
8969 if (getLoc() != Loc)
8970 return false;
8971 }
8972
8973 StringRef Suffix;
8974 if (!parseId(Suffix))
8975 return false;
8976 Token += Suffix;
8977
8978 StringRef DimId = Token;
8979 if (DimId.starts_with("SQ_RSRC_IMG_"))
8980 DimId = DimId.drop_front(12);
8981
8982 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8983 if (!DimInfo)
8984 return false;
8985
8986 Encoding = DimInfo->Encoding;
8987 return true;
8988}
8989
8990ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8991 if (!isGFX10Plus())
8992 return ParseStatus::NoMatch;
8993
8994 SMLoc S = getLoc();
8995
8996 if (!trySkipId("dim", AsmToken::Colon))
8997 return ParseStatus::NoMatch;
8998
8999 unsigned Encoding;
9000 SMLoc Loc = getLoc();
9001 if (!parseDimId(Encoding))
9002 return Error(Loc, "invalid dim value");
9003
9004 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
9005 AMDGPUOperand::ImmTyDim));
9006 return ParseStatus::Success;
9007}
9008
9009//===----------------------------------------------------------------------===//
9010// dpp
9011//===----------------------------------------------------------------------===//
9012
9013ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9014 SMLoc S = getLoc();
9015
9016 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9017 return ParseStatus::NoMatch;
9018
9019 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
9020
9021 int64_t Sels[8];
9022
9023 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9024 return ParseStatus::Failure;
9025
9026 for (size_t i = 0; i < 8; ++i) {
9027 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9028 return ParseStatus::Failure;
9029
9030 SMLoc Loc = getLoc();
9031 if (getParser().parseAbsoluteExpression(Sels[i]))
9032 return ParseStatus::Failure;
9033 if (0 > Sels[i] || 7 < Sels[i])
9034 return Error(Loc, "expected a 3-bit value");
9035 }
9036
9037 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9038 return ParseStatus::Failure;
9039
9040 unsigned DPP8 = 0;
9041 for (size_t i = 0; i < 8; ++i)
9042 DPP8 |= (Sels[i] << (i * 3));
9043
9044 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9045 return ParseStatus::Success;
9046}
9047
9048bool
9049AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9050 const OperandVector &Operands) {
9051 if (Ctrl == "row_newbcast")
9052 return isGFX90A();
9053
9054 if (Ctrl == "row_share" ||
9055 Ctrl == "row_xmask")
9056 return isGFX10Plus();
9057
9058 if (Ctrl == "wave_shl" ||
9059 Ctrl == "wave_shr" ||
9060 Ctrl == "wave_rol" ||
9061 Ctrl == "wave_ror" ||
9062 Ctrl == "row_bcast")
9063 return isVI() || isGFX9();
9064
9065 return Ctrl == "row_mirror" ||
9066 Ctrl == "row_half_mirror" ||
9067 Ctrl == "quad_perm" ||
9068 Ctrl == "row_shl" ||
9069 Ctrl == "row_shr" ||
9070 Ctrl == "row_ror";
9071}
9072
9073int64_t
9074AMDGPUAsmParser::parseDPPCtrlPerm() {
9075 // quad_perm:[%d,%d,%d,%d]
9076
9077 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9078 return -1;
9079
9080 int64_t Val = 0;
9081 for (int i = 0; i < 4; ++i) {
9082 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9083 return -1;
9084
9085 int64_t Temp;
9086 SMLoc Loc = getLoc();
9087 if (getParser().parseAbsoluteExpression(Temp))
9088 return -1;
9089 if (Temp < 0 || Temp > 3) {
9090 Error(Loc, "expected a 2-bit value");
9091 return -1;
9092 }
9093
9094 Val += (Temp << i * 2);
9095 }
9096
9097 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9098 return -1;
9099
9100 return Val;
9101}
9102
9103int64_t
9104AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
9105 using namespace AMDGPU::DPP;
9106
9107 // sel:%d
9108
9109 int64_t Val;
9110 SMLoc Loc = getLoc();
9111
9112 if (getParser().parseAbsoluteExpression(Val))
9113 return -1;
9114
9115 struct DppCtrlCheck {
9116 int64_t Ctrl;
9117 int Lo;
9118 int Hi;
9119 };
9120
9121 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
9122 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
9123 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
9124 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
9125 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
9126 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
9127 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
9128 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
9129 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
9130 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
9131 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
9132 .Default({-1, 0, 0});
9133
9134 bool Valid;
9135 if (Check.Ctrl == -1) {
9136 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
9137 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
9138 } else {
9139 Valid = Check.Lo <= Val && Val <= Check.Hi;
9140 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
9141 }
9142
9143 if (!Valid) {
9144 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
9145 return -1;
9146 }
9147
9148 return Val;
9149}
9150
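// Note (illustrative): dpp_ctrl forms accepted below include row_mirror,
// row_half_mirror, quad_perm:[0,1,2,3] and <ctrl>:<n> variants such as
// row_shl:1 or row_share:0; availability is gated by isSupportedDPPCtrl().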
9151ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
9152 using namespace AMDGPU::DPP;
9153
9154 if (!isToken(AsmToken::Identifier) ||
9155 !isSupportedDPPCtrl(getTokenStr(), Operands))
9156 return ParseStatus::NoMatch;
9157
9158 SMLoc S = getLoc();
9159 int64_t Val = -1;
9160 StringRef Ctrl;
9161
9162 parseId(Ctrl);
9163
9164 if (Ctrl == "row_mirror") {
9165 Val = DppCtrl::ROW_MIRROR;
9166 } else if (Ctrl == "row_half_mirror") {
9167 Val = DppCtrl::ROW_HALF_MIRROR;
9168 } else {
9169 if (skipToken(AsmToken::Colon, "expected a colon")) {
9170 if (Ctrl == "quad_perm") {
9171 Val = parseDPPCtrlPerm();
9172 } else {
9173 Val = parseDPPCtrlSel(Ctrl);
9174 }
9175 }
9176 }
9177
9178 if (Val == -1)
9179 return ParseStatus::Failure;
9180
9181 Operands.push_back(
9182 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
9183 return ParseStatus::Success;
9184}
9185
9186void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
9187 bool IsDPP8) {
9188 OptionalImmIndexMap OptionalIdx;
9189 unsigned Opc = Inst.getOpcode();
9190 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9191
9192 // MAC instructions are special because they have an 'old'
9193 // operand which is not tied to dst (but is assumed to be).
9194 // They also have a dummy, unused src2_modifiers operand.
9195 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
9196 int Src2ModIdx =
9197 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
9198 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
9199 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
9200
9201 unsigned I = 1;
9202 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9203 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9204 }
9205
9206 int Fi = 0;
9207 for (unsigned E = Operands.size(); I != E; ++I) {
9208
9209 if (IsMAC) {
9210 int NumOperands = Inst.getNumOperands();
9211 if (OldIdx == NumOperands) {
9212 // Handle old operand
9213 constexpr int DST_IDX = 0;
9214 Inst.addOperand(Inst.getOperand(DST_IDX));
9215 } else if (Src2ModIdx == NumOperands) {
9216 // Add unused dummy src2_modifiers
9217 Inst.addOperand(MCOperand::createImm(0));
9218 }
9219 }
9220
9221 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
9222 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
9223 Inst.addOperand(Inst.getOperand(0));
9224 }
9225
9226 bool IsVOP3CvtSrDpp =
9227 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9228 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9229 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9230 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
9231 if (IsVOP3CvtSrDpp) {
9232 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
9233 Inst.addOperand(MCOperand::createImm(0));
9234 Inst.addOperand(MCOperand::createReg(0));
9235 }
9236 }
9237
9238 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9239 MCOI::TIED_TO);
9240 if (TiedTo != -1) {
9241 assert((unsigned)TiedTo < Inst.getNumOperands());
9242 // handle tied old or src2 for MAC instructions
9243 Inst.addOperand(Inst.getOperand(TiedTo));
9244 }
9245 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9246 // Add the register arguments
9247 if (IsDPP8 && Op.isDppFI()) {
9248 Fi = Op.getImm();
9249 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9250 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9251 } else if (Op.isReg()) {
9252 Op.addRegOperands(Inst, 1);
9253 } else if (Op.isImm() &&
9254 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
9255 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
9256 Op.addImmOperands(Inst, 1);
9257 } else if (Op.isImm()) {
9258 OptionalIdx[Op.getImmTy()] = I;
9259 } else {
9260 llvm_unreachable("unhandled operand type");
9261 }
9262 }
9263
9264 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel))
9265 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9266 AMDGPUOperand::ImmTyByteSel);
9267
9268 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9269 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
9270
9271 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9272 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
9273
9274 if (Desc.TSFlags & SIInstrFlags::VOP3P)
9275 cvtVOP3P(Inst, Operands, OptionalIdx);
9276 else if (Desc.TSFlags & SIInstrFlags::VOP3)
9277 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
9278 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9279 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9280 }
9281
9282 if (IsDPP8) {
9283 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
9284 using namespace llvm::AMDGPU::DPP;
9285 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9286 } else {
9287 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
9288 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9289 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9290 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9291
9292 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
9293 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9294 AMDGPUOperand::ImmTyDppFI);
9295 }
9296}
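// Note (editorial, not in the original source): when the user omits the
// optional DPP operands, cvtVOP3DPP() falls back to the defaults encoded
// above: dpp_ctrl = 0xe4 (identity quad_perm), row_mask = 0xf,
// bank_mask = 0xf, bound_ctrl = 0, and fi = DPP8_FI_0 for dpp8 forms.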
9297
9298void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
9299 OptionalImmIndexMap OptionalIdx;
9300
9301 unsigned I = 1;
9302 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9303 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9304 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9305 }
9306
9307 int Fi = 0;
9308 for (unsigned E = Operands.size(); I != E; ++I) {
9309 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9310 MCOI::TIED_TO);
9311 if (TiedTo != -1) {
9312 assert((unsigned)TiedTo < Inst.getNumOperands());
9313 // handle tied old or src2 for MAC instructions
9314 Inst.addOperand(Inst.getOperand(TiedTo));
9315 }
9316 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9317 // Add the register arguments
9318 if (Op.isReg() && validateVccOperand(Op.getReg())) {
9319 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
9320 // Skip it.
9321 continue;
9322 }
9323
9324 if (IsDPP8) {
9325 if (Op.isDPP8()) {
9326 Op.addImmOperands(Inst, 1);
9327 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9328 Op.addRegWithFPInputModsOperands(Inst, 2);
9329 } else if (Op.isDppFI()) {
9330 Fi = Op.getImm();
9331 } else if (Op.isReg()) {
9332 Op.addRegOperands(Inst, 1);
9333 } else {
9334 llvm_unreachable("Invalid operand type");
9335 }
9336 } else {
9337 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9338 Op.addRegWithFPInputModsOperands(Inst, 2);
9339 } else if (Op.isReg()) {
9340 Op.addRegOperands(Inst, 1);
9341 } else if (Op.isDPPCtrl()) {
9342 Op.addImmOperands(Inst, 1);
9343 } else if (Op.isImm()) {
9344 // Handle optional arguments
9345 OptionalIdx[Op.getImmTy()] = I;
9346 } else {
9347 llvm_unreachable("Invalid operand type");
9348 }
9349 }
9350 }
9351
9352 if (IsDPP8) {
9353 using namespace llvm::AMDGPU::DPP;
9354 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9355 } else {
9356 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9357 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9358 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9359 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
9360 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9361 AMDGPUOperand::ImmTyDppFI);
9362 }
9363 }
9364}
9365
9366//===----------------------------------------------------------------------===//
9367// sdwa
9368//===----------------------------------------------------------------------===//
9369
9370ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
9371 StringRef Prefix,
9372 AMDGPUOperand::ImmTy Type) {
9373 using namespace llvm::AMDGPU::SDWA;
9374
9375 SMLoc S = getLoc();
9376 StringRef Value;
9377
9378 SMLoc StringLoc;
9379 ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc);
9380 if (!Res.isSuccess())
9381 return Res;
9382
9383 int64_t Int;
9385 .Case("BYTE_0", SdwaSel::BYTE_0)
9386 .Case("BYTE_1", SdwaSel::BYTE_1)
9387 .Case("BYTE_2", SdwaSel::BYTE_2)
9388 .Case("BYTE_3", SdwaSel::BYTE_3)
9389 .Case("WORD_0", SdwaSel::WORD_0)
9390 .Case("WORD_1", SdwaSel::WORD_1)
9391 .Case("DWORD", SdwaSel::DWORD)
9392 .Default(0xffffffff);
9393
9394 if (Int == 0xffffffff)
9395 return Error(StringLoc, "invalid " + Twine(Prefix) + " value");
9396
9397 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
9398 return ParseStatus::Success;
9399}
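// Illustrative note (assumed syntax, editorial): parseSDWASel() handles
// operands such as
//   dst_sel:DWORD, dst_sel:BYTE_0, src0_sel:WORD_1, src1_sel:BYTE_3
// mapping the string after the prefix to the SdwaSel enumerator that is
// pushed as an immediate operand.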
9400
9401ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
9402 using namespace llvm::AMDGPU::SDWA;
9403
9404 SMLoc S = getLoc();
9405 StringRef Value;
9406
9407 SMLoc StringLoc;
9408 ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc);
9409 if (!Res.isSuccess())
9410 return Res;
9411
9412 int64_t Int;
9414 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
9415 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
9416 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
9417 .Default(0xffffffff);
9418
9419 if (Int == 0xffffffff)
9420 return Error(StringLoc, "invalid dst_unused value");
9421
9422 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused));
9423 return ParseStatus::Success;
9424}
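// Illustrative note (assumed syntax, editorial): parseSDWADstUnused()
// handles the three forms
//   dst_unused:UNUSED_PAD, dst_unused:UNUSED_SEXT, dst_unused:UNUSED_PRESERVE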
9425
9426void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
9427 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
9428}
9429
9430void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
9431 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
9432}
9433
9434void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
9435 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
9436}
9437
9438void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
9439 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
9440}
9441
9442void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
9443 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
9444}
9445
9446void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
9447 uint64_t BasicInstType,
9448 bool SkipDstVcc,
9449 bool SkipSrcVcc) {
9450 using namespace llvm::AMDGPU::SDWA;
9451
9452 OptionalImmIndexMap OptionalIdx;
9453 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
9454 bool SkippedVcc = false;
9455
9456 unsigned I = 1;
9457 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9458 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9459 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9460 }
9461
9462 for (unsigned E = Operands.size(); I != E; ++I) {
9463 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9464 if (SkipVcc && !SkippedVcc && Op.isReg() &&
9465 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
9466 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
9467 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
9468 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
9469 // Skip VCC only if we didn't skip it on previous iteration.
9470 // Note that src0 and src1 occupy 2 slots each because of modifiers.
9471 if (BasicInstType == SIInstrFlags::VOP2 &&
9472 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
9473 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
9474 SkippedVcc = true;
9475 continue;
9476 } else if (BasicInstType == SIInstrFlags::VOPC &&
9477 Inst.getNumOperands() == 0) {
9478 SkippedVcc = true;
9479 continue;
9480 }
9481 }
9482 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9483 Op.addRegOrImmWithInputModsOperands(Inst, 2);
9484 } else if (Op.isImm()) {
9485 // Handle optional arguments
9486 OptionalIdx[Op.getImmTy()] = I;
9487 } else {
9488 llvm_unreachable("Invalid operand type");
9489 }
9490 SkippedVcc = false;
9491 }
9492
9493 const unsigned Opc = Inst.getOpcode();
9494 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
9495 Opc != AMDGPU::V_NOP_sdwa_vi) {
9496 // V_NOP_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
9497 switch (BasicInstType) {
9498 case SIInstrFlags::VOP1:
9499 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9500 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9501 AMDGPUOperand::ImmTyClampSI, 0);
9502
9503 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9504 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9505 AMDGPUOperand::ImmTyOModSI, 0);
9506
9507 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
9508 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9509 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9510
9511 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
9512 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9513 AMDGPUOperand::ImmTySDWADstUnused,
9514 DstUnused::UNUSED_PRESERVE);
9515
9516 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9517 break;
9518
9519 case SIInstrFlags::VOP2:
9520 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9521
9522 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
9523 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
9524
9525 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9526 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
9527 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9528 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9529 break;
9530
9531 case SIInstrFlags::VOPC:
9532 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
9533 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9534 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9535 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9536 break;
9537
9538 default:
9539 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9540 }
9541 }
9542
9543 // special case v_mac_{f16, f32}:
9544 // it has src2 register operand that is tied to dst operand
9545 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9546 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
9547 auto it = Inst.begin();
9548 std::advance(
9549 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
9550 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
9551 }
9552}
9553
9554/// Force static initialization.
9555extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
9556 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
9557 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
9558}
9559
9560#define GET_REGISTER_MATCHER
9561#define GET_MATCHER_IMPLEMENTATION
9562#define GET_MNEMONIC_SPELL_CHECKER
9563#define GET_MNEMONIC_CHECKER
9564#include "AMDGPUGenAsmMatcher.inc"
9565
9566ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
9567 unsigned MCK) {
9568 switch (MCK) {
9569 case MCK_addr64:
9570 return parseTokenOp("addr64", Operands);
9571 case MCK_done:
9572 return parseTokenOp("done", Operands);
9573 case MCK_idxen:
9574 return parseTokenOp("idxen", Operands);
9575 case MCK_lds:
9576 return parseTokenOp("lds", Operands);
9577 case MCK_offen:
9578 return parseTokenOp("offen", Operands);
9579 case MCK_off:
9580 return parseTokenOp("off", Operands);
9581 case MCK_row_95_en:
9582 return parseTokenOp("row_en", Operands);
9583 case MCK_gds:
9584 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
9585 case MCK_tfe:
9586 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
9587 }
9588 return tryCustomParseOperand(Operands, MCK);
9589}
9590
9591// This function should be defined after auto-generated include so that we have
9592// MatchClassKind enum defined
9593unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
9594 unsigned Kind) {
9595 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
9596 // But MatchInstructionImpl() expects to meet token and fails to validate
9597 // operand. This method checks if we are given immediate operand but expect to
9598 // get corresponding token.
9599 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9600 switch (Kind) {
9601 case MCK_addr64:
9602 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9603 case MCK_gds:
9604 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9605 case MCK_lds:
9606 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9607 case MCK_idxen:
9608 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9609 case MCK_offen:
9610 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9611 case MCK_tfe:
9612 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
9613 case MCK_SSrc_b32:
9614 // When operands have expression values, they will return true for isToken,
9615 // because it is not possible to distinguish between a token and an
9616 // expression at parse time. MatchInstructionImpl() will always try to
9617 // match an operand as a token, when isToken returns true, and when the
9618 // name of the expression is not a valid token, the match will fail,
9619 // so we need to handle it here.
9620 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
9621 case MCK_SSrc_f32:
9622 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
9623 case MCK_SOPPBrTarget:
9624 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
9625 case MCK_VReg32OrOff:
9626 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9627 case MCK_InterpSlot:
9628 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9629 case MCK_InterpAttr:
9630 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9631 case MCK_InterpAttrChan:
9632 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
9633 case MCK_SReg_64:
9634 case MCK_SReg_64_XEXEC:
9635 // Null is defined as a 32-bit register but
9636 // it should also be enabled with 64-bit operands.
9637 // The following code enables it for SReg_64 operands
9638 // used as source and destination. Remaining source
9639 // operands are handled in isInlinableImm.
9640 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9641 default:
9642 return Match_InvalidOperand;
9643 }
9644}
9645
9646//===----------------------------------------------------------------------===//
9647// endpgm
9648//===----------------------------------------------------------------------===//
9649
9650ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
9651 SMLoc S = getLoc();
9652 int64_t Imm = 0;
9653
9654 if (!parseExpr(Imm)) {
9655 // The operand is optional, if not present default to 0
9656 Imm = 0;
9657 }
9658
9659 if (!isUInt<16>(Imm))
9660 return Error(S, "expected a 16-bit value");
9661
9662 Operands.push_back(
9663 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9664 return ParseStatus::Success;
9665}
9666
9667bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9668
9669//===----------------------------------------------------------------------===//
9670// LDSDIR
9671//===----------------------------------------------------------------------===//
9672
9673bool AMDGPUOperand::isWaitVDST() const {
9674 return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
9675}
9676
9677bool AMDGPUOperand::isWaitVAVDst() const {
9678 return isImmTy(ImmTyWaitVAVDst) && isUInt<4>(getImm());
9679}
9680
9681bool AMDGPUOperand::isWaitVMVSrc() const {
9682 return isImmTy(ImmTyWaitVMVSrc) && isUInt<1>(getImm());
9683}
9684
9685//===----------------------------------------------------------------------===//
9686// VINTERP
9687//===----------------------------------------------------------------------===//
9688
9689bool AMDGPUOperand::isWaitEXP() const {
9690 return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
9691}
9692
9693//===----------------------------------------------------------------------===//
9694// Split Barrier
9695//===----------------------------------------------------------------------===//
9696
9697bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }