1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
14#include "SIDefines.h"
15#include "SIInstrInfo.h"
16#include "SIRegisterInfo.h"
21#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/StringSet.h"
24#include "llvm/ADT/Twine.h"
27#include "llvm/MC/MCAsmInfo.h"
28#include "llvm/MC/MCContext.h"
29#include "llvm/MC/MCExpr.h"
30#include "llvm/MC/MCInst.h"
31#include "llvm/MC/MCInstrDesc.h"
36#include "llvm/MC/MCSymbol.h"
43#include <optional>
44
45using namespace llvm;
46using namespace llvm::AMDGPU;
47using namespace llvm::amdhsa;
48
49namespace {
50
51class AMDGPUAsmParser;
52
53enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
54
55//===----------------------------------------------------------------------===//
56// Operand
57//===----------------------------------------------------------------------===//
58
59class AMDGPUOperand : public MCParsedAsmOperand {
60 enum KindTy {
61 Token,
62 Immediate,
63 Register,
64 Expression
65 } Kind;
66
67 SMLoc StartLoc, EndLoc;
68 const AMDGPUAsmParser *AsmParser;
69
70public:
71 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
72 : Kind(Kind_), AsmParser(AsmParser_) {}
73
74 using Ptr = std::unique_ptr<AMDGPUOperand>;
75
76 struct Modifiers {
77 bool Abs = false;
78 bool Neg = false;
79 bool Sext = false;
80 bool Lit = false;
81
82 bool hasFPModifiers() const { return Abs || Neg; }
83 bool hasIntModifiers() const { return Sext; }
84 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
85
86 int64_t getFPModifiersOperand() const {
87 int64_t Operand = 0;
88 Operand |= Abs ? SISrcMods::ABS : 0u;
89 Operand |= Neg ? SISrcMods::NEG : 0u;
90 return Operand;
91 }
92
93 int64_t getIntModifiersOperand() const {
94 int64_t Operand = 0;
95 Operand |= Sext ? SISrcMods::SEXT : 0u;
96 return Operand;
97 }
98
99 int64_t getModifiersOperand() const {
100 assert(!(hasFPModifiers() && hasIntModifiers())
101 && "fp and int modifiers should not be used simultaneously");
102 if (hasFPModifiers()) {
103 return getFPModifiersOperand();
104 } else if (hasIntModifiers()) {
105 return getIntModifiersOperand();
106 } else {
107 return 0;
108 }
109 }
110
111 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
112 };
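// Example (illustrative): for a VOP3 source operand written as "-|v1|", both
// Neg and Abs are set, so getModifiersOperand() yields
// SISrcMods::NEG | SISrcMods::ABS; a source written as "sext(v1)" sets only
// Sext and encodes SISrcMods::SEXT. FP and integer modifiers are never
// combined on a single operand, which is what the assertion above enforces.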
113
114 enum ImmTy {
115 ImmTyNone,
116 ImmTyGDS,
117 ImmTyLDS,
118 ImmTyOffen,
119 ImmTyIdxen,
120 ImmTyAddr64,
121 ImmTyOffset,
122 ImmTyInstOffset,
123 ImmTyOffset0,
124 ImmTyOffset1,
125 ImmTySMEMOffsetMod,
126 ImmTyCPol,
127 ImmTyTFE,
128 ImmTyD16,
129 ImmTyClampSI,
130 ImmTyOModSI,
131 ImmTySDWADstSel,
132 ImmTySDWASrc0Sel,
133 ImmTySDWASrc1Sel,
134 ImmTySDWADstUnused,
135 ImmTyDMask,
136 ImmTyDim,
137 ImmTyUNorm,
138 ImmTyDA,
139 ImmTyR128A16,
140 ImmTyA16,
141 ImmTyLWE,
142 ImmTyExpTgt,
143 ImmTyExpCompr,
144 ImmTyExpVM,
145 ImmTyFORMAT,
146 ImmTyHwreg,
147 ImmTyOff,
148 ImmTySendMsg,
149 ImmTyInterpSlot,
150 ImmTyInterpAttr,
151 ImmTyInterpAttrChan,
152 ImmTyOpSel,
153 ImmTyOpSelHi,
154 ImmTyNegLo,
155 ImmTyNegHi,
156 ImmTyIndexKey8bit,
157 ImmTyIndexKey16bit,
158 ImmTyDPP8,
159 ImmTyDppCtrl,
160 ImmTyDppRowMask,
161 ImmTyDppBankMask,
162 ImmTyDppBoundCtrl,
163 ImmTyDppFI,
164 ImmTySwizzle,
165 ImmTyGprIdxMode,
166 ImmTyHigh,
167 ImmTyBLGP,
168 ImmTyCBSZ,
169 ImmTyABID,
170 ImmTyEndpgm,
171 ImmTyWaitVDST,
172 ImmTyWaitEXP,
173 ImmTyWaitVAVDst,
174 ImmTyWaitVMVSrc,
175 };
176
177 // Immediate operand kind.
178 // It helps to identify the location of an offending operand after an error.
179 // Note that regular literals and mandatory literals (KImm) must be handled
180 // differently. When looking for an offending operand, we should usually
181 // ignore mandatory literals because they are part of the instruction and
182 // cannot be changed. Report location of mandatory operands only for VOPD,
183 // when both OpX and OpY have a KImm and there are no other literals.
184 enum ImmKindTy {
185 ImmKindTyNone,
186 ImmKindTyLiteral,
187 ImmKindTyMandatoryLiteral,
188 ImmKindTyConst,
189 };
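// Example (illustrative): in "v_fmaak_f32 v0, v1, v2, 0x1234" the trailing
// constant is part of the encoding (a KImm) and is recorded as
// ImmKindTyMandatoryLiteral, while the constant in "v_add_f32 v0, 0x1234, v1"
// is a regular literal recorded as ImmKindTyLiteral. Per the note above, only
// the latter is normally reported as the offending operand.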
190
191private:
192 struct TokOp {
193 const char *Data;
194 unsigned Length;
195 };
196
197 struct ImmOp {
198 int64_t Val;
199 ImmTy Type;
200 bool IsFPImm;
201 mutable ImmKindTy Kind;
202 Modifiers Mods;
203 };
204
205 struct RegOp {
206 unsigned RegNo;
207 Modifiers Mods;
208 };
209
210 union {
211 TokOp Tok;
212 ImmOp Imm;
213 RegOp Reg;
214 const MCExpr *Expr;
215 };
216
217public:
218 bool isToken() const override { return Kind == Token; }
219
220 bool isSymbolRefExpr() const {
221 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
222 }
223
224 bool isImm() const override {
225 return Kind == Immediate;
226 }
227
228 void setImmKindNone() const {
229 assert(isImm());
230 Imm.Kind = ImmKindTyNone;
231 }
232
233 void setImmKindLiteral() const {
234 assert(isImm());
235 Imm.Kind = ImmKindTyLiteral;
236 }
237
238 void setImmKindMandatoryLiteral() const {
239 assert(isImm());
240 Imm.Kind = ImmKindTyMandatoryLiteral;
241 }
242
243 void setImmKindConst() const {
244 assert(isImm());
245 Imm.Kind = ImmKindTyConst;
246 }
247
248 bool IsImmKindLiteral() const {
249 return isImm() && Imm.Kind == ImmKindTyLiteral;
250 }
251
252 bool IsImmKindMandatoryLiteral() const {
253 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
254 }
255
256 bool isImmKindConst() const {
257 return isImm() && Imm.Kind == ImmKindTyConst;
258 }
259
260 bool isInlinableImm(MVT type) const;
261 bool isLiteralImm(MVT type) const;
262
263 bool isRegKind() const {
264 return Kind == Register;
265 }
266
267 bool isReg() const override {
268 return isRegKind() && !hasModifiers();
269 }
270
271 bool isRegOrInline(unsigned RCID, MVT type) const {
272 return isRegClass(RCID) || isInlinableImm(type);
273 }
274
275 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
276 return isRegOrInline(RCID, type) || isLiteralImm(type);
277 }
278
279 bool isRegOrImmWithInt16InputMods() const {
280 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
281 }
282
283 bool isRegOrImmWithIntT16InputMods() const {
284 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
285 }
286
287 bool isRegOrImmWithInt32InputMods() const {
288 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
289 }
290
291 bool isRegOrInlineImmWithInt16InputMods() const {
292 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
293 }
294
295 bool isRegOrInlineImmWithInt32InputMods() const {
296 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
297 }
298
299 bool isRegOrImmWithInt64InputMods() const {
300 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
301 }
302
303 bool isRegOrImmWithFP16InputMods() const {
304 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
305 }
306
307 bool isRegOrImmWithFPT16InputMods() const {
308 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
309 }
310
311 bool isRegOrImmWithFP32InputMods() const {
312 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
313 }
314
315 bool isRegOrImmWithFP64InputMods() const {
316 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
317 }
318
319 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
320 return isRegOrInline(
321 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
322 }
323
324 bool isRegOrInlineImmWithFP32InputMods() const {
325 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
326 }
327
328 bool isPackedFP16InputMods() const {
329 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
330 }
331
332 bool isVReg() const {
333 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
334 isRegClass(AMDGPU::VReg_64RegClassID) ||
335 isRegClass(AMDGPU::VReg_96RegClassID) ||
336 isRegClass(AMDGPU::VReg_128RegClassID) ||
337 isRegClass(AMDGPU::VReg_160RegClassID) ||
338 isRegClass(AMDGPU::VReg_192RegClassID) ||
339 isRegClass(AMDGPU::VReg_256RegClassID) ||
340 isRegClass(AMDGPU::VReg_512RegClassID) ||
341 isRegClass(AMDGPU::VReg_1024RegClassID);
342 }
343
344 bool isVReg32() const {
345 return isRegClass(AMDGPU::VGPR_32RegClassID);
346 }
347
348 bool isVReg32OrOff() const {
349 return isOff() || isVReg32();
350 }
351
352 bool isNull() const {
353 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
354 }
355
356 bool isVRegWithInputMods() const;
357 template <bool IsFake16> bool isT16VRegWithInputMods() const;
358
359 bool isSDWAOperand(MVT type) const;
360 bool isSDWAFP16Operand() const;
361 bool isSDWAFP32Operand() const;
362 bool isSDWAInt16Operand() const;
363 bool isSDWAInt32Operand() const;
364
365 bool isImmTy(ImmTy ImmT) const {
366 return isImm() && Imm.Type == ImmT;
367 }
368
369 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
370
371 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
372
373 bool isImmModifier() const {
374 return isImm() && Imm.Type != ImmTyNone;
375 }
376
377 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
378 bool isDMask() const { return isImmTy(ImmTyDMask); }
379 bool isDim() const { return isImmTy(ImmTyDim); }
380 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
381 bool isOff() const { return isImmTy(ImmTyOff); }
382 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
383 bool isOffen() const { return isImmTy(ImmTyOffen); }
384 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
385 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
386 bool isOffset() const { return isImmTy(ImmTyOffset); }
387 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
388 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
389 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
390 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
391 bool isGDS() const { return isImmTy(ImmTyGDS); }
392 bool isLDS() const { return isImmTy(ImmTyLDS); }
393 bool isCPol() const { return isImmTy(ImmTyCPol); }
394 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
395 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
396 bool isTFE() const { return isImmTy(ImmTyTFE); }
397 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
398 bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); }
399 bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); }
400 bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
401 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
402 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
403 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
404 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
405 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
406 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
407 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
408 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
409 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
410 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
411 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
412 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
413
414 bool isRegOrImm() const {
415 return isReg() || isImm();
416 }
417
418 bool isRegClass(unsigned RCID) const;
419
420 bool isInlineValue() const;
421
422 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
423 return isRegOrInline(RCID, type) && !hasModifiers();
424 }
425
426 bool isSCSrcB16() const {
427 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
428 }
429
430 bool isSCSrcV2B16() const {
431 return isSCSrcB16();
432 }
433
434 bool isSCSrc_b32() const {
435 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
436 }
437
438 bool isSCSrc_b64() const {
439 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
440 }
441
442 bool isBoolReg() const;
443
444 bool isSCSrcF16() const {
445 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
446 }
447
448 bool isSCSrcV2F16() const {
449 return isSCSrcF16();
450 }
451
452 bool isSCSrcF32() const {
453 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
454 }
455
456 bool isSCSrcF64() const {
457 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
458 }
459
460 bool isSSrc_b32() const {
461 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
462 }
463
464 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
465
466 bool isSSrcV2B16() const {
467 llvm_unreachable("cannot happen");
468 return isSSrc_b16();
469 }
470
471 bool isSSrc_b64() const {
472 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
473 // See isVSrc64().
474 return isSCSrc_b64() || isLiteralImm(MVT::i64);
475 }
476
477 bool isSSrc_f32() const {
478 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
479 }
480
481 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
482
483 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
484
485 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
486
487 bool isSSrcV2F16() const {
488 llvm_unreachable("cannot happen");
489 return isSSrc_f16();
490 }
491
492 bool isSSrcV2FP32() const {
493 llvm_unreachable("cannot happen");
494 return isSSrc_f32();
495 }
496
497 bool isSCSrcV2FP32() const {
498 llvm_unreachable("cannot happen");
499 return isSCSrcF32();
500 }
501
502 bool isSSrcV2INT32() const {
503 llvm_unreachable("cannot happen");
504 return isSSrc_b32();
505 }
506
507 bool isSCSrcV2INT32() const {
508 llvm_unreachable("cannot happen");
509 return isSCSrc_b32();
510 }
511
512 bool isSSrcOrLds_b32() const {
513 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
514 isLiteralImm(MVT::i32) || isExpr();
515 }
516
517 bool isVCSrc_b32() const {
518 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
519 }
520
521 bool isVCSrcB64() const {
522 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
523 }
524
525 bool isVCSrcTB16() const {
526 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
527 }
528
529 bool isVCSrcTB16_Lo128() const {
530 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
531 }
532
533 bool isVCSrcFake16B16_Lo128() const {
534 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
535 }
536
537 bool isVCSrc_b16() const {
538 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
539 }
540
541 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
542
543 bool isVCSrc_f32() const {
544 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
545 }
546
547 bool isVCSrcF64() const {
548 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
549 }
550
551 bool isVCSrcTBF16() const {
552 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
553 }
554
555 bool isVCSrcTF16() const {
556 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
557 }
558
559 bool isVCSrcTBF16_Lo128() const {
560 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
561 }
562
563 bool isVCSrcTF16_Lo128() const {
564 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
565 }
566
567 bool isVCSrcFake16BF16_Lo128() const {
568 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
569 }
570
571 bool isVCSrcFake16F16_Lo128() const {
572 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
573 }
574
575 bool isVCSrc_bf16() const {
576 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
577 }
578
579 bool isVCSrc_f16() const {
580 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
581 }
582
583 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
584
585 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
586
587 bool isVSrc_b32() const {
588 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
589 }
590
591 bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
592
593 bool isVSrcT_b16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
594
595 bool isVSrcT_b16_Lo128() const {
596 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
597 }
598
599 bool isVSrcFake16_b16_Lo128() const {
600 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
601 }
602
603 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
604
605 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
606
607 bool isVCSrcV2FP32() const {
608 return isVCSrcF64();
609 }
610
611 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
612
613 bool isVCSrcV2INT32() const {
614 return isVCSrcB64();
615 }
616
617 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
618
619 bool isVSrc_f32() const {
620 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
621 }
622
623 bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }
624
625 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
626
627 bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
628
629 bool isVSrcT_bf16_Lo128() const {
630 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
631 }
632
633 bool isVSrcT_f16_Lo128() const {
634 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
635 }
636
637 bool isVSrcFake16_bf16_Lo128() const {
638 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
639 }
640
641 bool isVSrcFake16_f16_Lo128() const {
642 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
643 }
644
645 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
646
647 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
648
649 bool isVSrc_v2bf16() const {
650 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
651 }
652
653 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
654
655 bool isVISrcB32() const {
656 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
657 }
658
659 bool isVISrcB16() const {
660 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
661 }
662
663 bool isVISrcV2B16() const {
664 return isVISrcB16();
665 }
666
667 bool isVISrcF32() const {
668 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
669 }
670
671 bool isVISrcF16() const {
672 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
673 }
674
675 bool isVISrcV2F16() const {
676 return isVISrcF16() || isVISrcB32();
677 }
678
679 bool isVISrc_64_bf16() const {
680 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
681 }
682
683 bool isVISrc_64_f16() const {
684 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
685 }
686
687 bool isVISrc_64_b32() const {
688 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
689 }
690
691 bool isVISrc_64B64() const {
692 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
693 }
694
695 bool isVISrc_64_f64() const {
696 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
697 }
698
699 bool isVISrc_64V2FP32() const {
700 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
701 }
702
703 bool isVISrc_64V2INT32() const {
704 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
705 }
706
707 bool isVISrc_256_b32() const {
708 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
709 }
710
711 bool isVISrc_256_f32() const {
712 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
713 }
714
715 bool isVISrc_256B64() const {
716 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
717 }
718
719 bool isVISrc_256_f64() const {
720 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
721 }
722
723 bool isVISrc_128B16() const {
724 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
725 }
726
727 bool isVISrc_128V2B16() const {
728 return isVISrc_128B16();
729 }
730
731 bool isVISrc_128_b32() const {
732 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
733 }
734
735 bool isVISrc_128_f32() const {
736 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
737 }
738
739 bool isVISrc_256V2FP32() const {
740 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
741 }
742
743 bool isVISrc_256V2INT32() const {
744 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
745 }
746
747 bool isVISrc_512_b32() const {
748 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
749 }
750
751 bool isVISrc_512B16() const {
752 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
753 }
754
755 bool isVISrc_512V2B16() const {
756 return isVISrc_512B16();
757 }
758
759 bool isVISrc_512_f32() const {
760 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
761 }
762
763 bool isVISrc_512F16() const {
764 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
765 }
766
767 bool isVISrc_512V2F16() const {
768 return isVISrc_512F16() || isVISrc_512_b32();
769 }
770
771 bool isVISrc_1024_b32() const {
772 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
773 }
774
775 bool isVISrc_1024B16() const {
776 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
777 }
778
779 bool isVISrc_1024V2B16() const {
780 return isVISrc_1024B16();
781 }
782
783 bool isVISrc_1024_f32() const {
784 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
785 }
786
787 bool isVISrc_1024F16() const {
788 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
789 }
790
791 bool isVISrc_1024V2F16() const {
792 return isVISrc_1024F16() || isVISrc_1024_b32();
793 }
794
795 bool isAISrcB32() const {
796 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
797 }
798
799 bool isAISrcB16() const {
800 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
801 }
802
803 bool isAISrcV2B16() const {
804 return isAISrcB16();
805 }
806
807 bool isAISrcF32() const {
808 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
809 }
810
811 bool isAISrcF16() const {
812 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
813 }
814
815 bool isAISrcV2F16() const {
816 return isAISrcF16() || isAISrcB32();
817 }
818
819 bool isAISrc_64B64() const {
820 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
821 }
822
823 bool isAISrc_64_f64() const {
824 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
825 }
826
827 bool isAISrc_128_b32() const {
828 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
829 }
830
831 bool isAISrc_128B16() const {
832 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
833 }
834
835 bool isAISrc_128V2B16() const {
836 return isAISrc_128B16();
837 }
838
839 bool isAISrc_128_f32() const {
840 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
841 }
842
843 bool isAISrc_128F16() const {
844 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
845 }
846
847 bool isAISrc_128V2F16() const {
848 return isAISrc_128F16() || isAISrc_128_b32();
849 }
850
851 bool isVISrc_128_bf16() const {
852 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
853 }
854
855 bool isVISrc_128_f16() const {
856 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
857 }
858
859 bool isVISrc_128V2F16() const {
860 return isVISrc_128_f16() || isVISrc_128_b32();
861 }
862
863 bool isAISrc_256B64() const {
864 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
865 }
866
867 bool isAISrc_256_f64() const {
868 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
869 }
870
871 bool isAISrc_512_b32() const {
872 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
873 }
874
875 bool isAISrc_512B16() const {
876 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
877 }
878
879 bool isAISrc_512V2B16() const {
880 return isAISrc_512B16();
881 }
882
883 bool isAISrc_512_f32() const {
884 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
885 }
886
887 bool isAISrc_512F16() const {
888 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
889 }
890
891 bool isAISrc_512V2F16() const {
892 return isAISrc_512F16() || isAISrc_512_b32();
893 }
894
895 bool isAISrc_1024_b32() const {
896 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
897 }
898
899 bool isAISrc_1024B16() const {
900 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
901 }
902
903 bool isAISrc_1024V2B16() const {
904 return isAISrc_1024B16();
905 }
906
907 bool isAISrc_1024_f32() const {
908 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
909 }
910
911 bool isAISrc_1024F16() const {
912 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
913 }
914
915 bool isAISrc_1024V2F16() const {
916 return isAISrc_1024F16() || isAISrc_1024_b32();
917 }
918
919 bool isKImmFP32() const {
920 return isLiteralImm(MVT::f32);
921 }
922
923 bool isKImmFP16() const {
924 return isLiteralImm(MVT::f16);
925 }
926
927 bool isMem() const override {
928 return false;
929 }
930
931 bool isExpr() const {
932 return Kind == Expression;
933 }
934
935 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
936
937 bool isSWaitCnt() const;
938 bool isDepCtr() const;
939 bool isSDelayALU() const;
940 bool isHwreg() const;
941 bool isSendMsg() const;
942 bool isSplitBarrier() const;
943 bool isSwizzle() const;
944 bool isSMRDOffset8() const;
945 bool isSMEMOffset() const;
946 bool isSMRDLiteralOffset() const;
947 bool isDPP8() const;
948 bool isDPPCtrl() const;
949 bool isBLGP() const;
950 bool isCBSZ() const;
951 bool isABID() const;
952 bool isGPRIdxMode() const;
953 bool isS16Imm() const;
954 bool isU16Imm() const;
955 bool isEndpgm() const;
956 bool isWaitVDST() const;
957 bool isWaitEXP() const;
958 bool isWaitVAVDst() const;
959 bool isWaitVMVSrc() const;
960
961 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
962 return std::bind(P, *this);
963 }
964
965 StringRef getToken() const {
966 assert(isToken());
967 return StringRef(Tok.Data, Tok.Length);
968 }
969
970 int64_t getImm() const {
971 assert(isImm());
972 return Imm.Val;
973 }
974
975 void setImm(int64_t Val) {
976 assert(isImm());
977 Imm.Val = Val;
978 }
979
980 ImmTy getImmTy() const {
981 assert(isImm());
982 return Imm.Type;
983 }
984
985 MCRegister getReg() const override {
986 assert(isRegKind());
987 return Reg.RegNo;
988 }
989
990 SMLoc getStartLoc() const override {
991 return StartLoc;
992 }
993
994 SMLoc getEndLoc() const override {
995 return EndLoc;
996 }
997
998 SMRange getLocRange() const {
999 return SMRange(StartLoc, EndLoc);
1000 }
1001
1002 Modifiers getModifiers() const {
1003 assert(isRegKind() || isImmTy(ImmTyNone));
1004 return isRegKind() ? Reg.Mods : Imm.Mods;
1005 }
1006
1007 void setModifiers(Modifiers Mods) {
1008 assert(isRegKind() || isImmTy(ImmTyNone));
1009 if (isRegKind())
1010 Reg.Mods = Mods;
1011 else
1012 Imm.Mods = Mods;
1013 }
1014
1015 bool hasModifiers() const {
1016 return getModifiers().hasModifiers();
1017 }
1018
1019 bool hasFPModifiers() const {
1020 return getModifiers().hasFPModifiers();
1021 }
1022
1023 bool hasIntModifiers() const {
1024 return getModifiers().hasIntModifiers();
1025 }
1026
1027 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1028
1029 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1030
1031 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1032
1033 void addRegOperands(MCInst &Inst, unsigned N) const;
1034
1035 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1036 if (isRegKind())
1037 addRegOperands(Inst, N);
1038 else
1039 addImmOperands(Inst, N);
1040 }
1041
1042 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1043 Modifiers Mods = getModifiers();
1044 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1045 if (isRegKind()) {
1046 addRegOperands(Inst, N);
1047 } else {
1048 addImmOperands(Inst, N, false);
1049 }
1050 }
1051
1052 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1053 assert(!hasIntModifiers());
1054 addRegOrImmWithInputModsOperands(Inst, N);
1055 }
1056
1057 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1058 assert(!hasFPModifiers());
1059 addRegOrImmWithInputModsOperands(Inst, N);
1060 }
1061
1062 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1063 Modifiers Mods = getModifiers();
1064 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1065 assert(isRegKind());
1066 addRegOperands(Inst, N);
1067 }
1068
1069 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1070 assert(!hasIntModifiers());
1071 addRegWithInputModsOperands(Inst, N);
1072 }
1073
1074 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1075 assert(!hasFPModifiers());
1076 addRegWithInputModsOperands(Inst, N);
1077 }
1078
1079 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1080 // clang-format off
1081 switch (Type) {
1082 case ImmTyNone: OS << "None"; break;
1083 case ImmTyGDS: OS << "GDS"; break;
1084 case ImmTyLDS: OS << "LDS"; break;
1085 case ImmTyOffen: OS << "Offen"; break;
1086 case ImmTyIdxen: OS << "Idxen"; break;
1087 case ImmTyAddr64: OS << "Addr64"; break;
1088 case ImmTyOffset: OS << "Offset"; break;
1089 case ImmTyInstOffset: OS << "InstOffset"; break;
1090 case ImmTyOffset0: OS << "Offset0"; break;
1091 case ImmTyOffset1: OS << "Offset1"; break;
1092 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1093 case ImmTyCPol: OS << "CPol"; break;
1094 case ImmTyIndexKey8bit: OS << "index_key"; break;
1095 case ImmTyIndexKey16bit: OS << "index_key"; break;
1096 case ImmTyTFE: OS << "TFE"; break;
1097 case ImmTyD16: OS << "D16"; break;
1098 case ImmTyFORMAT: OS << "FORMAT"; break;
1099 case ImmTyClampSI: OS << "ClampSI"; break;
1100 case ImmTyOModSI: OS << "OModSI"; break;
1101 case ImmTyDPP8: OS << "DPP8"; break;
1102 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1103 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1104 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1105 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1106 case ImmTyDppFI: OS << "DppFI"; break;
1107 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1108 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1109 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1110 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1111 case ImmTyDMask: OS << "DMask"; break;
1112 case ImmTyDim: OS << "Dim"; break;
1113 case ImmTyUNorm: OS << "UNorm"; break;
1114 case ImmTyDA: OS << "DA"; break;
1115 case ImmTyR128A16: OS << "R128A16"; break;
1116 case ImmTyA16: OS << "A16"; break;
1117 case ImmTyLWE: OS << "LWE"; break;
1118 case ImmTyOff: OS << "Off"; break;
1119 case ImmTyExpTgt: OS << "ExpTgt"; break;
1120 case ImmTyExpCompr: OS << "ExpCompr"; break;
1121 case ImmTyExpVM: OS << "ExpVM"; break;
1122 case ImmTyHwreg: OS << "Hwreg"; break;
1123 case ImmTySendMsg: OS << "SendMsg"; break;
1124 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1125 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1126 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1127 case ImmTyOpSel: OS << "OpSel"; break;
1128 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1129 case ImmTyNegLo: OS << "NegLo"; break;
1130 case ImmTyNegHi: OS << "NegHi"; break;
1131 case ImmTySwizzle: OS << "Swizzle"; break;
1132 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1133 case ImmTyHigh: OS << "High"; break;
1134 case ImmTyBLGP: OS << "BLGP"; break;
1135 case ImmTyCBSZ: OS << "CBSZ"; break;
1136 case ImmTyABID: OS << "ABID"; break;
1137 case ImmTyEndpgm: OS << "Endpgm"; break;
1138 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1139 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1140 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1141 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1142 }
1143 // clang-format on
1144 }
1145
1146 void print(raw_ostream &OS) const override {
1147 switch (Kind) {
1148 case Register:
1149 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1150 break;
1151 case Immediate:
1152 OS << '<' << getImm();
1153 if (getImmTy() != ImmTyNone) {
1154 OS << " type: "; printImmTy(OS, getImmTy());
1155 }
1156 OS << " mods: " << Imm.Mods << '>';
1157 break;
1158 case Token:
1159 OS << '\'' << getToken() << '\'';
1160 break;
1161 case Expression:
1162 OS << "<expr " << *Expr << '>';
1163 break;
1164 }
1165 }
1166
1167 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1168 int64_t Val, SMLoc Loc,
1169 ImmTy Type = ImmTyNone,
1170 bool IsFPImm = false) {
1171 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1172 Op->Imm.Val = Val;
1173 Op->Imm.IsFPImm = IsFPImm;
1174 Op->Imm.Kind = ImmKindTyNone;
1175 Op->Imm.Type = Type;
1176 Op->Imm.Mods = Modifiers();
1177 Op->StartLoc = Loc;
1178 Op->EndLoc = Loc;
1179 return Op;
1180 }
1181
1182 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1183 StringRef Str, SMLoc Loc,
1184 bool HasExplicitEncodingSize = true) {
1185 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1186 Res->Tok.Data = Str.data();
1187 Res->Tok.Length = Str.size();
1188 Res->StartLoc = Loc;
1189 Res->EndLoc = Loc;
1190 return Res;
1191 }
1192
1193 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1194 unsigned RegNo, SMLoc S,
1195 SMLoc E) {
1196 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1197 Op->Reg.RegNo = RegNo;
1198 Op->Reg.Mods = Modifiers();
1199 Op->StartLoc = S;
1200 Op->EndLoc = E;
1201 return Op;
1202 }
1203
1204 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1205 const class MCExpr *Expr, SMLoc S) {
1206 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1207 Op->Expr = Expr;
1208 Op->StartLoc = S;
1209 Op->EndLoc = S;
1210 return Op;
1211 }
1212 };
1213
1214raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1215 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1216 return OS;
1217 }
1218
1219//===----------------------------------------------------------------------===//
1220// AsmParser
1221//===----------------------------------------------------------------------===//
1222
1223// Holds info related to the current kernel, e.g. count of SGPRs used.
1224// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1225// .amdgpu_hsa_kernel or at EOF.
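// For example (illustrative):
//   .amdgpu_hsa_kernel my_kernel
//   my_kernel:
//     s_mov_b32 s10, 0
//     v_mov_b32 v7, 0
// tracks s10 and v7 as the highest registers referenced, so the
// .kernel.sgpr_count and .kernel.vgpr_count symbols defined below end up as
// 11 and 8 (assuming no AGPRs are used).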
1226class KernelScopeInfo {
1227 int SgprIndexUnusedMin = -1;
1228 int VgprIndexUnusedMin = -1;
1229 int AgprIndexUnusedMin = -1;
1230 MCContext *Ctx = nullptr;
1231 MCSubtargetInfo const *MSTI = nullptr;
1232
1233 void usesSgprAt(int i) {
1234 if (i >= SgprIndexUnusedMin) {
1235 SgprIndexUnusedMin = ++i;
1236 if (Ctx) {
1237 MCSymbol* const Sym =
1238 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1239 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1240 }
1241 }
1242 }
1243
1244 void usesVgprAt(int i) {
1245 if (i >= VgprIndexUnusedMin) {
1246 VgprIndexUnusedMin = ++i;
1247 if (Ctx) {
1248 MCSymbol* const Sym =
1249 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1250 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1251 VgprIndexUnusedMin);
1252 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1253 }
1254 }
1255 }
1256
1257 void usesAgprAt(int i) {
1258 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1259 if (!hasMAIInsts(*MSTI))
1260 return;
1261
1262 if (i >= AgprIndexUnusedMin) {
1263 AgprIndexUnusedMin = ++i;
1264 if (Ctx) {
1265 MCSymbol* const Sym =
1266 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1267 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1268
1269 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1270 MCSymbol* const vSym =
1271 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1272 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1273 VgprIndexUnusedMin);
1274 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1275 }
1276 }
1277 }
1278
1279public:
1280 KernelScopeInfo() = default;
1281
1282 void initialize(MCContext &Context) {
1283 Ctx = &Context;
1284 MSTI = Ctx->getSubtargetInfo();
1285
1286 usesSgprAt(SgprIndexUnusedMin = -1);
1287 usesVgprAt(VgprIndexUnusedMin = -1);
1288 if (hasMAIInsts(*MSTI)) {
1289 usesAgprAt(AgprIndexUnusedMin = -1);
1290 }
1291 }
1292
1293 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1294 unsigned RegWidth) {
1295 switch (RegKind) {
1296 case IS_SGPR:
1297 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1298 break;
1299 case IS_AGPR:
1300 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1301 break;
1302 case IS_VGPR:
1303 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1304 break;
1305 default:
1306 break;
1307 }
1308 }
1309};
1310
1311class AMDGPUAsmParser : public MCTargetAsmParser {
1312 MCAsmParser &Parser;
1313
1314 unsigned ForcedEncodingSize = 0;
1315 bool ForcedDPP = false;
1316 bool ForcedSDWA = false;
1317 KernelScopeInfo KernelScope;
1318
1319 /// @name Auto-generated Match Functions
1320 /// {
1321
1322#define GET_ASSEMBLER_HEADER
1323#include "AMDGPUGenAsmMatcher.inc"
1324
1325 /// }
1326
1327private:
1328 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1329 bool OutOfRangeError(SMRange Range);
1330 /// Calculate VGPR/SGPR blocks required for given target, reserved
1331 /// registers, and user-specified NextFreeXGPR values.
1332 ///
1333 /// \param Features [in] Target features, used for bug corrections.
1334 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1335 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1336 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1337 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1338 /// descriptor field, if valid.
1339 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1340 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1341 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1342 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1343 /// \param VGPRBlocks [out] Result VGPR block count.
1344 /// \param SGPRBlocks [out] Result SGPR block count.
1345 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1346 bool FlatScrUsed, bool XNACKUsed,
1347 std::optional<bool> EnableWavefrontSize32,
1348 unsigned NextFreeVGPR, SMRange VGPRRange,
1349 unsigned NextFreeSGPR, SMRange SGPRRange,
1350 unsigned &VGPRBlocks, unsigned &SGPRBlocks);
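// Rough worked example (illustrative; the real granule depends on the
// subtarget and wavefront size): with a VGPR granule of 4, NextFreeVGPR = 42
// gives roughly VGPRBlocks = ceil(42 / 4) - 1 = 10, the "granulated" count
// written to the kernel descriptor. SGPRs are granulated the same way after
// the VCC / FLAT_SCRATCH / XNACK_MASK reservations are added to NextFreeSGPR.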
1351 bool ParseDirectiveAMDGCNTarget();
1352 bool ParseDirectiveAMDHSACodeObjectVersion();
1353 bool ParseDirectiveAMDHSAKernel();
1354 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1355 bool ParseDirectiveAMDKernelCodeT();
1356 // TODO: Possibly make subtargetHasRegister const.
1357 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1358 bool ParseDirectiveAMDGPUHsaKernel();
1359
1360 bool ParseDirectiveISAVersion();
1361 bool ParseDirectiveHSAMetadata();
1362 bool ParseDirectivePALMetadataBegin();
1363 bool ParseDirectivePALMetadata();
1364 bool ParseDirectiveAMDGPULDS();
1365
1366 /// Common code to parse out a block of text (typically YAML) between start and
1367 /// end directives.
1368 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1369 const char *AssemblerDirectiveEnd,
1370 std::string &CollectString);
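// For example (illustrative), ParseDirectiveHSAMetadata uses this to collect
// the YAML between a directive pair such as
//   .amdgpu_metadata
//     <YAML text>
//   .end_amdgpu_metadata
// into CollectString without interpreting the text itself.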
1371
1372 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1373 RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1374 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1375 unsigned &RegNum, unsigned &RegWidth,
1376 bool RestoreOnFailure = false);
1377 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1378 unsigned &RegNum, unsigned &RegWidth,
1379 SmallVectorImpl<AsmToken> &Tokens);
1380 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1381 unsigned &RegWidth,
1382 SmallVectorImpl<AsmToken> &Tokens);
1383 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1384 unsigned &RegWidth,
1385 SmallVectorImpl<AsmToken> &Tokens);
1386 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1387 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1388 bool ParseRegRange(unsigned& Num, unsigned& Width);
1389 unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg,
1390 unsigned RegWidth, SMLoc Loc);
1391
1392 bool isRegister();
1393 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1394 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1395 void initializeGprCountSymbol(RegisterKind RegKind);
1396 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1397 unsigned RegWidth);
1398 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1399 bool IsAtomic);
1400
1401public:
1402 enum AMDGPUMatchResultTy {
1403 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1404 };
1405 enum OperandMode {
1406 OperandMode_Default,
1407 OperandMode_NSA,
1408 };
1409
1410 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1411
1412 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1413 const MCInstrInfo &MII,
1414 const MCTargetOptions &Options)
1415 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1417
1418 if (getFeatureBits().none()) {
1419 // Set default features.
1420 copySTI().ToggleFeature("southern-islands");
1421 }
1422
1423 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1424
1425 {
1426 // TODO: make those pre-defined variables read-only.
1427 // Currently there is no suitable machinery in the core llvm-mc for this.
1428 // MCSymbol::isRedefinable is intended for another purpose, and
1429 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1430 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1431 MCContext &Ctx = getContext();
1432 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1433 MCSymbol *Sym =
1434 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1435 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1436 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1437 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1438 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1439 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1440 } else {
1441 MCSymbol *Sym =
1442 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1443 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1444 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1445 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1446 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1447 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1448 }
1449 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1450 initializeGprCountSymbol(IS_VGPR);
1451 initializeGprCountSymbol(IS_SGPR);
1452 } else
1453 KernelScope.initialize(getContext());
1454 }
1455 }
1456
1457 bool hasMIMG_R128() const {
1458 return AMDGPU::hasMIMG_R128(getSTI());
1459 }
1460
1461 bool hasPackedD16() const {
1462 return AMDGPU::hasPackedD16(getSTI());
1463 }
1464
1465 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1466
1467 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1468
1469 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1470
1471 bool isSI() const {
1472 return AMDGPU::isSI(getSTI());
1473 }
1474
1475 bool isCI() const {
1476 return AMDGPU::isCI(getSTI());
1477 }
1478
1479 bool isVI() const {
1480 return AMDGPU::isVI(getSTI());
1481 }
1482
1483 bool isGFX9() const {
1484 return AMDGPU::isGFX9(getSTI());
1485 }
1486
1487 // TODO: isGFX90A is also true for GFX940. We need to clean it.
1488 bool isGFX90A() const {
1489 return AMDGPU::isGFX90A(getSTI());
1490 }
1491
1492 bool isGFX940() const {
1493 return AMDGPU::isGFX940(getSTI());
1494 }
1495
1496 bool isGFX9Plus() const {
1497 return AMDGPU::isGFX9Plus(getSTI());
1498 }
1499
1500 bool isGFX10() const {
1501 return AMDGPU::isGFX10(getSTI());
1502 }
1503
1504 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1505
1506 bool isGFX11() const {
1507 return AMDGPU::isGFX11(getSTI());
1508 }
1509
1510 bool isGFX11Plus() const {
1511 return AMDGPU::isGFX11Plus(getSTI());
1512 }
1513
1514 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1515
1516 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1517
1518 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1519
1520 bool isGFX10_BEncoding() const {
1522 }
1523
1524 bool hasInv2PiInlineImm() const {
1525 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1526 }
1527
1528 bool hasFlatOffsets() const {
1529 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1530 }
1531
1532 bool hasArchitectedFlatScratch() const {
1533 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1534 }
1535
1536 bool hasSGPR102_SGPR103() const {
1537 return !isVI() && !isGFX9();
1538 }
1539
1540 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1541
1542 bool hasIntClamp() const {
1543 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1544 }
1545
1546 bool hasPartialNSAEncoding() const {
1547 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1548 }
1549
1550 unsigned getNSAMaxSize(bool HasSampler = false) const {
1551 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1552 }
1553
1554 unsigned getMaxNumUserSGPRs() const {
1555 return AMDGPU::getMaxNumUserSGPRs(getSTI());
1556 }
1557
1558 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1559
1560 AMDGPUTargetStreamer &getTargetStreamer() {
1561 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1562 return static_cast<AMDGPUTargetStreamer &>(TS);
1563 }
1564
1565 const MCRegisterInfo *getMRI() const {
1566 // We need this const_cast because for some reason getContext() is not const
1567 // in MCAsmParser.
1568 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1569 }
1570
1571 const MCInstrInfo *getMII() const {
1572 return &MII;
1573 }
1574
1575 const FeatureBitset &getFeatureBits() const {
1576 return getSTI().getFeatureBits();
1577 }
1578
1579 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1580 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1581 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1582
1583 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1584 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1585 bool isForcedDPP() const { return ForcedDPP; }
1586 bool isForcedSDWA() const { return ForcedSDWA; }
1587 ArrayRef<unsigned> getMatchedVariants() const;
1588 StringRef getMatchedVariantName() const;
1589
1590 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1591 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1592 bool RestoreOnFailure);
1593 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1594 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1595 SMLoc &EndLoc) override;
1596 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1597 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1598 unsigned Kind) override;
1599 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1600 OperandVector &Operands, MCStreamer &Out,
1601 uint64_t &ErrorInfo,
1602 bool MatchingInlineAsm) override;
1603 bool ParseDirective(AsmToken DirectiveID) override;
1604 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1605 OperandMode Mode = OperandMode_Default);
1606 StringRef parseMnemonicSuffix(StringRef Name);
1607 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1608 SMLoc NameLoc, OperandVector &Operands) override;
1609 //bool ProcessInstruction(MCInst &Inst);
1610
1612
1613 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1614
1615 ParseStatus
1616 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1617 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1618 std::function<bool(int64_t &)> ConvertResult = nullptr);
1619
1620 ParseStatus parseOperandArrayWithPrefix(
1621 const char *Prefix, OperandVector &Operands,
1622 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1623 bool (*ConvertResult)(int64_t &) = nullptr);
1624
1625 ParseStatus
1626 parseNamedBit(StringRef Name, OperandVector &Operands,
1627 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1628 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1630 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1631 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1632 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1633 SMLoc &StringLoc);
1634
1635 bool isModifier();
1636 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1637 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1638 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1639 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1640 bool parseSP3NegModifier();
1641 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1642 bool HasLit = false);
1644 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1645 bool HasLit = false);
1646 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1647 bool AllowImm = true);
1648 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1649 bool AllowImm = true);
1650 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1651 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1652 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1653 ParseStatus tryParseIndexKey(OperandVector &Operands,
1654 AMDGPUOperand::ImmTy ImmTy);
1655 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1656 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1657
1658 ParseStatus parseDfmtNfmt(int64_t &Format);
1659 ParseStatus parseUfmt(int64_t &Format);
1660 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1661 int64_t &Format);
1662 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1663 int64_t &Format);
1664 ParseStatus parseFORMAT(OperandVector &Operands);
1665 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1666 ParseStatus parseNumericFormat(int64_t &Format);
1667 ParseStatus parseFlatOffset(OperandVector &Operands);
1668 ParseStatus parseR128A16(OperandVector &Operands);
1670 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1671 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1672
1673 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1674
1675 bool parseCnt(int64_t &IntVal);
1676 ParseStatus parseSWaitCnt(OperandVector &Operands);
1677
1678 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1679 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1680 ParseStatus parseDepCtr(OperandVector &Operands);
1681
1682 bool parseDelay(int64_t &Delay);
1683 ParseStatus parseSDelayALU(OperandVector &Operands);
1684
1685 ParseStatus parseHwreg(OperandVector &Operands);
1686
1687private:
1688 struct OperandInfoTy {
1689 SMLoc Loc;
1690 int64_t Val;
1691 bool IsSymbolic = false;
1692 bool IsDefined = false;
1693
1694 OperandInfoTy(int64_t Val) : Val(Val) {}
1695 };
1696
1697 struct StructuredOpField : OperandInfoTy {
1698 StringLiteral Id;
1699 StringLiteral Desc;
1700 unsigned Width;
1701 bool IsDefined = false;
1702
1703 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1704 int64_t Default)
1705 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1706 virtual ~StructuredOpField() = default;
1707
1708 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1709 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1710 return false;
1711 }
1712
1713 virtual bool validate(AMDGPUAsmParser &Parser) const {
1714 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1715 return Error(Parser, "not supported on this GPU");
1716 if (!isUIntN(Width, Val))
1717 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1718 return true;
1719 }
1720 };
1721
1722 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1723 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1724
1725 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1726 bool validateSendMsg(const OperandInfoTy &Msg,
1727 const OperandInfoTy &Op,
1728 const OperandInfoTy &Stream);
1729
1730 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1731 OperandInfoTy &Width);
1732
1733 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1734 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1735 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1736
1737 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1738 const OperandVector &Operands) const;
1739 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1740 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1741 SMLoc getLitLoc(const OperandVector &Operands,
1742 bool SearchMandatoryLiterals = false) const;
1743 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1744 SMLoc getConstLoc(const OperandVector &Operands) const;
1745 SMLoc getInstLoc(const OperandVector &Operands) const;
1746
1747 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1748 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1749 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1750 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1751 bool validateSOPLiteral(const MCInst &Inst) const;
1752 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1753 bool validateVOPDRegBankConstraints(const MCInst &Inst,
1754 const OperandVector &Operands);
1755 bool validateIntClampSupported(const MCInst &Inst);
1756 bool validateMIMGAtomicDMask(const MCInst &Inst);
1757 bool validateMIMGGatherDMask(const MCInst &Inst);
1758 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1759 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1760 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1761 bool validateMIMGD16(const MCInst &Inst);
1762 bool validateMIMGMSAA(const MCInst &Inst);
1763 bool validateOpSel(const MCInst &Inst);
1764 bool validateNeg(const MCInst &Inst, int OpName);
1765 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1766 bool validateVccOperand(unsigned Reg) const;
1767 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1768 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1769 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1770 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1771 bool validateAGPRLdSt(const MCInst &Inst) const;
1772 bool validateVGPRAlign(const MCInst &Inst) const;
1773 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1774 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1775 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1776 bool validateDivScale(const MCInst &Inst);
1777 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1778 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1779 const SMLoc &IDLoc);
1780 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1781 const unsigned CPol);
1782 bool validateExeczVcczOperands(const OperandVector &Operands);
1783 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1784 std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1785 unsigned getConstantBusLimit(unsigned Opcode) const;
1786 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1787 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1788 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1789
1790 bool isSupportedMnemo(StringRef Mnemo,
1791 const FeatureBitset &FBS);
1792 bool isSupportedMnemo(StringRef Mnemo,
1793 const FeatureBitset &FBS,
1794 ArrayRef<unsigned> Variants);
1795 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1796
1797 bool isId(const StringRef Id) const;
1798 bool isId(const AsmToken &Token, const StringRef Id) const;
1799 bool isToken(const AsmToken::TokenKind Kind) const;
1800 StringRef getId() const;
1801 bool trySkipId(const StringRef Id);
1802 bool trySkipId(const StringRef Pref, const StringRef Id);
1803 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1804 bool trySkipToken(const AsmToken::TokenKind Kind);
1805 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1806 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1807 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1808
1809 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1810 AsmToken::TokenKind getTokenKind() const;
1811 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1812 bool parseExpr(OperandVector &Operands);
1813 StringRef getTokenStr() const;
1814 AsmToken peekToken(bool ShouldSkipSpace = true);
1815 AsmToken getToken() const;
1816 SMLoc getLoc() const;
1817 void lex();
1818
1819public:
1820 void onBeginOfFile() override;
1821 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1822
1823 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1824
1825 ParseStatus parseExpTgt(OperandVector &Operands);
1826 ParseStatus parseSendMsg(OperandVector &Operands);
1827 ParseStatus parseInterpSlot(OperandVector &Operands);
1828 ParseStatus parseInterpAttr(OperandVector &Operands);
1829 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1830 ParseStatus parseBoolReg(OperandVector &Operands);
1831
1832 bool parseSwizzleOperand(int64_t &Op,
1833 const unsigned MinVal,
1834 const unsigned MaxVal,
1835 const StringRef ErrMsg,
1836 SMLoc &Loc);
1837 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1838 const unsigned MinVal,
1839 const unsigned MaxVal,
1840 const StringRef ErrMsg);
1841 ParseStatus parseSwizzle(OperandVector &Operands);
1842 bool parseSwizzleOffset(int64_t &Imm);
1843 bool parseSwizzleMacro(int64_t &Imm);
1844 bool parseSwizzleQuadPerm(int64_t &Imm);
1845 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1846 bool parseSwizzleBroadcast(int64_t &Imm);
1847 bool parseSwizzleSwap(int64_t &Imm);
1848 bool parseSwizzleReverse(int64_t &Imm);
1849
1850 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1851 int64_t parseGPRIdxMacro();
1852
1853 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1854 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1855
1856 ParseStatus parseOModSI(OperandVector &Operands);
1857
1858 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1859 OptionalImmIndexMap &OptionalIdx);
1860 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1861 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1862 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1863 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1864
1865 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1866 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1867 OptionalImmIndexMap &OptionalIdx);
1868 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1869 OptionalImmIndexMap &OptionalIdx);
1870
1871 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1872 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1873
1874 bool parseDimId(unsigned &Encoding);
1876 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1878 ParseStatus parseDPPCtrl(OperandVector &Operands);
1879 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1880 int64_t parseDPPCtrlSel(StringRef Ctrl);
1881 int64_t parseDPPCtrlPerm();
1882 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1883 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1884 cvtDPP(Inst, Operands, true);
1885 }
1886 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1887 bool IsDPP8 = false);
1888 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1889 cvtVOP3DPP(Inst, Operands, true);
1890 }
1891
1892 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1893 AMDGPUOperand::ImmTy Type);
1894 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1895 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1896 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1897 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1898 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1899 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1900 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1901 uint64_t BasicInstType,
1902 bool SkipDstVcc = false,
1903 bool SkipSrcVcc = false);
1904
1905 ParseStatus parseEndpgm(OperandVector &Operands);
1906
1908};
1909
1910} // end anonymous namespace
1911
1912// May be called with an integer type of equivalent bitwidth.
1913static const fltSemantics *getFltSemantics(unsigned Size) {
1914 switch (Size) {
1915 case 4:
1916 return &APFloat::IEEEsingle();
1917 case 8:
1918 return &APFloat::IEEEdouble();
1919 case 2:
1920 return &APFloat::IEEEhalf();
1921 default:
1922 llvm_unreachable("unsupported fp type");
1923 }
1924}
1925
1926static const fltSemantics *getFltSemantics(MVT VT) {
1927 return getFltSemantics(VT.getSizeInBits() / 8);
1928}
1929
1930static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1931 switch (OperandType) {
1932 // When a floating-point immediate is used as an operand of type i16, the 32-bit
1933 // representation of the constant, truncated to the 16 LSBs, should be used.
1953 return &APFloat::IEEEsingle();
1959 return &APFloat::IEEEdouble();
1968 return &APFloat::IEEEhalf();
1976 return &APFloat::BFloat();
1977 default:
1978 llvm_unreachable("unsupported fp type");
1979 }
1980}
1981
1982//===----------------------------------------------------------------------===//
1983// Operand
1984//===----------------------------------------------------------------------===//
1985
1986static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1987 bool Lost;
1988
1989 // Convert the literal to the operand's floating-point semantics.
1990 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1991                                               APFloat::rmNearestTiesToEven,
1992                                               &Lost);
1993 // We allow precision loss but not overflow or underflow
1994 if (Status != APFloat::opOK &&
1995 Lost &&
1996 ((Status & APFloat::opOverflow) != 0 ||
1997 (Status & APFloat::opUnderflow) != 0)) {
1998 return false;
1999 }
2000
2001 return true;
2002}
2003
2004static bool isSafeTruncation(int64_t Val, unsigned Size) {
2005 return isUIntN(Size, Val) || isIntN(Size, Val);
2006}
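// Worked example (values chosen here for illustration only): a value is a
// safe truncation if it fits in Size bits either as an unsigned or as a
// signed integer, e.g.
//   isSafeTruncation(0xFFFF, 16)  -> true  (fits as unsigned 16-bit)
//   isSafeTruncation(-1, 16)      -> true  (fits as signed 16-bit)
//   isSafeTruncation(0x1FFFF, 16) -> false (needs 17 bits either way)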
2007
2008static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2009 if (VT.getScalarType() == MVT::i16)
2010 return isInlinableLiteral32(Val, HasInv2Pi);
2011
2012 if (VT.getScalarType() == MVT::f16)
2013 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2014
2015 assert(VT.getScalarType() == MVT::bf16);
2016
2017 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2018}
2019
2020bool AMDGPUOperand::isInlinableImm(MVT type) const {
2021
2022 // This is a hack to enable named inline values like
2023 // shared_base with both 32-bit and 64-bit operands.
2024 // Note that these values are defined as
2025 // 32-bit operands only.
2026 if (isInlineValue()) {
2027 return true;
2028 }
2029
2030 if (!isImmTy(ImmTyNone)) {
2031 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2032 return false;
2033 }
2034 // TODO: We should avoid using host float here. It would be better to
2035 // check the float bit values which is what a few other places do.
2036 // We've had bot failures before due to weird NaN support on mips hosts.
2037
2038 APInt Literal(64, Imm.Val);
2039
2040 if (Imm.IsFPImm) { // We got fp literal token
2041 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2042 return AMDGPU::isInlinableLiteral64(Imm.Val,
2043 AsmParser->hasInv2PiInlineImm());
2044 }
2045
2046 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2047 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2048 return false;
2049
2050 if (type.getScalarSizeInBits() == 16) {
2051 bool Lost = false;
2052 switch (type.getScalarType().SimpleTy) {
2053 default:
2054 llvm_unreachable("unknown 16-bit type");
2055 case MVT::bf16:
2056 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2057 &Lost);
2058 break;
2059 case MVT::f16:
2060 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2061 &Lost);
2062 break;
2063 case MVT::i16:
2064 FPLiteral.convert(APFloatBase::IEEEsingle(),
2065 APFloat::rmNearestTiesToEven, &Lost);
2066 break;
2067 }
2068 // We need to use the 32-bit representation here because when a floating-point
2069 // inline constant is used as an i16 operand, its 32-bit representation
2070 // will be used. We will need the 32-bit value to check if it is an FP
2071 // inline constant.
2072 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2073 return isInlineableLiteralOp16(ImmVal, type,
2074 AsmParser->hasInv2PiInlineImm());
2075 }
2076
2077 // Check if single precision literal is inlinable
2078 return AMDGPU::isInlinableLiteral32(
2079 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2080 AsmParser->hasInv2PiInlineImm());
2081 }
2082
2083 // We got int literal token.
2084 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2085 return AMDGPU::isInlinableLiteral64(Imm.Val,
2086 AsmParser->hasInv2PiInlineImm());
2087 }
2088
2089 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2090 return false;
2091 }
2092
2093 if (type.getScalarSizeInBits() == 16) {
2094 return isInlineableLiteralOp16(
2095 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2096 type, AsmParser->hasInv2PiInlineImm());
2097 }
2098
2099 return AMDGPU::isInlinableLiteral32(
2100 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2101 AsmParser->hasInv2PiInlineImm());
2102}
2103
2104bool AMDGPUOperand::isLiteralImm(MVT type) const {
2105 // Check that this immediate can be added as literal
2106 if (!isImmTy(ImmTyNone)) {
2107 return false;
2108 }
2109
2110 if (!Imm.IsFPImm) {
2111 // We got int literal token.
2112
2113 if (type == MVT::f64 && hasFPModifiers()) {
2114 // FP modifiers cannot be applied to int literals while preserving the same
2115 // semantics for VOP1/2/C and VOP3, because of integer truncation. To avoid
2116 // ambiguity, these cases are disabled.
2117 return false;
2118 }
2119
2120 unsigned Size = type.getSizeInBits();
2121 if (Size == 64)
2122 Size = 32;
2123
2124 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2125 // types.
2126 return isSafeTruncation(Imm.Val, Size);
2127 }
2128
2129 // We got fp literal token
2130 if (type == MVT::f64) { // Expected 64-bit fp operand
2131 // We would set the low 32 bits of the literal to zeroes, but we accept these literals
2132 return true;
2133 }
2134
2135 if (type == MVT::i64) { // Expected 64-bit int operand
2136 // We don't allow fp literals in 64-bit integer instructions. It is
2137 // unclear how we should encode them.
2138 return false;
2139 }
2140
2141 // We allow fp literals with f16x2 operands assuming that the specified
2142 // literal goes into the lower half and the upper half is zero. We also
2143 // require that the literal may be losslessly converted to f16.
2144 //
2145 // For i16x2 operands, we assume that the specified literal is encoded as a
2146 // single-precision float. This is pretty odd, but it matches SP3 and what
2147 // happens in hardware.
2148 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2149 : (type == MVT::v2i16) ? MVT::f32
2150 : (type == MVT::v2f32) ? MVT::f32
2151 : type;
2152
2153 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2154 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2155}
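// For example (operands chosen for illustration): with a v2f16 operand a
// literal such as 1.5 is accepted because it converts losslessly to f16 and
// is assumed to fill the lower half, whereas with a v2i16 operand the same
// literal is checked against f32, matching the SP3/hardware behavior noted
// above.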
2156
2157bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2158 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2159}
2160
2161bool AMDGPUOperand::isVRegWithInputMods() const {
2162 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2163 // GFX90A allows DPP on 64-bit operands.
2164 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2165 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2166}
2167
2168template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2169 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2170 : AMDGPU::VGPR_16_Lo128RegClassID);
2171}
2172
2173bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2174 if (AsmParser->isVI())
2175 return isVReg32();
2176 else if (AsmParser->isGFX9Plus())
2177 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2178 else
2179 return false;
2180}
2181
2182bool AMDGPUOperand::isSDWAFP16Operand() const {
2183 return isSDWAOperand(MVT::f16);
2184}
2185
2186bool AMDGPUOperand::isSDWAFP32Operand() const {
2187 return isSDWAOperand(MVT::f32);
2188}
2189
2190bool AMDGPUOperand::isSDWAInt16Operand() const {
2191 return isSDWAOperand(MVT::i16);
2192}
2193
2194bool AMDGPUOperand::isSDWAInt32Operand() const {
2195 return isSDWAOperand(MVT::i32);
2196}
2197
2198bool AMDGPUOperand::isBoolReg() const {
2199 auto FB = AsmParser->getFeatureBits();
2200 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2201 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2202}
2203
2204uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2205{
2206 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2207 assert(Size == 2 || Size == 4 || Size == 8);
2208
2209 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2210
2211 if (Imm.Mods.Abs) {
2212 Val &= ~FpSignMask;
2213 }
2214 if (Imm.Mods.Neg) {
2215 Val ^= FpSignMask;
2216 }
2217
2218 return Val;
2219}
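// Worked example (raw value chosen for illustration): for Size == 4 the sign
// mask is 1ULL << 31 == 0x80000000, so with 'abs' the encoding 0xBF800000
// (-1.0f) becomes 0x3F800000 (1.0f), and with 'neg' the sign bit is flipped
// instead.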
2220
2221void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2222 if (isExpr()) {
2224 return;
2225 }
2226
2227 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2228 Inst.getNumOperands())) {
2229 addLiteralImmOperand(Inst, Imm.Val,
2230 ApplyModifiers &
2231 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2232 } else {
2233 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2234 Inst.addOperand(MCOperand::createImm(Imm.Val));
2235 setImmKindNone();
2236 }
2237}
2238
2239void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2240 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2241 auto OpNum = Inst.getNumOperands();
2242 // Check that this operand accepts literals
2243 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2244
2245 if (ApplyModifiers) {
2246 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2247 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2248 Val = applyInputFPModifiers(Val, Size);
2249 }
2250
2251 APInt Literal(64, Val);
2252 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2253
2254 if (Imm.IsFPImm) { // We got fp literal token
2255 switch (OpTy) {
2261 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2262 AsmParser->hasInv2PiInlineImm())) {
2263 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2264 setImmKindConst();
2265 return;
2266 }
2267
2268 // Non-inlineable
2269 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2270 // For fp operands we check if low 32 bits are zeros
2271 if (Literal.getLoBits(32) != 0) {
2272 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2273 "Can't encode literal as exact 64-bit floating-point operand. "
2274 "Low 32-bits will be set to zero");
2275 Val &= 0xffffffff00000000u;
2276 }
2277
2279 setImmKindLiteral();
2280 return;
2281 }
2282
2283 // We don't allow fp literals in 64-bit integer instructions. It is
2284 // unclear how we should encode them. This case should be checked earlier
2285 // in predicate methods (isLiteralImm())
2286 llvm_unreachable("fp literal in 64-bit integer instruction.");
2287
2295 if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
2296 // This is 1/(2*pi), which is going to be truncated to bf16 with a
2297 // loss of precision. The constant represents the idiomatic fp32 value of
2298 // 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16 bits
2299 // cleared. Prevent rounding below.
2300 Inst.addOperand(MCOperand::createImm(0x3e22));
2301 setImmKindLiteral();
2302 return;
2303 }
2304 [[fallthrough]];
2305
2333 bool lost;
2334 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2335 // Convert literal to single precision
2336 FPLiteral.convert(*getOpFltSemantics(OpTy),
2337 APFloat::rmNearestTiesToEven, &lost);
2338 // We allow precision loss but not overflow or underflow. This should be
2339 // checked earlier in isLiteralImm()
2340
2341 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2342 Inst.addOperand(MCOperand::createImm(ImmVal));
2343 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2344 setImmKindMandatoryLiteral();
2345 } else {
2346 setImmKindLiteral();
2347 }
2348 return;
2349 }
2350 default:
2351 llvm_unreachable("invalid operand size");
2352 }
2353
2354 return;
2355 }
2356
2357 // We got int literal token.
2358 // Only sign extend inline immediates.
2359 switch (OpTy) {
2375 if (isSafeTruncation(Val, 32) &&
2376 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2377 AsmParser->hasInv2PiInlineImm())) {
2379 setImmKindConst();
2380 return;
2381 }
2382
2383 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2384 setImmKindLiteral();
2385 return;
2386
2392 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2394 setImmKindConst();
2395 return;
2396 }
2397
2398 Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
2399 : Lo_32(Val);
2400
2402 setImmKindLiteral();
2403 return;
2404
2408 if (isSafeTruncation(Val, 16) &&
2409 AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2410 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2411 setImmKindConst();
2412 return;
2413 }
2414
2415 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2416 setImmKindLiteral();
2417 return;
2418
2423 if (isSafeTruncation(Val, 16) &&
2424 AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2425 AsmParser->hasInv2PiInlineImm())) {
2427 setImmKindConst();
2428 return;
2429 }
2430
2431 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2432 setImmKindLiteral();
2433 return;
2434
2439 if (isSafeTruncation(Val, 16) &&
2440 AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2441 AsmParser->hasInv2PiInlineImm())) {
2443 setImmKindConst();
2444 return;
2445 }
2446
2447 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2448 setImmKindLiteral();
2449 return;
2450
2453 assert(isSafeTruncation(Val, 16));
2454 assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2456 return;
2457 }
2460 assert(isSafeTruncation(Val, 16));
2461 assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2462 AsmParser->hasInv2PiInlineImm()));
2463
2465 return;
2466 }
2467
2470 assert(isSafeTruncation(Val, 16));
2471 assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2472 AsmParser->hasInv2PiInlineImm()));
2473
2475 return;
2476 }
2477
2478 case AMDGPU::OPERAND_KIMM32:
2479 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2480 setImmKindMandatoryLiteral();
2481 return;
2482 case AMDGPU::OPERAND_KIMM16:
2483 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2484 setImmKindMandatoryLiteral();
2485 return;
2486 default:
2487 llvm_unreachable("invalid operand size");
2488 }
2489}
2490
2491void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2492 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2493}
2494
2495bool AMDGPUOperand::isInlineValue() const {
2496 return isRegKind() && ::isInlineValue(getReg());
2497}
2498
2499//===----------------------------------------------------------------------===//
2500// AsmParser
2501//===----------------------------------------------------------------------===//
2502
2503static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2504 if (Is == IS_VGPR) {
2505 switch (RegWidth) {
2506 default: return -1;
2507 case 32:
2508 return AMDGPU::VGPR_32RegClassID;
2509 case 64:
2510 return AMDGPU::VReg_64RegClassID;
2511 case 96:
2512 return AMDGPU::VReg_96RegClassID;
2513 case 128:
2514 return AMDGPU::VReg_128RegClassID;
2515 case 160:
2516 return AMDGPU::VReg_160RegClassID;
2517 case 192:
2518 return AMDGPU::VReg_192RegClassID;
2519 case 224:
2520 return AMDGPU::VReg_224RegClassID;
2521 case 256:
2522 return AMDGPU::VReg_256RegClassID;
2523 case 288:
2524 return AMDGPU::VReg_288RegClassID;
2525 case 320:
2526 return AMDGPU::VReg_320RegClassID;
2527 case 352:
2528 return AMDGPU::VReg_352RegClassID;
2529 case 384:
2530 return AMDGPU::VReg_384RegClassID;
2531 case 512:
2532 return AMDGPU::VReg_512RegClassID;
2533 case 1024:
2534 return AMDGPU::VReg_1024RegClassID;
2535 }
2536 } else if (Is == IS_TTMP) {
2537 switch (RegWidth) {
2538 default: return -1;
2539 case 32:
2540 return AMDGPU::TTMP_32RegClassID;
2541 case 64:
2542 return AMDGPU::TTMP_64RegClassID;
2543 case 128:
2544 return AMDGPU::TTMP_128RegClassID;
2545 case 256:
2546 return AMDGPU::TTMP_256RegClassID;
2547 case 512:
2548 return AMDGPU::TTMP_512RegClassID;
2549 }
2550 } else if (Is == IS_SGPR) {
2551 switch (RegWidth) {
2552 default: return -1;
2553 case 32:
2554 return AMDGPU::SGPR_32RegClassID;
2555 case 64:
2556 return AMDGPU::SGPR_64RegClassID;
2557 case 96:
2558 return AMDGPU::SGPR_96RegClassID;
2559 case 128:
2560 return AMDGPU::SGPR_128RegClassID;
2561 case 160:
2562 return AMDGPU::SGPR_160RegClassID;
2563 case 192:
2564 return AMDGPU::SGPR_192RegClassID;
2565 case 224:
2566 return AMDGPU::SGPR_224RegClassID;
2567 case 256:
2568 return AMDGPU::SGPR_256RegClassID;
2569 case 288:
2570 return AMDGPU::SGPR_288RegClassID;
2571 case 320:
2572 return AMDGPU::SGPR_320RegClassID;
2573 case 352:
2574 return AMDGPU::SGPR_352RegClassID;
2575 case 384:
2576 return AMDGPU::SGPR_384RegClassID;
2577 case 512:
2578 return AMDGPU::SGPR_512RegClassID;
2579 }
2580 } else if (Is == IS_AGPR) {
2581 switch (RegWidth) {
2582 default: return -1;
2583 case 32:
2584 return AMDGPU::AGPR_32RegClassID;
2585 case 64:
2586 return AMDGPU::AReg_64RegClassID;
2587 case 96:
2588 return AMDGPU::AReg_96RegClassID;
2589 case 128:
2590 return AMDGPU::AReg_128RegClassID;
2591 case 160:
2592 return AMDGPU::AReg_160RegClassID;
2593 case 192:
2594 return AMDGPU::AReg_192RegClassID;
2595 case 224:
2596 return AMDGPU::AReg_224RegClassID;
2597 case 256:
2598 return AMDGPU::AReg_256RegClassID;
2599 case 288:
2600 return AMDGPU::AReg_288RegClassID;
2601 case 320:
2602 return AMDGPU::AReg_320RegClassID;
2603 case 352:
2604 return AMDGPU::AReg_352RegClassID;
2605 case 384:
2606 return AMDGPU::AReg_384RegClassID;
2607 case 512:
2608 return AMDGPU::AReg_512RegClassID;
2609 case 1024:
2610 return AMDGPU::AReg_1024RegClassID;
2611 }
2612 }
2613 return -1;
2614}
2615
2616static unsigned getSpecialRegForName(StringRef RegName) {
2617 return StringSwitch<unsigned>(RegName)
2618 .Case("exec", AMDGPU::EXEC)
2619 .Case("vcc", AMDGPU::VCC)
2620 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2621 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2622 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2623 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2624 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2625 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2626 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2627 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2628 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2629 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2630 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2631 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2632 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2633 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2634 .Case("m0", AMDGPU::M0)
2635 .Case("vccz", AMDGPU::SRC_VCCZ)
2636 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2637 .Case("execz", AMDGPU::SRC_EXECZ)
2638 .Case("src_execz", AMDGPU::SRC_EXECZ)
2639 .Case("scc", AMDGPU::SRC_SCC)
2640 .Case("src_scc", AMDGPU::SRC_SCC)
2641 .Case("tba", AMDGPU::TBA)
2642 .Case("tma", AMDGPU::TMA)
2643 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2644 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2645 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2646 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2647 .Case("vcc_lo", AMDGPU::VCC_LO)
2648 .Case("vcc_hi", AMDGPU::VCC_HI)
2649 .Case("exec_lo", AMDGPU::EXEC_LO)
2650 .Case("exec_hi", AMDGPU::EXEC_HI)
2651 .Case("tma_lo", AMDGPU::TMA_LO)
2652 .Case("tma_hi", AMDGPU::TMA_HI)
2653 .Case("tba_lo", AMDGPU::TBA_LO)
2654 .Case("tba_hi", AMDGPU::TBA_HI)
2655 .Case("pc", AMDGPU::PC_REG)
2656 .Case("null", AMDGPU::SGPR_NULL)
2657 .Default(AMDGPU::NoRegister);
2658}
2659
2660bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2661 SMLoc &EndLoc, bool RestoreOnFailure) {
2662 auto R = parseRegister();
2663 if (!R) return true;
2664 assert(R->isReg());
2665 RegNo = R->getReg();
2666 StartLoc = R->getStartLoc();
2667 EndLoc = R->getEndLoc();
2668 return false;
2669}
2670
2671bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2672 SMLoc &EndLoc) {
2673 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2674}
2675
2676ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2677 SMLoc &EndLoc) {
2678 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2679 bool PendingErrors = getParser().hasPendingError();
2680 getParser().clearPendingErrors();
2681 if (PendingErrors)
2682 return ParseStatus::Failure;
2683 if (Result)
2684 return ParseStatus::NoMatch;
2685 return ParseStatus::Success;
2686}
2687
2688bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2689 RegisterKind RegKind, unsigned Reg1,
2690 SMLoc Loc) {
2691 switch (RegKind) {
2692 case IS_SPECIAL:
2693 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2694 Reg = AMDGPU::EXEC;
2695 RegWidth = 64;
2696 return true;
2697 }
2698 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2699 Reg = AMDGPU::FLAT_SCR;
2700 RegWidth = 64;
2701 return true;
2702 }
2703 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2704 Reg = AMDGPU::XNACK_MASK;
2705 RegWidth = 64;
2706 return true;
2707 }
2708 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2709 Reg = AMDGPU::VCC;
2710 RegWidth = 64;
2711 return true;
2712 }
2713 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2714 Reg = AMDGPU::TBA;
2715 RegWidth = 64;
2716 return true;
2717 }
2718 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2719 Reg = AMDGPU::TMA;
2720 RegWidth = 64;
2721 return true;
2722 }
2723 Error(Loc, "register does not fit in the list");
2724 return false;
2725 case IS_VGPR:
2726 case IS_SGPR:
2727 case IS_AGPR:
2728 case IS_TTMP:
2729 if (Reg1 != Reg + RegWidth / 32) {
2730 Error(Loc, "registers in a list must have consecutive indices");
2731 return false;
2732 }
2733 RegWidth += 32;
2734 return true;
2735 default:
2736 llvm_unreachable("unexpected register kind");
2737 }
2738}
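// For example (registers chosen for illustration): while parsing the list
// [s0, s1], the second element must have index 0 + 32/32 == 1, so s1 is
// accepted and RegWidth grows from 32 to 64; a list like [s0, s2] is
// rejected because the indices are not consecutive.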
2739
2740struct RegInfo {
2741 StringLiteral Name;
2742 RegisterKind Kind;
2743};
2744
2745static constexpr RegInfo RegularRegisters[] = {
2746 {{"v"}, IS_VGPR},
2747 {{"s"}, IS_SGPR},
2748 {{"ttmp"}, IS_TTMP},
2749 {{"acc"}, IS_AGPR},
2750 {{"a"}, IS_AGPR},
2751};
2752
2753static bool isRegularReg(RegisterKind Kind) {
2754 return Kind == IS_VGPR ||
2755 Kind == IS_SGPR ||
2756 Kind == IS_TTMP ||
2757 Kind == IS_AGPR;
2758}
2759
2760static const RegInfo* getRegularRegInfo(StringRef Str) {
2761 for (const RegInfo &Reg : RegularRegisters)
2762 if (Str.starts_with(Reg.Name))
2763 return &Reg;
2764 return nullptr;
2765}
2766
2767static bool getRegNum(StringRef Str, unsigned& Num) {
2768 return !Str.getAsInteger(10, Num);
2769}
2770
2771bool
2772AMDGPUAsmParser::isRegister(const AsmToken &Token,
2773 const AsmToken &NextToken) const {
2774
2775 // A list of consecutive registers: [s0,s1,s2,s3]
2776 if (Token.is(AsmToken::LBrac))
2777 return true;
2778
2779 if (!Token.is(AsmToken::Identifier))
2780 return false;
2781
2782 // A single register like s0 or a range of registers like s[0:1]
2783
2784 StringRef Str = Token.getString();
2785 const RegInfo *Reg = getRegularRegInfo(Str);
2786 if (Reg) {
2787 StringRef RegName = Reg->Name;
2788 StringRef RegSuffix = Str.substr(RegName.size());
2789 if (!RegSuffix.empty()) {
2790 RegSuffix.consume_back(".l");
2791 RegSuffix.consume_back(".h");
2792 unsigned Num;
2793 // A single register with an index: rXX
2794 if (getRegNum(RegSuffix, Num))
2795 return true;
2796 } else {
2797 // A range of registers: r[XX:YY].
2798 if (NextToken.is(AsmToken::LBrac))
2799 return true;
2800 }
2801 }
2802
2803 return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2804}
2805
2806bool
2807AMDGPUAsmParser::isRegister()
2808{
2809 return isRegister(getToken(), peekToken());
2810}
2811
2812unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2813 unsigned SubReg, unsigned RegWidth,
2814 SMLoc Loc) {
2815 assert(isRegularReg(RegKind));
2816
2817 unsigned AlignSize = 1;
2818 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2819 // SGPR and TTMP registers must be aligned.
2820 // Max required alignment is 4 dwords.
2821 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2822 }
2823
2824 if (RegNum % AlignSize != 0) {
2825 Error(Loc, "invalid register alignment");
2826 return AMDGPU::NoRegister;
2827 }
2828
2829 unsigned RegIdx = RegNum / AlignSize;
2830 int RCID = getRegClass(RegKind, RegWidth);
2831 if (RCID == -1) {
2832 Error(Loc, "invalid or unsupported register size");
2833 return AMDGPU::NoRegister;
2834 }
2835
2836 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2837 const MCRegisterClass RC = TRI->getRegClass(RCID);
2838 if (RegIdx >= RC.getNumRegs()) {
2839 Error(Loc, "register index is out of range");
2840 return AMDGPU::NoRegister;
2841 }
2842
2843 unsigned Reg = RC.getRegister(RegIdx);
2844
2845 if (SubReg) {
2846 Reg = TRI->getSubReg(Reg, SubReg);
2847
2848 // Currently all regular registers have their .l and .h subregisters, so
2849 // we should never need to generate an error here.
2850 assert(Reg && "Invalid subregister!");
2851 }
2852
2853 return Reg;
2854}
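// Worked example (register chosen for illustration): for "s[2:3]" the caller
// passes RegKind == IS_SGPR, RegNum == 2 and RegWidth == 64, so AlignSize ==
// 2, RegIdx == 1 and the result is the second register of the SGPR_64 class,
// i.e. the aligned pair s[2:3].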
2855
2856bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2857 int64_t RegLo, RegHi;
2858 if (!skipToken(AsmToken::LBrac, "missing register index"))
2859 return false;
2860
2861 SMLoc FirstIdxLoc = getLoc();
2862 SMLoc SecondIdxLoc;
2863
2864 if (!parseExpr(RegLo))
2865 return false;
2866
2867 if (trySkipToken(AsmToken::Colon)) {
2868 SecondIdxLoc = getLoc();
2869 if (!parseExpr(RegHi))
2870 return false;
2871 } else {
2872 RegHi = RegLo;
2873 }
2874
2875 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2876 return false;
2877
2878 if (!isUInt<32>(RegLo)) {
2879 Error(FirstIdxLoc, "invalid register index");
2880 return false;
2881 }
2882
2883 if (!isUInt<32>(RegHi)) {
2884 Error(SecondIdxLoc, "invalid register index");
2885 return false;
2886 }
2887
2888 if (RegLo > RegHi) {
2889 Error(FirstIdxLoc, "first register index should not exceed second index");
2890 return false;
2891 }
2892
2893 Num = static_cast<unsigned>(RegLo);
2894 RegWidth = 32 * ((RegHi - RegLo) + 1);
2895 return true;
2896}
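// For example (indices chosen for illustration): the suffix "[4:7]" parses to
// Num == 4 and RegWidth == 32 * (7 - 4 + 1) == 128, while a single index
// "[5]" yields Num == 5 and RegWidth == 32.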
2897
2898unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2899 unsigned &RegNum, unsigned &RegWidth,
2900 SmallVectorImpl<AsmToken> &Tokens) {
2901 assert(isToken(AsmToken::Identifier));
2902 unsigned Reg = getSpecialRegForName(getTokenStr());
2903 if (Reg) {
2904 RegNum = 0;
2905 RegWidth = 32;
2906 RegKind = IS_SPECIAL;
2907 Tokens.push_back(getToken());
2908 lex(); // skip register name
2909 }
2910 return Reg;
2911}
2912
2913unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2914 unsigned &RegNum, unsigned &RegWidth,
2915 SmallVectorImpl<AsmToken> &Tokens) {
2916 assert(isToken(AsmToken::Identifier));
2917 StringRef RegName = getTokenStr();
2918 auto Loc = getLoc();
2919
2920 const RegInfo *RI = getRegularRegInfo(RegName);
2921 if (!RI) {
2922 Error(Loc, "invalid register name");
2923 return AMDGPU::NoRegister;
2924 }
2925
2926 Tokens.push_back(getToken());
2927 lex(); // skip register name
2928
2929 RegKind = RI->Kind;
2930 StringRef RegSuffix = RegName.substr(RI->Name.size());
2931 unsigned SubReg = NoSubRegister;
2932 if (!RegSuffix.empty()) {
2933 // We don't know the opcode until we are done parsing, so we don't know if
2934 // registers should be 16- or 32-bit. It is therefore mandatory to put .l or
2935 // .h to correctly specify 16-bit registers. We also can't determine the class,
2936 // VGPR_16_Lo128 or VGPR_16, so always parse them as VGPR_16.
2937 if (RegSuffix.consume_back(".l"))
2938 SubReg = AMDGPU::lo16;
2939 else if (RegSuffix.consume_back(".h"))
2940 SubReg = AMDGPU::hi16;
2941
2942 // Single 32-bit register: vXX.
2943 if (!getRegNum(RegSuffix, RegNum)) {
2944 Error(Loc, "invalid register index");
2945 return AMDGPU::NoRegister;
2946 }
2947 RegWidth = 32;
2948 } else {
2949 // Range of registers: v[XX:YY]. ":YY" is optional.
2950 if (!ParseRegRange(RegNum, RegWidth))
2951 return AMDGPU::NoRegister;
2952 }
2953
2954 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
2955}
2956
2957unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2958 unsigned &RegWidth,
2959 SmallVectorImpl<AsmToken> &Tokens) {
2960 unsigned Reg = AMDGPU::NoRegister;
2961 auto ListLoc = getLoc();
2962
2963 if (!skipToken(AsmToken::LBrac,
2964 "expected a register or a list of registers")) {
2965 return AMDGPU::NoRegister;
2966 }
2967
2968 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2969
2970 auto Loc = getLoc();
2971 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2972 return AMDGPU::NoRegister;
2973 if (RegWidth != 32) {
2974 Error(Loc, "expected a single 32-bit register");
2975 return AMDGPU::NoRegister;
2976 }
2977
2978 for (; trySkipToken(AsmToken::Comma); ) {
2979 RegisterKind NextRegKind;
2980 unsigned NextReg, NextRegNum, NextRegWidth;
2981 Loc = getLoc();
2982
2983 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2984 NextRegNum, NextRegWidth,
2985 Tokens)) {
2986 return AMDGPU::NoRegister;
2987 }
2988 if (NextRegWidth != 32) {
2989 Error(Loc, "expected a single 32-bit register");
2990 return AMDGPU::NoRegister;
2991 }
2992 if (NextRegKind != RegKind) {
2993 Error(Loc, "registers in a list must be of the same kind");
2994 return AMDGPU::NoRegister;
2995 }
2996 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2997 return AMDGPU::NoRegister;
2998 }
2999
3000 if (!skipToken(AsmToken::RBrac,
3001 "expected a comma or a closing square bracket")) {
3002 return AMDGPU::NoRegister;
3003 }
3004
3005 if (isRegularReg(RegKind))
3006 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3007
3008 return Reg;
3009}
3010
3011bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3012 unsigned &RegNum, unsigned &RegWidth,
3013 SmallVectorImpl<AsmToken> &Tokens) {
3014 auto Loc = getLoc();
3015 Reg = AMDGPU::NoRegister;
3016
3017 if (isToken(AsmToken::Identifier)) {
3018 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3019 if (Reg == AMDGPU::NoRegister)
3020 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3021 } else {
3022 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3023 }
3024
3025 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3026 if (Reg == AMDGPU::NoRegister) {
3027 assert(Parser.hasPendingError());
3028 return false;
3029 }
3030
3031 if (!subtargetHasRegister(*TRI, Reg)) {
3032 if (Reg == AMDGPU::SGPR_NULL) {
3033 Error(Loc, "'null' operand is not supported on this GPU");
3034 } else {
3035 Error(Loc, "register not available on this GPU");
3036 }
3037 return false;
3038 }
3039
3040 return true;
3041}
3042
3043bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3044 unsigned &RegNum, unsigned &RegWidth,
3045 bool RestoreOnFailure /*=false*/) {
3046 Reg = AMDGPU::NoRegister;
3047
3048 SmallVector<AsmToken, 1> Tokens;
3049 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3050 if (RestoreOnFailure) {
3051 while (!Tokens.empty()) {
3052 getLexer().UnLex(Tokens.pop_back_val());
3053 }
3054 }
3055 return true;
3056 }
3057 return false;
3058}
3059
3060std::optional<StringRef>
3061AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3062 switch (RegKind) {
3063 case IS_VGPR:
3064 return StringRef(".amdgcn.next_free_vgpr");
3065 case IS_SGPR:
3066 return StringRef(".amdgcn.next_free_sgpr");
3067 default:
3068 return std::nullopt;
3069 }
3070}
3071
3072void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3073 auto SymbolName = getGprCountSymbolName(RegKind);
3074 assert(SymbolName && "initializing invalid register kind");
3075 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3076 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
3077}
3078
3079bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3080 unsigned DwordRegIndex,
3081 unsigned RegWidth) {
3082 // Symbols are only defined for GCN targets
3083 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3084 return true;
3085
3086 auto SymbolName = getGprCountSymbolName(RegKind);
3087 if (!SymbolName)
3088 return true;
3089 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3090
3091 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3092 int64_t OldCount;
3093
3094 if (!Sym->isVariable())
3095 return !Error(getLoc(),
3096 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3097 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
3098 return !Error(
3099 getLoc(),
3100 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3101
3102 if (OldCount <= NewMax)
3103 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
3104
3105 return true;
3106}
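// For example (register chosen for illustration): after parsing v[6:7],
// DwordRegIndex == 6 and RegWidth == 64, so NewMax == 6 + 2 - 1 == 7 and
// .amdgcn.next_free_vgpr is raised to 8 unless it already holds a larger
// value.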
3107
3108std::unique_ptr<AMDGPUOperand>
3109AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3110 const auto &Tok = getToken();
3111 SMLoc StartLoc = Tok.getLoc();
3112 SMLoc EndLoc = Tok.getEndLoc();
3113 RegisterKind RegKind;
3114 unsigned Reg, RegNum, RegWidth;
3115
3116 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3117 return nullptr;
3118 }
3119 if (isHsaAbi(getSTI())) {
3120 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3121 return nullptr;
3122 } else
3123 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3124 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3125}
3126
3127ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3128 bool HasSP3AbsModifier, bool HasLit) {
3129 // TODO: add syntactic sugar for 1/(2*PI)
3130
3131 if (isRegister())
3132 return ParseStatus::NoMatch;
3133 assert(!isModifier());
3134
3135 if (!HasLit) {
3136 HasLit = trySkipId("lit");
3137 if (HasLit) {
3138 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3139 return ParseStatus::Failure;
3140 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
3141 if (S.isSuccess() &&
3142 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3143 return ParseStatus::Failure;
3144 return S;
3145 }
3146 }
3147
3148 const auto& Tok = getToken();
3149 const auto& NextTok = peekToken();
3150 bool IsReal = Tok.is(AsmToken::Real);
3151 SMLoc S = getLoc();
3152 bool Negate = false;
3153
3154 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3155 lex();
3156 IsReal = true;
3157 Negate = true;
3158 }
3159
3160 AMDGPUOperand::Modifiers Mods;
3161 Mods.Lit = HasLit;
3162
3163 if (IsReal) {
3164 // Floating-point expressions are not supported.
3165 // Can only allow floating-point literals with an
3166 // optional sign.
3167
3168 StringRef Num = getTokenStr();
3169 lex();
3170
3171 APFloat RealVal(APFloat::IEEEdouble());
3172 auto roundMode = APFloat::rmNearestTiesToEven;
3173 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3174 return ParseStatus::Failure;
3175 if (Negate)
3176 RealVal.changeSign();
3177
3178 Operands.push_back(
3179 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3180 AMDGPUOperand::ImmTyNone, true));
3181 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3182 Op.setModifiers(Mods);
3183
3184 return ParseStatus::Success;
3185
3186 } else {
3187 int64_t IntVal;
3188 const MCExpr *Expr;
3189 SMLoc S = getLoc();
3190
3191 if (HasSP3AbsModifier) {
3192 // This is a workaround for handling expressions
3193 // as arguments of SP3 'abs' modifier, for example:
3194 // |1.0|
3195 // |-1|
3196 // |1+x|
3197 // This syntax is not compatible with syntax of standard
3198 // MC expressions (due to the trailing '|').
3199 SMLoc EndLoc;
3200 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3201 return ParseStatus::Failure;
3202 } else {
3203 if (Parser.parseExpression(Expr))
3204 return ParseStatus::Failure;
3205 }
3206
3207 if (Expr->evaluateAsAbsolute(IntVal)) {
3208 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3209 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3210 Op.setModifiers(Mods);
3211 } else {
3212 if (HasLit)
3213 return ParseStatus::NoMatch;
3214 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3215 }
3216
3217 return ParseStatus::Success;
3218 }
3219
3220 return ParseStatus::NoMatch;
3221}
3222
3223ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3224 if (!isRegister())
3225 return ParseStatus::NoMatch;
3226
3227 if (auto R = parseRegister()) {
3228 assert(R->isReg());
3229 Operands.push_back(std::move(R));
3230 return ParseStatus::Success;
3231 }
3232 return ParseStatus::Failure;
3233}
3234
3235ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3236 bool HasSP3AbsMod, bool HasLit) {
3237 ParseStatus Res = parseReg(Operands);
3238 if (!Res.isNoMatch())
3239 return Res;
3240 if (isModifier())
3241 return ParseStatus::NoMatch;
3242 return parseImm(Operands, HasSP3AbsMod, HasLit);
3243}
3244
3245bool
3246AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3247 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3248 const auto &str = Token.getString();
3249 return str == "abs" || str == "neg" || str == "sext";
3250 }
3251 return false;
3252}
3253
3254bool
3255AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3256 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3257}
3258
3259bool
3260AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3261 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3262}
3263
3264bool
3265AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3266 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3267}
3268
3269// Check if this is an operand modifier or an opcode modifier
3270// which may look like an expression but it is not. We should
3271// avoid parsing these modifiers as expressions. Currently
3272// recognized sequences are:
3273// |...|
3274// abs(...)
3275// neg(...)
3276// sext(...)
3277// -reg
3278// -|...|
3279// -abs(...)
3280// name:...
3281//
3282bool
3283AMDGPUAsmParser::isModifier() {
3284
3285 AsmToken Tok = getToken();
3286 AsmToken NextToken[2];
3287 peekTokens(NextToken);
3288
3289 return isOperandModifier(Tok, NextToken[0]) ||
3290 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3291 isOpcodeModifierWithVal(Tok, NextToken[0]);
3292}
3293
3294// Check if the current token is an SP3 'neg' modifier.
3295// Currently this modifier is allowed in the following contexts:
3296//
3297// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3298// 2. Before an 'abs' modifier: -abs(...)
3299// 3. Before an SP3 'abs' modifier: -|...|
3300//
3301// In all other cases "-" is handled as a part
3302// of an expression that follows the sign.
3303//
3304// Note: When "-" is followed by an integer literal,
3305// this is interpreted as integer negation rather
3306// than a floating-point NEG modifier applied to N.
3307// Besides being counter-intuitive, such use of the floating-point
3308// NEG modifier would have resulted in different meanings
3309// of integer literals used with VOP1/2/C and VOP3,
3310// for example:
3311// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3312// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3313// Negative fp literals with a preceding "-" are
3314// handled likewise, for uniformity.
3315//
3316bool
3317AMDGPUAsmParser::parseSP3NegModifier() {
3318
3319 AsmToken NextToken[2];
3320 peekTokens(NextToken);
3321
3322 if (isToken(AsmToken::Minus) &&
3323 (isRegister(NextToken[0], NextToken[1]) ||
3324 NextToken[0].is(AsmToken::Pipe) ||
3325 isId(NextToken[0], "abs"))) {
3326 lex();
3327 return true;
3328 }
3329
3330 return false;
3331}
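// For example (assembly chosen for illustration): in "v_add_f32 v0, -v1, v2"
// the leading "-" is consumed here as an SP3 neg modifier on v1, whereas in
// "v_add_f32 v0, -1, v2" the "-" is left for expression parsing and becomes
// part of the integer literal, as explained in the comment above.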
3332
3334AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3335 bool AllowImm) {
3336 bool Neg, SP3Neg;
3337 bool Abs, SP3Abs;
3338 bool Lit;
3339 SMLoc Loc;
3340
3341 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3342 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3343 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3344
3345 SP3Neg = parseSP3NegModifier();
3346
3347 Loc = getLoc();
3348 Neg = trySkipId("neg");
3349 if (Neg && SP3Neg)
3350 return Error(Loc, "expected register or immediate");
3351 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3352 return ParseStatus::Failure;
3353
3354 Abs = trySkipId("abs");
3355 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3356 return ParseStatus::Failure;
3357
3358 Lit = trySkipId("lit");
3359 if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3360 return ParseStatus::Failure;
3361
3362 Loc = getLoc();
3363 SP3Abs = trySkipToken(AsmToken::Pipe);
3364 if (Abs && SP3Abs)
3365 return Error(Loc, "expected register or immediate");
3366
3367 ParseStatus Res;
3368 if (AllowImm) {
3369 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3370 } else {
3371 Res = parseReg(Operands);
3372 }
3373 if (!Res.isSuccess())
3374 return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
3375
3376 if (Lit && !Operands.back()->isImm())
3377 Error(Loc, "expected immediate with lit modifier");
3378
3379 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3380 return ParseStatus::Failure;
3381 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3382 return ParseStatus::Failure;
3383 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3384 return ParseStatus::Failure;
3385 if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3386 return ParseStatus::Failure;
3387
3388 AMDGPUOperand::Modifiers Mods;
3389 Mods.Abs = Abs || SP3Abs;
3390 Mods.Neg = Neg || SP3Neg;
3391 Mods.Lit = Lit;
3392
3393 if (Mods.hasFPModifiers() || Lit) {
3394 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3395 if (Op.isExpr())
3396 return Error(Op.getStartLoc(), "expected an absolute expression");
3397 Op.setModifiers(Mods);
3398 }
3399 return ParseStatus::Success;
3400}
3401
3403AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3404 bool AllowImm) {
3405 bool Sext = trySkipId("sext");
3406 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3407 return ParseStatus::Failure;
3408
3409 ParseStatus Res;
3410 if (AllowImm) {
3411 Res = parseRegOrImm(Operands);
3412 } else {
3413 Res = parseReg(Operands);
3414 }
3415 if (!Res.isSuccess())
3416 return Sext ? ParseStatus::Failure : Res;
3417
3418 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3419 return ParseStatus::Failure;
3420
3421 AMDGPUOperand::Modifiers Mods;
3422 Mods.Sext = Sext;
3423
3424 if (Mods.hasIntModifiers()) {
3425 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3426 if (Op.isExpr())
3427 return Error(Op.getStartLoc(), "expected an absolute expression");
3428 Op.setModifiers(Mods);
3429 }
3430
3431 return ParseStatus::Success;
3432}
3433
3434ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3435 return parseRegOrImmWithFPInputMods(Operands, false);
3436}
3437
3438ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3439 return parseRegOrImmWithIntInputMods(Operands, false);
3440}
3441
3442ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3443 auto Loc = getLoc();
3444 if (trySkipId("off")) {
3445 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3446 AMDGPUOperand::ImmTyOff, false));
3447 return ParseStatus::Success;
3448 }
3449
3450 if (!isRegister())
3451 return ParseStatus::NoMatch;
3452
3453 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3454 if (Reg) {
3455 Operands.push_back(std::move(Reg));
3456 return ParseStatus::Success;
3457 }
3458
3459 return ParseStatus::Failure;
3460}
3461
3462unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3463 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3464
3465 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3466 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3467 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3468 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3469 return Match_InvalidOperand;
3470
3471 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3472 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3473 // v_mac_f32/16 allow only dst_sel == DWORD;
3474 auto OpNum =
3475 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3476 const auto &Op = Inst.getOperand(OpNum);
3477 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3478 return Match_InvalidOperand;
3479 }
3480 }
3481
3482 return Match_Success;
3483}
3484
3485static ArrayRef<unsigned> getAllVariants() {
3486 static const unsigned Variants[] = {
3490 };
3491
3492 return ArrayRef(Variants);
3493}
3494
3495// What asm variants we should check
3496ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3497 if (isForcedDPP() && isForcedVOP3()) {
3498 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3499 return ArrayRef(Variants);
3500 }
3501 if (getForcedEncodingSize() == 32) {
3502 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3503 return ArrayRef(Variants);
3504 }
3505
3506 if (isForcedVOP3()) {
3507 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3508 return ArrayRef(Variants);
3509 }
3510
3511 if (isForcedSDWA()) {
3512 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3514 return ArrayRef(Variants);
3515 }
3516
3517 if (isForcedDPP()) {
3518 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3519 return ArrayRef(Variants);
3520 }
3521
3522 return getAllVariants();
3523}
3524
3525StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3526 if (isForcedDPP() && isForcedVOP3())
3527 return "e64_dpp";
3528
3529 if (getForcedEncodingSize() == 32)
3530 return "e32";
3531
3532 if (isForcedVOP3())
3533 return "e64";
3534
3535 if (isForcedSDWA())
3536 return "sdwa";
3537
3538 if (isForcedDPP())
3539 return "dpp";
3540
3541 return "";
3542}
3543
3544unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3545 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3546 for (MCPhysReg Reg : Desc.implicit_uses()) {
3547 switch (Reg) {
3548 case AMDGPU::FLAT_SCR:
3549 case AMDGPU::VCC:
3550 case AMDGPU::VCC_LO:
3551 case AMDGPU::VCC_HI:
3552 case AMDGPU::M0:
3553 return Reg;
3554 default:
3555 break;
3556 }
3557 }
3558 return AMDGPU::NoRegister;
3559}
3560
3561// NB: This code is correct only when used to check constant
3562// bus limitations because GFX7 supports no f16 inline constants.
3563// Note that there are no cases when a GFX7 opcode violates
3564// constant bus limitations due to the use of an f16 constant.
3565bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3566 unsigned OpIdx) const {
3567 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3568
3569 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3570 AMDGPU::isKImmOperand(Desc, OpIdx)) {
3571 return false;
3572 }
3573
3574 const MCOperand &MO = Inst.getOperand(OpIdx);
3575
3576 int64_t Val = MO.getImm();
3577 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3578
3579 switch (OpSize) { // expected operand size
3580 case 8:
3581 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3582 case 4:
3583 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3584 case 2: {
3585 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3589 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3590
3595
3600
3605
3610 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3611
3616 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3617
3618 llvm_unreachable("invalid operand type");
3619 }
3620 default:
3621 llvm_unreachable("invalid operand size");
3622 }
3623}
3624
3625unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3626 if (!isGFX10Plus())
3627 return 1;
3628
3629 switch (Opcode) {
3630 // 64-bit shift instructions can use only one scalar value input
3631 case AMDGPU::V_LSHLREV_B64_e64:
3632 case AMDGPU::V_LSHLREV_B64_gfx10:
3633 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3634 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3635 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3636 case AMDGPU::V_LSHRREV_B64_e64:
3637 case AMDGPU::V_LSHRREV_B64_gfx10:
3638 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3639 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3640 case AMDGPU::V_ASHRREV_I64_e64:
3641 case AMDGPU::V_ASHRREV_I64_gfx10:
3642 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3643 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3644 case AMDGPU::V_LSHL_B64_e64:
3645 case AMDGPU::V_LSHR_B64_e64:
3646 case AMDGPU::V_ASHR_I64_e64:
3647 return 1;
3648 default:
3649 return 2;
3650 }
3651}
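// For example (opcodes chosen for illustration): on GFX10+ a VOP3 opcode such
// as v_add_f32_e64 may read two different scalar values (limit 2), whereas
// the 64-bit shifts listed above, e.g. v_lshlrev_b64, remain limited to one;
// on earlier targets the limit is always 1.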
3652
3653constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3654using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3655
3656// Get regular operand indices in the same order as specified
3657// in the instruction (but append mandatory literals to the end).
3658static OperandIndices getSrcOperandIndices(unsigned Opcode,
3659 bool AddMandatoryLiterals = false) {
3660
3661 int16_t ImmIdx =
3662 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3663
3664 if (isVOPD(Opcode)) {
3665 int16_t ImmDeferredIdx =
3666 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3667 : -1;
3668
3669 return {getNamedOperandIdx(Opcode, OpName::src0X),
3670 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3671 getNamedOperandIdx(Opcode, OpName::src0Y),
3672 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3673 ImmDeferredIdx,
3674 ImmIdx};
3675 }
3676
3677 return {getNamedOperandIdx(Opcode, OpName::src0),
3678 getNamedOperandIdx(Opcode, OpName::src1),
3679 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3680}
3681
3682bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3683 const MCOperand &MO = Inst.getOperand(OpIdx);
3684 if (MO.isImm()) {
3685 return !isInlineConstant(Inst, OpIdx);
3686 } else if (MO.isReg()) {
3687 auto Reg = MO.getReg();
3688 if (!Reg) {
3689 return false;
3690 }
3691 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3692 auto PReg = mc2PseudoReg(Reg);
3693 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3694 } else {
3695 return true;
3696 }
3697}
3698
3699// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3700// Writelane is special in that it can use an SGPR and M0 (which would normally
3701// count as using the constant bus twice - but in this case it is allowed since
3702// the lane selector doesn't count as a use of the constant bus). However, it is
3703// still required to abide by the 1 SGPR rule.
3704static bool checkWriteLane(const MCInst &Inst) {
3705 const unsigned Opcode = Inst.getOpcode();
3706 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3707 return false;
3708 const MCOperand &LaneSelOp = Inst.getOperand(2);
3709 if (!LaneSelOp.isReg())
3710 return false;
3711 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3712 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3713}
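// For example (assembly chosen for illustration):
//   v_writelane_b32 v1, s2, m0
// is accepted on the affected targets because the M0 lane-select read is not
// counted against the constant bus, as described above.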
3714
3715bool AMDGPUAsmParser::validateConstantBusLimitations(
3716 const MCInst &Inst, const OperandVector &Operands) {
3717 const unsigned Opcode = Inst.getOpcode();
3718 const MCInstrDesc &Desc = MII.get(Opcode);
3719 unsigned LastSGPR = AMDGPU::NoRegister;
3720 unsigned ConstantBusUseCount = 0;
3721 unsigned NumLiterals = 0;
3722 unsigned LiteralSize;
3723
3724 if (!(Desc.TSFlags &
3727 !isVOPD(Opcode))
3728 return true;
3729
3730 if (checkWriteLane(Inst))
3731 return true;
3732
3733 // Check special imm operands (used by madmk, etc)
3734 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3735 ++NumLiterals;
3736 LiteralSize = 4;
3737 }
3738
3739 SmallDenseSet<unsigned> SGPRsUsed;
3740 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3741 if (SGPRUsed != AMDGPU::NoRegister) {
3742 SGPRsUsed.insert(SGPRUsed);
3743 ++ConstantBusUseCount;
3744 }
3745
3746 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3747
3748 for (int OpIdx : OpIndices) {
3749 if (OpIdx == -1)
3750 continue;
3751
3752 const MCOperand &MO = Inst.getOperand(OpIdx);
3753 if (usesConstantBus(Inst, OpIdx)) {
3754 if (MO.isReg()) {
3755 LastSGPR = mc2PseudoReg(MO.getReg());
3756 // Pairs of registers with a partial intersection like these
3757 // s0, s[0:1]
3758 // flat_scratch_lo, flat_scratch
3759 // flat_scratch_lo, flat_scratch_hi
3760 // are theoretically valid but they are disabled anyway.
3761 // Note that this code mimics SIInstrInfo::verifyInstruction
3762 if (SGPRsUsed.insert(LastSGPR).second) {
3763 ++ConstantBusUseCount;
3764 }
3765 } else { // Expression or a literal
3766
3767 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3768 continue; // special operand like VINTERP attr_chan
3769
3770 // An instruction may use only one literal.
3771 // This has been validated on the previous step.
3772 // See validateVOPLiteral.
3773 // This literal may be used as more than one operand.
3774 // If all these operands are of the same size,
3775 // this literal counts as one scalar value.
3776 // Otherwise it counts as 2 scalar values.
3777 // See "GFX10 Shader Programming", section 3.6.2.3.
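// Illustrative example (assumed syntax): in "v_fma_f32 v0, 0x40400000, v1,
// 0x40400000" the same 32-bit literal feeds two operands of equal size and
// therefore counts as a single scalar value on the constant bus.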
3778
3779 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3780 if (Size < 4)
3781 Size = 4;
3782
3783 if (NumLiterals == 0) {
3784 NumLiterals = 1;
3785 LiteralSize = Size;
3786 } else if (LiteralSize != Size) {
3787 NumLiterals = 2;
3788 }
3789 }
3790 }
3791 }
3792 ConstantBusUseCount += NumLiterals;
3793
3794 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3795 return true;
3796
3797 SMLoc LitLoc = getLitLoc(Operands);
3798 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3799 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3800 Error(Loc, "invalid operand (violates constant bus restrictions)");
3801 return false;
3802}
3803
3804bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3805 const MCInst &Inst, const OperandVector &Operands) {
3806
3807 const unsigned Opcode = Inst.getOpcode();
3808 if (!isVOPD(Opcode))
3809 return true;
3810
3811 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3812
3813 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3814 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3815 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3816 ? Opr.getReg()
3817 : MCRegister::NoRegister;
3818 };
3819
3820 // On GFX12 if both OpX and OpY are V_MOV_B32 then OpY uses the SRC2 source-cache.
3821 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3822
3823 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3824 auto InvalidCompOprIdx =
3825 InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3826 if (!InvalidCompOprIdx)
3827 return true;
3828
3829 auto CompOprIdx = *InvalidCompOprIdx;
3830 auto ParsedIdx =
3831 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3832 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3833 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3834
3835 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3836 if (CompOprIdx == VOPD::Component::DST) {
3837 Error(Loc, "one dst register must be even and the other odd");
3838 } else {
3839 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3840 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3841 " operands must use different VGPR banks");
3842 }
3843
3844 return false;
3845}
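// Illustrative example (assumed syntax): "v_dual_mul_f32 v4, v0, v2 ::
// v_dual_add_f32 v5, v1, v3" satisfies the checks above: the two dst
// registers are even/odd and the paired src operands come from different
// VGPR banks.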
3846
3847bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3848
3849 const unsigned Opc = Inst.getOpcode();
3850 const MCInstrDesc &Desc = MII.get(Opc);
3851
3852 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3853 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3854 assert(ClampIdx != -1);
3855 return Inst.getOperand(ClampIdx).getImm() == 0;
3856 }
3857
3858 return true;
3859}
3860
3861 constexpr uint64_t MIMGFlags =
3862 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
3863
3864bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3865 const SMLoc &IDLoc) {
3866
3867 const unsigned Opc = Inst.getOpcode();
3868 const MCInstrDesc &Desc = MII.get(Opc);
3869
3870 if ((Desc.TSFlags & MIMGFlags) == 0)
3871 return true;
3872
3873 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3874 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3875 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3876
3877 assert(VDataIdx != -1);
3878
3879 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3880 return true;
3881
3882 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3883 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3884 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3885 if (DMask == 0)
3886 DMask = 1;
3887
3888 bool IsPackedD16 = false;
3889 unsigned DataSize =
3890 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3891 if (hasPackedD16()) {
3892 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3893 IsPackedD16 = D16Idx >= 0;
3894 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3895 DataSize = (DataSize + 1) / 2;
3896 }
3897
3898 if ((VDataSize / 4) == DataSize + TFESize)
3899 return true;
3900
3901 StringRef Modifiers;
3902 if (isGFX90A())
3903 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3904 else
3905 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3906
3907 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3908 return false;
3909}
3910
3911bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3912 const SMLoc &IDLoc) {
3913 const unsigned Opc = Inst.getOpcode();
3914 const MCInstrDesc &Desc = MII.get(Opc);
3915
3916 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
3917 return true;
3918
3919 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3920
3921 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3922 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3923 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3924 int RSrcOpName = Desc.TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc
3925 : AMDGPU::OpName::rsrc;
3926 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
3927 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3928 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3929
3930 assert(VAddr0Idx != -1);
3931 assert(SrsrcIdx != -1);
3932 assert(SrsrcIdx > VAddr0Idx);
3933
3934 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3935 if (BaseOpcode->BVH) {
3936 if (IsA16 == BaseOpcode->A16)
3937 return true;
3938 Error(IDLoc, "image address size does not match a16");
3939 return false;
3940 }
3941
3942 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3943 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3944 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3945 unsigned ActualAddrSize =
3946 IsNSA ? SrsrcIdx - VAddr0Idx
3947 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3948
3949 unsigned ExpectedAddrSize =
3950 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3951
3952 if (IsNSA) {
3953 if (hasPartialNSAEncoding() &&
3954 ExpectedAddrSize >
3955 getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
3956 int VAddrLastIdx = SrsrcIdx - 1;
3957 unsigned VAddrLastSize =
3958 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3959
3960 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3961 }
3962 } else {
3963 if (ExpectedAddrSize > 12)
3964 ExpectedAddrSize = 16;
3965
3966 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3967 // This provides backward compatibility for assembly created
3968 // before 160b/192b/224b types were directly supported.
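// For instance (illustrative), a sample that needs only 6 address VGPRs may
// still be written with an 8-register tuple such as v[0:7].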
3969 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3970 return true;
3971 }
3972
3973 if (ActualAddrSize == ExpectedAddrSize)
3974 return true;
3975
3976 Error(IDLoc, "image address size does not match dim and a16");
3977 return false;
3978}
3979
3980bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3981
3982 const unsigned Opc = Inst.getOpcode();
3983 const MCInstrDesc &Desc = MII.get(Opc);
3984
3985 if ((Desc.TSFlags & MIMGFlags) == 0)
3986 return true;
3987 if (!Desc.mayLoad() || !Desc.mayStore())
3988 return true; // Not atomic
3989
3990 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3991 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3992
3993 // This is an incomplete check because image_atomic_cmpswap
3994 // may only use 0x3 and 0xf while other atomic operations
3995 // may use 0x1 and 0x3. However these limitations are
3996 // verified when we check that dmask matches dst size.
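// Illustrative example (assumed syntax): a 32-bit "image_atomic_add" uses
// dmask:0x1, a 64-bit atomic uses dmask:0x3, and a 64-bit
// "image_atomic_cmpswap" (data plus compare) uses dmask:0xf.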
3997 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3998}
3999
4000bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4001
4002 const unsigned Opc = Inst.getOpcode();
4003 const MCInstrDesc &Desc = MII.get(Opc);
4004
4005 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4006 return true;
4007
4008 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4009 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4010
4011 // GATHER4 instructions use dmask in a different fashion compared to
4012 // other MIMG instructions. The only useful DMASK values are
4013 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4014 // (red,red,red,red) etc.) The ISA document doesn't mention
4015 // this.
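// Illustrative example (assumed syntax): "image_gather4 ... dmask:0x1"
// gathers the red component from four texels; dmask:0x5 would be rejected
// here because more than one bit is set.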
4016 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4017}
4018
4019bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4020 const unsigned Opc = Inst.getOpcode();
4021 const MCInstrDesc &Desc = MII.get(Opc);
4022
4023 if ((Desc.TSFlags & MIMGFlags) == 0)
4024 return true;
4025
4026 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4027 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4028 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4029
4030 if (!BaseOpcode->MSAA)
4031 return true;
4032
4033 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4034 assert(DimIdx != -1);
4035
4036 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4037 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4038
4039 return DimInfo->MSAA;
4040}
4041
4042static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4043{
4044 switch (Opcode) {
4045 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4046 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4047 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4048 return true;
4049 default:
4050 return false;
4051 }
4052}
4053
4054 // movrels* opcodes should only allow VGPRs as src0.
4055// This is specified in .td description for vop1/vop3,
4056// but sdwa is handled differently. See isSDWAOperand.
4057bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4058 const OperandVector &Operands) {
4059
4060 const unsigned Opc = Inst.getOpcode();
4061 const MCInstrDesc &Desc = MII.get(Opc);
4062
4063 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4064 return true;
4065
4066 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4067 assert(Src0Idx != -1);
4068
4069 SMLoc ErrLoc;
4070 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4071 if (Src0.isReg()) {
4072 auto Reg = mc2PseudoReg(Src0.getReg());
4073 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4074 if (!isSGPR(Reg, TRI))
4075 return true;
4076 ErrLoc = getRegLoc(Reg, Operands);
4077 } else {
4078 ErrLoc = getConstLoc(Operands);
4079 }
4080
4081 Error(ErrLoc, "source operand must be a VGPR");
4082 return false;
4083}
4084
4085bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4086 const OperandVector &Operands) {
4087
4088 const unsigned Opc = Inst.getOpcode();
4089
4090 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4091 return true;
4092
4093 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4094 assert(Src0Idx != -1);
4095
4096 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4097 if (!Src0.isReg())
4098 return true;
4099
4100 auto Reg = mc2PseudoReg(Src0.getReg());
4101 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4102 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4103 Error(getRegLoc(Reg, Operands),
4104 "source operand must be either a VGPR or an inline constant");
4105 return false;
4106 }
4107
4108 return true;
4109}
4110
4111bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4112 const OperandVector &Operands) {
4113 unsigned Opcode = Inst.getOpcode();
4114 const MCInstrDesc &Desc = MII.get(Opcode);
4115
4116 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4117 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4118 return true;
4119
4120 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4121 if (Src2Idx == -1)
4122 return true;
4123
4124 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4125 Error(getConstLoc(Operands),
4126 "inline constants are not allowed for this operand");
4127 return false;
4128 }
4129
4130 return true;
4131}
4132
4133bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4134 const OperandVector &Operands) {
4135 const unsigned Opc = Inst.getOpcode();
4136 const MCInstrDesc &Desc = MII.get(Opc);
4137
4138 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4139 return true;
4140
4141 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4142 if (Src2Idx == -1)
4143 return true;
4144
4145 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4146 if (!Src2.isReg())
4147 return true;
4148
4149 MCRegister Src2Reg = Src2.getReg();
4150 MCRegister DstReg = Inst.getOperand(0).getReg();
4151 if (Src2Reg == DstReg)
4152 return true;
4153
4154 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4155 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4156 return true;
4157
4158 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4159 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
4160 "source 2 operand must not partially overlap with dst");
4161 return false;
4162 }
4163
4164 return true;
4165}
4166
4167bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4168 switch (Inst.getOpcode()) {
4169 default:
4170 return true;
4171 case V_DIV_SCALE_F32_gfx6_gfx7:
4172 case V_DIV_SCALE_F32_vi:
4173 case V_DIV_SCALE_F32_gfx10:
4174 case V_DIV_SCALE_F64_gfx6_gfx7:
4175 case V_DIV_SCALE_F64_vi:
4176 case V_DIV_SCALE_F64_gfx10:
4177 break;
4178 }
4179
4180 // TODO: Check that src0 = src1 or src2.
4181
4182 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4183 AMDGPU::OpName::src2_modifiers,
4184 AMDGPU::OpName::src2_modifiers}) {
4185 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4186 .getImm() &
4187 SISrcMods::ABS) {
4188 return false;
4189 }
4190 }
4191
4192 return true;
4193}
4194
4195bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4196
4197 const unsigned Opc = Inst.getOpcode();
4198 const MCInstrDesc &Desc = MII.get(Opc);
4199
4200 if ((Desc.TSFlags & MIMGFlags) == 0)
4201 return true;
4202
4203 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4204 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4205 if (isCI() || isSI())
4206 return false;
4207 }
4208
4209 return true;
4210}
4211
4212static bool IsRevOpcode(const unsigned Opcode)
4213{
4214 switch (Opcode) {
4215 case AMDGPU::V_SUBREV_F32_e32:
4216 case AMDGPU::V_SUBREV_F32_e64:
4217 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4218 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4219 case AMDGPU::V_SUBREV_F32_e32_vi:
4220 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4221 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4222 case AMDGPU::V_SUBREV_F32_e64_vi:
4223
4224 case AMDGPU::V_SUBREV_CO_U32_e32:
4225 case AMDGPU::V_SUBREV_CO_U32_e64:
4226 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4227 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4228
4229 case AMDGPU::V_SUBBREV_U32_e32:
4230 case AMDGPU::V_SUBBREV_U32_e64:
4231 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4232 case AMDGPU::V_SUBBREV_U32_e32_vi:
4233 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4234 case AMDGPU::V_SUBBREV_U32_e64_vi:
4235
4236 case AMDGPU::V_SUBREV_U32_e32:
4237 case AMDGPU::V_SUBREV_U32_e64:
4238 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4239 case AMDGPU::V_SUBREV_U32_e32_vi:
4240 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4241 case AMDGPU::V_SUBREV_U32_e64_vi:
4242
4243 case AMDGPU::V_SUBREV_F16_e32:
4244 case AMDGPU::V_SUBREV_F16_e64:
4245 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4246 case AMDGPU::V_SUBREV_F16_e32_vi:
4247 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4248 case AMDGPU::V_SUBREV_F16_e64_vi:
4249
4250 case AMDGPU::V_SUBREV_U16_e32:
4251 case AMDGPU::V_SUBREV_U16_e64:
4252 case AMDGPU::V_SUBREV_U16_e32_vi:
4253 case AMDGPU::V_SUBREV_U16_e64_vi:
4254
4255 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4256 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4257 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4258
4259 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4260 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4261
4262 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4263 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4264
4265 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4266 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4267
4268 case AMDGPU::V_LSHRREV_B32_e32:
4269 case AMDGPU::V_LSHRREV_B32_e64:
4270 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4271 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4272 case AMDGPU::V_LSHRREV_B32_e32_vi:
4273 case AMDGPU::V_LSHRREV_B32_e64_vi:
4274 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4275 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4276
4277 case AMDGPU::V_ASHRREV_I32_e32:
4278 case AMDGPU::V_ASHRREV_I32_e64:
4279 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4280 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4281 case AMDGPU::V_ASHRREV_I32_e32_vi:
4282 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4283 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4284 case AMDGPU::V_ASHRREV_I32_e64_vi:
4285
4286 case AMDGPU::V_LSHLREV_B32_e32:
4287 case AMDGPU::V_LSHLREV_B32_e64:
4288 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4289 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4290 case AMDGPU::V_LSHLREV_B32_e32_vi:
4291 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4292 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4293 case AMDGPU::V_LSHLREV_B32_e64_vi:
4294
4295 case AMDGPU::V_LSHLREV_B16_e32:
4296 case AMDGPU::V_LSHLREV_B16_e64:
4297 case AMDGPU::V_LSHLREV_B16_e32_vi:
4298 case AMDGPU::V_LSHLREV_B16_e64_vi:
4299 case AMDGPU::V_LSHLREV_B16_gfx10:
4300
4301 case AMDGPU::V_LSHRREV_B16_e32:
4302 case AMDGPU::V_LSHRREV_B16_e64:
4303 case AMDGPU::V_LSHRREV_B16_e32_vi:
4304 case AMDGPU::V_LSHRREV_B16_e64_vi:
4305 case AMDGPU::V_LSHRREV_B16_gfx10:
4306
4307 case AMDGPU::V_ASHRREV_I16_e32:
4308 case AMDGPU::V_ASHRREV_I16_e64:
4309 case AMDGPU::V_ASHRREV_I16_e32_vi:
4310 case AMDGPU::V_ASHRREV_I16_e64_vi:
4311 case AMDGPU::V_ASHRREV_I16_gfx10:
4312
4313 case AMDGPU::V_LSHLREV_B64_e64:
4314 case AMDGPU::V_LSHLREV_B64_gfx10:
4315 case AMDGPU::V_LSHLREV_B64_vi:
4316
4317 case AMDGPU::V_LSHRREV_B64_e64:
4318 case AMDGPU::V_LSHRREV_B64_gfx10:
4319 case AMDGPU::V_LSHRREV_B64_vi:
4320
4321 case AMDGPU::V_ASHRREV_I64_e64:
4322 case AMDGPU::V_ASHRREV_I64_gfx10:
4323 case AMDGPU::V_ASHRREV_I64_vi:
4324
4325 case AMDGPU::V_PK_LSHLREV_B16:
4326 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4327 case AMDGPU::V_PK_LSHLREV_B16_vi:
4328
4329 case AMDGPU::V_PK_LSHRREV_B16:
4330 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4331 case AMDGPU::V_PK_LSHRREV_B16_vi:
4332 case AMDGPU::V_PK_ASHRREV_I16:
4333 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4334 case AMDGPU::V_PK_ASHRREV_I16_vi:
4335 return true;
4336 default:
4337 return false;
4338 }
4339}
4340
4341std::optional<StringRef>
4342AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4343
4344 using namespace SIInstrFlags;
4345 const unsigned Opcode = Inst.getOpcode();
4346 const MCInstrDesc &Desc = MII.get(Opcode);
4347
4348 // lds_direct register is defined so that it can be used
4349 // with 9-bit operands only. Ignore encodings which do not accept these.
4350 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4351 if ((Desc.TSFlags & Enc) == 0)
4352 return std::nullopt;
4353
4354 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4355 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4356 if (SrcIdx == -1)
4357 break;
4358 const auto &Src = Inst.getOperand(SrcIdx);
4359 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4360
4361 if (isGFX90A() || isGFX11Plus())
4362 return StringRef("lds_direct is not supported on this GPU");
4363
4364 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4365 return StringRef("lds_direct cannot be used with this instruction");
4366
4367 if (SrcName != OpName::src0)
4368 return StringRef("lds_direct may be used as src0 only");
4369 }
4370 }
4371
4372 return std::nullopt;
4373}
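// Illustrative examples (assumed syntax): "v_readfirstlane_b32 s0, lds_direct"
// is accepted on targets that support LDS direct reads, while using
// lds_direct as src1, or on gfx90a/gfx11+, is diagnosed above.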
4374
4375SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4376 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4377 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4378 if (Op.isFlatOffset())
4379 return Op.getStartLoc();
4380 }
4381 return getLoc();
4382}
4383
4384bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4385 const OperandVector &Operands) {
4386 auto Opcode = Inst.getOpcode();
4387 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4388 if (OpNum == -1)
4389 return true;
4390
4391 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4392 if ((TSFlags & SIInstrFlags::FLAT))
4393 return validateFlatOffset(Inst, Operands);
4394
4395 if ((TSFlags & SIInstrFlags::SMRD))
4396 return validateSMEMOffset(Inst, Operands);
4397
4398 const auto &Op = Inst.getOperand(OpNum);
4399 if (isGFX12Plus() &&
4400 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4401 const unsigned OffsetSize = 24;
4402 if (!isIntN(OffsetSize, Op.getImm())) {
4403 Error(getFlatOffsetLoc(Operands),
4404 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4405 return false;
4406 }
4407 } else {
4408 const unsigned OffsetSize = 16;
4409 if (!isUIntN(OffsetSize, Op.getImm())) {
4410 Error(getFlatOffsetLoc(Operands),
4411 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4412 return false;
4413 }
4414 }
4415 return true;
4416}
4417
4418bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4419 const OperandVector &Operands) {
4420 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4421 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4422 return true;
4423
4424 auto Opcode = Inst.getOpcode();
4425 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4426 assert(OpNum != -1);
4427
4428 const auto &Op = Inst.getOperand(OpNum);
4429 if (!hasFlatOffsets() && Op.getImm() != 0) {
4430 Error(getFlatOffsetLoc(Operands),
4431 "flat offset modifier is not supported on this GPU");
4432 return false;
4433 }
4434
4435 // For pre-GFX12 FLAT instructions the offset must be positive;
4436 // MSB is ignored and forced to zero.
4437 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4438 bool AllowNegative =
4439 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4440 isGFX12Plus();
4441 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4442 Error(getFlatOffsetLoc(Operands),
4443 Twine("expected a ") +
4444 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4445 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4446 return false;
4447 }
4448
4449 return true;
4450}
4451
4452SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4453 // Start with second operand because SMEM Offset cannot be dst or src0.
4454 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4455 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4456 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4457 return Op.getStartLoc();
4458 }
4459 return getLoc();
4460}
4461
4462bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4463 const OperandVector &Operands) {
4464 if (isCI() || isSI())
4465 return true;
4466
4467 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4468 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4469 return true;
4470
4471 auto Opcode = Inst.getOpcode();
4472 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4473 if (OpNum == -1)
4474 return true;
4475
4476 const auto &Op = Inst.getOperand(OpNum);
4477 if (!Op.isImm())
4478 return true;
4479
4480 uint64_t Offset = Op.getImm();
4481 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4482 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4483 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4484 return true;
4485
4486 Error(getSMEMOffsetLoc(Operands),
4487 isGFX12Plus() ? "expected a 24-bit signed offset"
4488 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4489 : "expected a 21-bit signed offset");
4490
4491 return false;
4492}
4493
4494bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4495 unsigned Opcode = Inst.getOpcode();
4496 const MCInstrDesc &Desc = MII.get(Opcode);
4497 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4498 return true;
4499
4500 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4501 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4502
4503 const int OpIndices[] = { Src0Idx, Src1Idx };
4504
4505 unsigned NumExprs = 0;
4506 unsigned NumLiterals = 0;
4507 uint32_t LiteralValue;
4508
4509 for (int OpIdx : OpIndices) {
4510 if (OpIdx == -1) break;
4511
4512 const MCOperand &MO = Inst.getOperand(OpIdx);
4513 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4514 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4515 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4516 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4517 if (NumLiterals == 0 || LiteralValue != Value) {
4518 LiteralValue = Value;
4519 ++NumLiterals;
4520 }
4521 } else if (MO.isExpr()) {
4522 ++NumExprs;
4523 }
4524 }
4525 }
4526
4527 return NumLiterals + NumExprs <= 1;
4528}
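// Illustrative example (assumed syntax): "s_add_u32 s0, 0x12345678, 0x12345678"
// reuses one literal value and passes, whereas two different 32-bit literals
// in src0 and src1 would fail this check.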
4529
4530bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4531 const unsigned Opc = Inst.getOpcode();
4532 if (isPermlane16(Opc)) {
4533 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4534 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4535
4536 if (OpSel & ~3)
4537 return false;
4538 }
4539
4540 uint64_t TSFlags = MII.get(Opc).TSFlags;
4541
4542 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4543 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4544 if (OpSelIdx != -1) {
4545 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4546 return false;
4547 }
4548 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4549 if (OpSelHiIdx != -1) {
4550 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4551 return false;
4552 }
4553 }
4554
4555 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4556 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4557 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4558 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4559 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4560 if (OpSel & 3)
4561 return false;
4562 }
4563
4564 return true;
4565}
4566
4567bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) {
4568 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4569
4570 const unsigned Opc = Inst.getOpcode();
4571 uint64_t TSFlags = MII.get(Opc).TSFlags;
4572
4573 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4574 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4575 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4576 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
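// Illustrative example (assumed syntax): "v_wmma_i32_16x16x16_iu8 ...
// neg_lo:[1,1,0]" is acceptable because neg is applied only to src0/src1,
// while neg_lo:[0,0,1] would be rejected by the loop below.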
4577 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4578 !(TSFlags & SIInstrFlags::IsSWMMAC))
4579 return true;
4580
4581 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4582 if (NegIdx == -1)
4583 return true;
4584
4585 unsigned Neg = Inst.getOperand(NegIdx).getImm();
4586
4587 // Some instructions have a neg_lo or neg_hi operand, but the neg modifier is
4588 // allowed only on certain src operands and not on the others.
4589 // Conveniently, such instructions don't have a src_modifiers operand for the
4590 // src operands that don't allow neg, because those operands also don't allow opsel.
4591
4592 int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4593 AMDGPU::OpName::src1_modifiers,
4594 AMDGPU::OpName::src2_modifiers};
4595
4596 for (unsigned i = 0; i < 3; ++i) {
4597 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4598 if (Neg & (1 << i))
4599 return false;
4600 }
4601 }
4602
4603 return true;
4604}
4605
4606bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4607 const OperandVector &Operands) {
4608 const unsigned Opc = Inst.getOpcode();
4609 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4610 if (DppCtrlIdx >= 0) {
4611 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4612
4613 if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
4614 AMDGPU::isDPALU_DPP(MII.get(Opc))) {
4615 // DP ALU DPP is supported for row_newbcast only on GFX9*
4616 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4617 Error(S, "DP ALU dpp only supports row_newbcast");
4618 return false;
4619 }
4620 }
4621
4622 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
4623 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4624
4625 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
4626 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4627 if (Src1Idx >= 0) {
4628 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4629 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4630 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
4631 auto Reg = mc2PseudoReg(Inst.getOperand(Src1Idx).getReg());
4632 SMLoc S = getRegLoc(Reg, Operands);
4633 Error(S, "invalid operand for instruction");
4634 return false;
4635 }
4636 if (Src1.isImm()) {
4637 Error(getInstLoc(Operands),
4638 "src1 immediate operand invalid for instruction");
4639 return false;
4640 }
4641 }
4642 }
4643
4644 return true;
4645}
4646
4647// Check if VCC register matches wavefront size
4648bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4649 auto FB = getFeatureBits();
4650 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4651 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4652}
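// Illustrative note: in wave32 mode the condition-code operand is spelled
// vcc_lo, while wave64 code spells it vcc; supplying the wrong form for the
// selected wavefront size fails this check.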
4653
4654 // Only one unique literal can be used. A VOP3 literal is only allowed on GFX10+.
4655bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4656 const OperandVector &Operands) {
4657 unsigned Opcode = Inst.getOpcode();
4658 const MCInstrDesc &Desc = MII.get(Opcode);
4659 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4660 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4661 !HasMandatoryLiteral && !isVOPD(Opcode))
4662 return true;
4663
4664 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4665
4666 unsigned NumExprs = 0;
4667 unsigned NumLiterals = 0;
4668 uint64_t LiteralValue;
4669
4670 for (int OpIdx : OpIndices) {
4671 if (OpIdx == -1)
4672 continue;
4673
4674 const MCOperand &MO = Inst.getOperand(OpIdx);
4675 if (!MO.isImm() && !MO.isExpr())
4676 continue;
4677 if (!isSISrcOperand(Desc, OpIdx))
4678 continue;
4679
4680 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4681 uint64_t Value = static_cast<uint64_t>(MO.getImm());
4682 bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
4683 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
4684 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
4685
4686 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
4687 Error(getLitLoc(Operands), "invalid operand for instruction");
4688 return false;
4689 }
4690
4691 if (IsFP64 && IsValid32Op)
4692 Value = Hi_32(Value);
4693
4694 if (NumLiterals == 0 || LiteralValue != Value) {
4695 LiteralValue = Value;
4696 ++NumLiterals;
4697 }
4698 } else if (MO.isExpr()) {
4699 ++NumExprs;
4700 }
4701 }
4702 NumLiterals += NumExprs;
4703
4704 if (!NumLiterals)
4705 return true;
4706
4707 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4708 Error(getLitLoc(Operands), "literal operands are not supported");
4709 return false;
4710 }
4711
4712 if (NumLiterals > 1) {
4713 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4714 return false;
4715 }
4716
4717 return true;
4718}
4719
4720// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4721static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4722 const MCRegisterInfo *MRI) {
4723 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4724 if (OpIdx < 0)
4725 return -1;
4726
4727 const MCOperand &Op = Inst.getOperand(OpIdx);
4728 if (!Op.isReg())
4729 return -1;
4730
4731 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4732 auto Reg = Sub ? Sub : Op.getReg();
4733 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4734 return AGPR32.contains(Reg) ? 1 : 0;
4735}
4736
4737bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4738 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4739 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4740 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4741 SIInstrFlags::DS)) == 0)
4742 return true;
4743
4744 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4745 : AMDGPU::OpName::vdata;
4746
4747 const MCRegisterInfo *MRI = getMRI();
4748 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4749 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4750
4751 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4752 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4753 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4754 return false;
4755 }
4756
4757 auto FB = getFeatureBits();
4758 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4759 if (DataAreg < 0 || DstAreg < 0)
4760 return true;
4761 return DstAreg == DataAreg;
4762 }
4763
4764 return DstAreg < 1 && DataAreg < 1;
4765}
4766
4767bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4768 auto FB = getFeatureBits();
4769 if (!FB[AMDGPU::FeatureGFX90AInsts])
4770 return true;
4771
4772 const MCRegisterInfo *MRI = getMRI();
4773 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4774 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4775 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4776 const MCOperand &Op = Inst.getOperand(I);
4777 if (!Op.isReg())
4778 continue;
4779
4780 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4781 if (!Sub)
4782 continue;
4783
4784 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4785 return false;
4786 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4787 return false;
4788 }
4789
4790 return true;
4791}
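// Illustrative example (assumed syntax): on gfx90a a 64-bit VGPR tuple such as
// v[2:3] is accepted, while v[3:4] starts on an odd register and is rejected.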
4792
4793SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4794 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4795 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4796 if (Op.isBLGP())
4797 return Op.getStartLoc();
4798 }
4799 return SMLoc();
4800}
4801
4802bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4803 const OperandVector &Operands) {
4804 unsigned Opc = Inst.getOpcode();
4805 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4806 if (BlgpIdx == -1)
4807 return true;
4808 SMLoc BLGPLoc = getBLGPLoc(Operands);
4809 if (!BLGPLoc.isValid())
4810 return true;
4811 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
4812 auto FB = getFeatureBits();
4813 bool UsesNeg = false;
4814 if (FB[AMDGPU::FeatureGFX940Insts]) {
4815 switch (Opc) {
4816 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4817 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4818 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4819 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4820 UsesNeg = true;
4821 }
4822 }
4823
4824 if (IsNeg == UsesNeg)
4825 return true;
4826
4827 Error(BLGPLoc,
4828 UsesNeg ? "invalid modifier: blgp is not supported"
4829 : "invalid modifier: neg is not supported");
4830
4831 return false;
4832}
4833
4834bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4835 const OperandVector &Operands) {
4836 if (!isGFX11Plus())
4837 return true;
4838
4839 unsigned Opc = Inst.getOpcode();
4840 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4841 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4842 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4843 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4844 return true;
4845
4846 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4847 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4848 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4849 if (Reg == AMDGPU::SGPR_NULL)
4850 return true;
4851
4852 SMLoc RegLoc = getRegLoc(Reg, Operands);
4853 Error(RegLoc, "src0 must be null");
4854 return false;
4855}
4856
4857bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
4858 const OperandVector &Operands) {
4859 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4860 if ((TSFlags & SIInstrFlags::DS) == 0)
4861 return true;
4862 if (TSFlags & SIInstrFlags::GWS)
4863 return validateGWS(Inst, Operands);
4864 // Only validate GDS for non-GWS instructions.
4865 if (hasGDS())
4866 return true;
4867 int GDSIdx =
4868 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
4869 if (GDSIdx < 0)
4870 return true;
4871 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
4872 if (GDS) {
4873 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
4874 Error(S, "gds modifier is not supported on this GPU");
4875 return false;
4876 }
4877 return true;
4878}
4879
4880// gfx90a has an undocumented limitation:
4881// DS_GWS opcodes must use even aligned registers.
4882bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4883 const OperandVector &Operands) {
4884 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4885 return true;
4886
4887 int Opc = Inst.getOpcode();
4888 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4889 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4890 return true;
4891
4892 const MCRegisterInfo *MRI = getMRI();
4893 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4894 int Data0Pos =
4895 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4896 assert(Data0Pos != -1);
4897 auto Reg = Inst.getOperand(Data0Pos).getReg();
4898 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4899 if (RegIdx & 1) {
4900 SMLoc RegLoc = getRegLoc(Reg, Operands);
4901 Error(RegLoc, "vgpr must be even aligned");
4902 return false;
4903 }
4904
4905 return true;
4906}
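// Illustrative example (assumed syntax): on gfx90a "ds_gws_init v2 gds" uses
// an even-aligned data register and passes, while "ds_gws_init v3 gds" would
// be rejected.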
4907
4908bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4909 const OperandVector &Operands,
4910 const SMLoc &IDLoc) {
4911 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4912 AMDGPU::OpName::cpol);
4913 if (CPolPos == -1)
4914 return true;
4915
4916 unsigned CPol = Inst.getOperand(CPolPos).getImm();
4917
4918 if (isGFX12Plus())
4919 return validateTHAndScopeBits(Inst, Operands, CPol);
4920
4921 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4922 if (TSFlags & SIInstrFlags::SMRD) {
4923 if (CPol && (isSI() || isCI())) {
4924 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4925 Error(S, "cache policy is not supported for SMRD instructions");
4926 return false;
4927 }
4928 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4929 Error(IDLoc, "invalid cache policy for SMEM instruction");
4930 return false;
4931 }
4932 }
4933
4934 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4935 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
4936 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4937 SIInstrFlags::FLAT;
4938 if (!(TSFlags & AllowSCCModifier)) {
4939 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4940 StringRef CStr(S.getPointer());
4941 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4942 Error(S,
4943 "scc modifier is not supported for this instruction on this GPU");
4944 return false;
4945 }
4946 }
4947
4948 if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
4949 return true;
4950
4951 if (TSFlags & SIInstrFlags::IsAtomicRet) {
4952 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4953 Error(IDLoc, isGFX940() ? "instruction must use sc0"
4954 : "instruction must use glc");
4955 return false;
4956 }
4957 } else {
4958 if (CPol & CPol::GLC) {
4959 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4960 StringRef CStr(S.getPointer());
4961 S = SMLoc::getFromPointer(
4962 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4963 Error(S, isGFX940() ? "instruction must not use sc0"
4964 : "instruction must not use glc");
4965 return false;
4966 }
4967 }
4968
4969 return true;
4970}
4971
4972bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
4973 const OperandVector &Operands,
4974 const unsigned CPol) {
4975 const unsigned TH = CPol & AMDGPU::CPol::TH;
4976 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
4977
4978 const unsigned Opcode = Inst.getOpcode();
4979 const MCInstrDesc &TID = MII.get(Opcode);
4980
4981 auto PrintError = [&](StringRef Msg) {
4982 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4983 Error(S, Msg);
4984 return false;
4985 };
4986
4987 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
4990 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
4991
4992 if (TH == 0)
4993 return true;
4994
4995 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
4996 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
4997 (TH == AMDGPU::CPol::TH_NT_HT)))
4998 return PrintError("invalid th value for SMEM instruction");
4999
5000 if (TH == AMDGPU::CPol::TH_BYPASS) {
5001 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5002 CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
5003 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5004 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
5005 return PrintError("scope and th combination is not valid");
5006 }
5007
5008 bool IsStore = TID.mayStore();
5009 bool IsAtomic =
5010 TID.TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet);
5011
5012 if (IsAtomic) {
5013 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5014 return PrintError("invalid th value for atomic instructions");
5015 } else if (IsStore) {
5016 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5017 return PrintError("invalid th value for store instructions");
5018 } else {
5019 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5020 return PrintError("invalid th value for load instructions");
5021 }
5022
5023 return true;
5024}
5025
5026bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
5027 if (!isGFX11Plus())
5028 return true;
5029 for (auto &Operand : Operands) {
5030 if (!Operand->isReg())
5031 continue;
5032 unsigned Reg = Operand->getReg();
5033 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
5034 Error(getRegLoc(Reg, Operands),
5035 "execz and vccz are not supported on this GPU");
5036 return false;
5037 }
5038 }
5039 return true;
5040}
5041
5042bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5043 const OperandVector &Operands) {
5044 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5045 if (Desc.mayStore() &&
5046 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5047 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5048 if (Loc != getInstLoc(Operands)) {
5049 Error(Loc, "TFE modifier has no meaning for store instructions");
5050 return false;
5051 }
5052 }
5053
5054 return true;
5055}
5056
5057bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
5058 const SMLoc &IDLoc,
5059 const OperandVector &Operands) {
5060 if (auto ErrMsg = validateLdsDirect(Inst)) {
5061 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
5062 return false;
5063 }
5064 if (!validateSOPLiteral(Inst)) {
5065 Error(getLitLoc(Operands),
5066 "only one unique literal operand is allowed");
5067 return false;
5068 }
5069 if (!validateVOPLiteral(Inst, Operands)) {
5070 return false;
5071 }
5072 if (!validateConstantBusLimitations(Inst, Operands)) {
5073 return false;
5074 }
5075 if (!validateVOPDRegBankConstraints(Inst, Operands)) {
5076 return false;
5077 }
5078 if (!validateIntClampSupported(Inst)) {
5079 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
5080 "integer clamping is not supported on this GPU");
5081 return false;
5082 }
5083 if (!validateOpSel(Inst)) {
5084 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5085 "invalid op_sel operand");
5086 return false;
5087 }
5088 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5089 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5090 "invalid neg_lo operand");
5091 return false;
5092 }
5093 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5094 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5095 "invalid neg_hi operand");
5096 return false;
5097 }
5098 if (!validateDPP(Inst, Operands)) {
5099 return false;
5100 }
5101 // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
5102 if (!validateMIMGD16(Inst)) {
5103 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5104 "d16 modifier is not supported on this GPU");
5105 return false;
5106 }
5107 if (!validateMIMGMSAA(Inst)) {
5108 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5109 "invalid dim; must be MSAA type");
5110 return false;
5111 }
5112 if (!validateMIMGDataSize(Inst, IDLoc)) {
5113 return false;
5114 }
5115 if (!validateMIMGAddrSize(Inst, IDLoc))
5116 return false;
5117 if (!validateMIMGAtomicDMask(Inst)) {
5118 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5119 "invalid atomic image dmask");
5120 return false;
5121 }
5122 if (!validateMIMGGatherDMask(Inst)) {
5123 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5124 "invalid image_gather dmask: only one bit must be set");
5125 return false;
5126 }
5127 if (!validateMovrels(Inst, Operands)) {
5128 return false;
5129 }
5130 if (!validateOffset(Inst, Operands)) {
5131 return false;
5132 }
5133 if (!validateMAIAccWrite(Inst, Operands)) {
5134 return false;
5135 }
5136 if (!validateMAISrc2(Inst, Operands)) {
5137 return false;
5138 }
5139 if (!validateMFMA(Inst, Operands)) {
5140 return false;
5141 }
5142 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5143 return false;
5144 }
5145
5146 if (!validateAGPRLdSt(Inst)) {
5147 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5148 ? "invalid register class: data and dst should be all VGPR or AGPR"
5149 : "invalid register class: agpr loads and stores not supported on this GPU"
5150 );
5151 return false;
5152 }
5153 if (!validateVGPRAlign(Inst)) {
5154 Error(IDLoc,
5155 "invalid register class: vgpr tuples must be 64 bit aligned");
5156 return false;
5157 }
5158 if (!validateDS(Inst, Operands)) {
5159 return false;
5160 }
5161
5162 if (!validateBLGP(Inst, Operands)) {
5163 return false;
5164 }
5165
5166 if (!validateDivScale(Inst)) {
5167 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5168 return false;
5169 }
5170 if (!validateWaitCnt(Inst, Operands)) {
5171 return false;
5172 }
5173 if (!validateExeczVcczOperands(Operands)) {
5174 return false;
5175 }
5176 if (!validateTFE(Inst, Operands)) {
5177 return false;
5178 }
5179
5180 return true;
5181}
5182
5183 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5184 const FeatureBitset &FBS,
5185 unsigned VariantID = 0);
5186
5187static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5188 const FeatureBitset &AvailableFeatures,
5189 unsigned VariantID);
5190
5191bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5192 const FeatureBitset &FBS) {
5193 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5194}
5195
5196bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5197 const FeatureBitset &FBS,
5198 ArrayRef<unsigned> Variants) {
5199 for (auto Variant : Variants) {
5200 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5201 return true;
5202 }
5203
5204 return false;
5205}
5206
5207bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5208 const SMLoc &IDLoc) {
5209 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5210
5211 // Check if requested instruction variant is supported.
5212 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5213 return false;
5214
5215 // This instruction is not supported.
5216 // Clear any other pending errors because they are no longer relevant.
5217 getParser().clearPendingErrors();
5218
5219 // Requested instruction variant is not supported.
5220 // Check if any other variants are supported.
5221 StringRef VariantName = getMatchedVariantName();
5222 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5223 return Error(IDLoc,
5224 Twine(VariantName,
5225 " variant of this instruction is not supported"));
5226 }
5227
5228 // Check if this instruction may be used with a different wavesize.
5229 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5230 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5231
5232 FeatureBitset FeaturesWS32 = getFeatureBits();
5233 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5234 .flip(AMDGPU::FeatureWavefrontSize32);
5235 FeatureBitset AvailableFeaturesWS32 =
5236 ComputeAvailableFeatures(FeaturesWS32);
5237
5238 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5239 return Error(IDLoc, "instruction requires wavesize=32");
5240 }
5241
5242 // Finally check if this instruction is supported on any other GPU.
5243 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5244 return Error(IDLoc, "instruction not supported on this GPU");
5245 }
5246
5247 // Instruction not supported on any GPU. Probably a typo.
5248 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5249 return Error(IDLoc, "invalid instruction" + Suggestion);
5250}
5251
5252 static bool isInvalidVOPDY(const OperandVector &Operands,
5253 uint64_t InvalidOprIdx) {
5254 assert(InvalidOprIdx < Operands.size());
5255 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5256 if (Op.isToken() && InvalidOprIdx > 1) {
5257 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5258 return PrevOp.isToken() && PrevOp.getToken() == "::";
5259 }
5260 return false;
5261}
5262
5263bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5264 OperandVector &Operands,
5265 MCStreamer &Out,
5266 uint64_t &ErrorInfo,
5267 bool MatchingInlineAsm) {
5268 MCInst Inst;
5269 unsigned Result = Match_Success;
5270 for (auto Variant : getMatchedVariants()) {
5271 uint64_t EI;
5272 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5273 Variant);
5274 // We order match statuses from least to most specific and keep the most
5275 // specific status as the result:
5276 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
5277 if ((R == Match_Success) ||
5278 (R == Match_PreferE32) ||
5279 (R == Match_MissingFeature && Result != Match_PreferE32) ||
5280 (R == Match_InvalidOperand && Result != Match_MissingFeature
5281 && Result != Match_PreferE32) ||
5282 (R == Match_MnemonicFail && Result != Match_InvalidOperand
5283 && Result != Match_MissingFeature
5284 && Result != Match_PreferE32)) {
5285 Result = R;
5286 ErrorInfo = EI;
5287 }
5288 if (R == Match_Success)
5289 break;
5290 }
5291
5292 if (Result == Match_Success) {
5293 if (!validateInstruction(Inst, IDLoc, Operands)) {
5294 return true;
5295 }
5296 Inst.setLoc(IDLoc);
5297 Out.emitInstruction(Inst, getSTI());
5298 return false;
5299 }
5300
5301 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5302 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5303 return true;
5304 }
5305
5306 switch (Result) {
5307 default: break;
5308 case Match_MissingFeature:
5309 // It has been verified that the specified instruction
5310 // mnemonic is valid. A match was found but it requires
5311 // features which are not supported on this GPU.
5312 return Error(IDLoc, "operands are not valid for this GPU or mode");
5313
5314 case Match_InvalidOperand: {
5315 SMLoc ErrorLoc = IDLoc;
5316 if (ErrorInfo != ~0ULL) {
5317 if (ErrorInfo >= Operands.size()) {
5318 return Error(IDLoc, "too few operands for instruction");
5319 }
5320 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5321 if (ErrorLoc == SMLoc())
5322 ErrorLoc = IDLoc;
5323
5324 if (isInvalidVOPDY(Operands, ErrorInfo)) {
5325 return Error(ErrorLoc, "invalid VOPDY instruction");
5326 }
5327 return Error(ErrorLoc, "invalid operand for instruction");
5328 }
5329
5330 case Match_PreferE32:
5331 return Error(IDLoc, "internal error: instruction without _e64 suffix "
5332 "should be encoded as e32");
5333 case Match_MnemonicFail:
5334 llvm_unreachable("Invalid instructions should have been handled already");
5335 }
5336 llvm_unreachable("Implement any new match types added!");
5337}
5338
5339bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5340 int64_t Tmp = -1;
5341 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5342 return true;
5343 }
5344 if (getParser().parseAbsoluteExpression(Tmp)) {
5345 return true;
5346 }
5347 Ret = static_cast<uint32_t>(Tmp);
5348 return false;
5349}
5350
5351bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5352 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5353 return TokError("directive only supported for amdgcn architecture");
5354
5355 std::string TargetIDDirective;
5356 SMLoc TargetStart = getTok().getLoc();
5357 if (getParser().parseEscapedString(TargetIDDirective))
5358 return true;
5359
5360 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5361 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5362 return getParser().Error(TargetRange.Start,
5363 (Twine(".amdgcn_target directive's target id ") +
5364 Twine(TargetIDDirective) +
5365 Twine(" does not match the specified target id ") +
5366 Twine(getTargetStreamer().getTargetID()->toString())).str());
5367
5368 return false;
5369}
5370
5371bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5372 return Error(Range.Start, "value out of range", Range);
5373}
5374
5375bool AMDGPUAsmParser::calculateGPRBlocks(
5376 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
5377 bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
5378 unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
5379 SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
5380 // TODO(scott.linder): These calculations are duplicated from
5381 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5382 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5383
5384 unsigned NumVGPRs = NextFreeVGPR;
5385 unsigned NumSGPRs = NextFreeSGPR;
5386
5387 if (Version.Major >= 10)
5388 NumSGPRs = 0;
5389 else {
5390 unsigned MaxAddressableNumSGPRs =
5391 IsaInfo::getAddressableNumSGPRs(&getSTI());
5392
5393 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
5394 NumSGPRs > MaxAddressableNumSGPRs)
5395 return OutOfRangeError(SGPRRange);
5396
5397 NumSGPRs +=
5398 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
5399
5400 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5401 NumSGPRs > MaxAddressableNumSGPRs)
5402 return OutOfRangeError(SGPRRange);
5403
5404 if (Features.test(FeatureSGPRInitBug))
5405 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
5406 }
5407
5408 VGPRBlocks = IsaInfo::getEncodedNumVGPRBlocks(&getSTI(), NumVGPRs,
5409 EnableWavefrontSize32);
5410 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
5411
5412 return false;
5413}
5414
5415bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5416 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5417 return TokError("directive only supported for amdgcn architecture");
5418
5419 if (!isHsaAbi(getSTI()))
5420 return TokError("directive only supported for amdhsa OS");
5421
5422 StringRef KernelName;
5423 if (getParser().parseIdentifier(KernelName))
5424 return true;
5425
5426 AMDGPU::MCKernelDescriptor KD =
5427 AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
5428 &getSTI(), getContext());
5429
5430 StringSet<> Seen;
5431
5432 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5433
5434 SMRange VGPRRange;
5435 uint64_t NextFreeVGPR = 0;
5436 uint64_t AccumOffset = 0;
5437 uint64_t SharedVGPRCount = 0;
5438 uint64_t PreloadLength = 0;
5439 uint64_t PreloadOffset = 0;
5440 SMRange SGPRRange;
5441 uint64_t NextFreeSGPR = 0;
5442
5443 // Count the number of user SGPRs implied from the enabled feature bits.
5444 unsigned ImpliedUserSGPRCount = 0;
5445
5446 // Track if the asm explicitly contains the directive for the user SGPR
5447 // count.
5448 std::optional<unsigned> ExplicitUserSGPRCount;
5449 bool ReserveVCC = true;
5450 bool ReserveFlatScr = true;
5451 std::optional<bool> EnableWavefrontSize32;
5452
5453 while (true) {
5454 while (trySkipToken(AsmToken::EndOfStatement));
5455
5456 StringRef ID;
5457 SMRange IDRange = getTok().getLocRange();
5458 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5459 return true;
5460
5461 if (ID == ".end_amdhsa_kernel")
5462 break;
5463
5464 if (!Seen.insert(ID).second)
5465 return TokError(".amdhsa_ directives cannot be repeated");
5466
5467 SMLoc ValStart = getLoc();
5468 const MCExpr *ExprVal;
5469 if (getParser().parseExpression(ExprVal))
5470 return true;
5471 SMLoc ValEnd = getLoc();
5472 SMRange ValRange = SMRange(ValStart, ValEnd);
5473
5474 int64_t IVal = 0;
5475 uint64_t Val = IVal;
5476 bool EvaluatableExpr;
5477 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
5478 if (IVal < 0)
5479 return OutOfRangeError(ValRange);
5480 Val = IVal;
5481 }
5482
5483#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5484 if (!isUInt<ENTRY##_WIDTH>(Val)) \
5485 return OutOfRangeError(RANGE); \
5486 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
5487 getContext());
5488
5489// Some fields use the parsed value immediately, which requires the expression
5490// to be resolvable.
5491#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
5492 if (!(RESOLVED)) \
5493 return Error(IDRange.Start, "directive should have resolvable expression", \
5494 IDRange);
5495
5496 if (ID == ".amdhsa_group_segment_fixed_size") {
5497 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
5498 CHAR_BIT>(Val))
5499 return OutOfRangeError(ValRange);
5500 KD.group_segment_fixed_size = ExprVal;
5501 } else if (ID == ".amdhsa_private_segment_fixed_size") {
5502 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
5503 CHAR_BIT>(Val))
5504 return OutOfRangeError(ValRange);
5505 KD.private_segment_fixed_size = ExprVal;
5506 } else if (ID == ".amdhsa_kernarg_size") {
5507 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
5508 return OutOfRangeError(ValRange);
5509 KD.kernarg_size = ExprVal;
5510 } else if (ID == ".amdhsa_user_sgpr_count") {
5511 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5512 ExplicitUserSGPRCount = Val;
5513 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5514 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5515 if (hasArchitectedFlatScratch())
5516 return Error(IDRange.Start,
5517 "directive is not supported with architected flat scratch",
5518 IDRange);
5519 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5520 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5521 ExprVal, ValRange);
5522 if (Val)
5523 ImpliedUserSGPRCount += 4;
5524 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
5525 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5526 if (!hasKernargPreload())
5527 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5528
5529 if (Val > getMaxNumUserSGPRs())
5530 return OutOfRangeError(ValRange);
5531 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
5532 ValRange);
5533 if (Val) {
5534 ImpliedUserSGPRCount += Val;
5535 PreloadLength = Val;
5536 }
5537 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
5538 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5539 if (!hasKernargPreload())
5540 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5541
5542 if (Val >= 1024)
5543 return OutOfRangeError(ValRange);
5544 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
5545 ValRange);
5546 if (Val)
5547 PreloadOffset = Val;
5548 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5549 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5551 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
5552 ValRange);
5553 if (Val)
5554 ImpliedUserSGPRCount += 2;
5555 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5556 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5558 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
5559 ValRange);
5560 if (Val)
5561 ImpliedUserSGPRCount += 2;
5562 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5563 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5565 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5566 ExprVal, ValRange);
5567 if (Val)
5568 ImpliedUserSGPRCount += 2;
5569 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5570 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5572 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
5573 ValRange);
5574 if (Val)
5575 ImpliedUserSGPRCount += 2;
5576 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5578 return Error(IDRange.Start,
5579 "directive is not supported with architected flat scratch",
5580 IDRange);
5581 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5583 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
5584 ExprVal, ValRange);
5585 if (Val)
5586 ImpliedUserSGPRCount += 2;
5587 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5588 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5590 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5591 ExprVal, ValRange);
5592 if (Val)
5593 ImpliedUserSGPRCount += 1;
5594 } else if (ID == ".amdhsa_wavefront_size32") {
5595 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5596 if (IVersion.Major < 10)
5597 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5598 EnableWavefrontSize32 = Val;
5600 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
5601 ValRange);
5602 } else if (ID == ".amdhsa_uses_dynamic_stack") {
5604 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
5605 ValRange);
5606 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5608 return Error(IDRange.Start,
5609 "directive is not supported with architected flat scratch",
5610 IDRange);
5612 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5613 ValRange);
5614 } else if (ID == ".amdhsa_enable_private_segment") {
5616 return Error(
5617 IDRange.Start,
5618 "directive is not supported without architected flat scratch",
5619 IDRange);
5621 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5622 ValRange);
5623 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5625 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
5626 ValRange);
5627 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5629 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
5630 ValRange);
5631 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5633 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
5634 ValRange);
5635 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5637 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
5638 ValRange);
5639 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5641 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
5642 ValRange);
5643 } else if (ID == ".amdhsa_next_free_vgpr") {
5644 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5645 VGPRRange = ValRange;
5646 NextFreeVGPR = Val;
5647 } else if (ID == ".amdhsa_next_free_sgpr") {
5648 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5649 SGPRRange = ValRange;
5650 NextFreeSGPR = Val;
5651 } else if (ID == ".amdhsa_accum_offset") {
5652 if (!isGFX90A())
5653 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5654 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5655 AccumOffset = Val;
5656 } else if (ID == ".amdhsa_reserve_vcc") {
5657 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5658 if (!isUInt<1>(Val))
5659 return OutOfRangeError(ValRange);
5660 ReserveVCC = Val;
5661 } else if (ID == ".amdhsa_reserve_flat_scratch") {
5662 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5663 if (IVersion.Major < 7)
5664 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5666 return Error(IDRange.Start,
5667 "directive is not supported with architected flat scratch",
5668 IDRange);
5669 if (!isUInt<1>(Val))
5670 return OutOfRangeError(ValRange);
5671 ReserveFlatScr = Val;
5672 } else if (ID == ".amdhsa_reserve_xnack_mask") {
5673 if (IVersion.Major < 8)
5674 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5675 if (!isUInt<1>(Val))
5676 return OutOfRangeError(ValRange);
5677 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5678 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5679 IDRange);
5680 } else if (ID == ".amdhsa_float_round_mode_32") {
5682 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
5683 ValRange);
5684 } else if (ID == ".amdhsa_float_round_mode_16_64") {
5686 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
5687 ValRange);
5688 } else if (ID == ".amdhsa_float_denorm_mode_32") {
5690 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
5691 ValRange);
5692 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5694 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
5695 ValRange);
5696 } else if (ID == ".amdhsa_dx10_clamp") {
5697 if (IVersion.Major >= 12)
5698 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5700 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
5701 ValRange);
5702 } else if (ID == ".amdhsa_ieee_mode") {
5703 if (IVersion.Major >= 12)
5704 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5706 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
5707 ValRange);
5708 } else if (ID == ".amdhsa_fp16_overflow") {
5709 if (IVersion.Major < 9)
5710 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5712 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
5713 ValRange);
5714 } else if (ID == ".amdhsa_tg_split") {
5715 if (!isGFX90A())
5716 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5717 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
5718 ExprVal, ValRange);
5719 } else if (ID == ".amdhsa_workgroup_processor_mode") {
5720 if (IVersion.Major < 10)
5721 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5723 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
5724 ValRange);
5725 } else if (ID == ".amdhsa_memory_ordered") {
5726 if (IVersion.Major < 10)
5727 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5729 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
5730 ValRange);
5731 } else if (ID == ".amdhsa_forward_progress") {
5732 if (IVersion.Major < 10)
5733 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5735 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
5736 ValRange);
5737 } else if (ID == ".amdhsa_shared_vgpr_count") {
5738 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5739 if (IVersion.Major < 10 || IVersion.Major >= 12)
5740 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
5741 IDRange);
5742 SharedVGPRCount = Val;
5744 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
5745 ValRange);
5746 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5749 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
5750 ExprVal, ValRange);
5751 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5753 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5754 ExprVal, ValRange);
5755 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5758 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
5759 ExprVal, ValRange);
5760 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5762 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5763 ExprVal, ValRange);
5764 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5766 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5767 ExprVal, ValRange);
5768 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5770 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5771 ExprVal, ValRange);
5772 } else if (ID == ".amdhsa_exception_int_div_zero") {
5774 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5775 ExprVal, ValRange);
5776 } else if (ID == ".amdhsa_round_robin_scheduling") {
5777 if (IVersion.Major < 12)
5778 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
5780 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
5781 ValRange);
5782 } else {
5783 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5784 }
5785
5786#undef PARSE_BITS_ENTRY
5787 }
5788
5789 if (!Seen.contains(".amdhsa_next_free_vgpr"))
5790 return TokError(".amdhsa_next_free_vgpr directive is required");
5791
5792 if (!Seen.contains(".amdhsa_next_free_sgpr"))
5793 return TokError(".amdhsa_next_free_sgpr directive is required");
5794
5795 unsigned VGPRBlocks;
5796 unsigned SGPRBlocks;
5797 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5798 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5799 EnableWavefrontSize32, NextFreeVGPR,
5800 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5801 SGPRBlocks))
5802 return true;
5803
5804 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5805 VGPRBlocks))
5806 return OutOfRangeError(VGPRRange);
5808 KD.compute_pgm_rsrc1, MCConstantExpr::create(VGPRBlocks, getContext()),
5809 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
5810 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
5811
5812 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5813 SGPRBlocks))
5814 return OutOfRangeError(SGPRRange);
5816 KD.compute_pgm_rsrc1, MCConstantExpr::create(SGPRBlocks, getContext()),
5817 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
5818 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
5819
5820 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5821 return TokError("amdgpu_user_sgpr_count smaller than than implied by "
5822 "enabled user SGPRs");
5823
5824 unsigned UserSGPRCount =
5825 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5826
5827 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5828 return TokError("too many user SGPRs enabled");
5830 KD.compute_pgm_rsrc2, MCConstantExpr::create(UserSGPRCount, getContext()),
5831 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
5832 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
5833
5834 int64_t IVal = 0;
5835 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
5836 return TokError("Kernarg size should be resolvable");
5837 uint64_t kernarg_size = IVal;
5838 if (PreloadLength && kernarg_size &&
5839 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
5840 return TokError("Kernarg preload length + offset is larger than the "
5841 "kernarg segment size");
5842
5843 if (isGFX90A()) {
5844 if (!Seen.contains(".amdhsa_accum_offset"))
5845 return TokError(".amdhsa_accum_offset directive is required");
5846 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5847 return TokError("accum_offset should be in range [4..256] in "
5848 "increments of 4");
5849 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5850 return TokError("accum_offset exceeds total VGPR allocation");
5853 MCConstantExpr::create(AccumOffset / 4 - 1, getContext()),
5854 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
5855 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
5856 }
5857
5858 if (IVersion.Major >= 10 && IVersion.Major < 12) {
5859 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5860 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5861 return TokError("shared_vgpr_count directive not valid on "
5862 "wavefront size 32");
5863 }
5864 if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5865 return TokError("shared_vgpr_count*2 + "
5866 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5867 "exceed 63\n");
5868 }
5869 }
5870
5871 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
5872 NextFreeVGPR, NextFreeSGPR,
5873 ReserveVCC, ReserveFlatScr);
5874 return false;
5875}
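// A minimal sketch of the input this parser accepts (kernel name and values
// are illustrative): only .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr
// are mandatory, plus .amdhsa_accum_offset on gfx90a; every other field keeps
// the default from the kernel descriptor created above.
//
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel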
5876
5877bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
5878 uint32_t Version;
5879 if (ParseAsAbsoluteExpression(Version))
5880 return true;
5881
5882 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
5883 return false;
5884}
5885
5886bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5887 amd_kernel_code_t &Header) {
5888 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5889 // assembly for backwards compatibility.
5890 if (ID == "max_scratch_backing_memory_byte_size") {
5891 Parser.eatToEndOfStatement();
5892 return false;
5893 }
5894
5895 SmallString<40> ErrStr;
5896 raw_svector_ostream Err(ErrStr);
5897 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5898 return TokError(Err.str());
5899 }
5900 Lex();
5901
5902 if (ID == "enable_dx10_clamp") {
5903 if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) &&
5904 isGFX12Plus())
5905 return TokError("enable_dx10_clamp=1 is not allowed on GFX12+");
5906 }
5907
5908 if (ID == "enable_ieee_mode") {
5909 if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) &&
5910 isGFX12Plus())
5911 return TokError("enable_ieee_mode=1 is not allowed on GFX12+");
5912 }
5913
5914 if (ID == "enable_wavefront_size32") {
5915 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5916 if (!isGFX10Plus())
5917 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5918 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5919 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5920 } else {
5921 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5922 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5923 }
5924 }
5925
5926 if (ID == "wavefront_size") {
5927 if (Header.wavefront_size == 5) {
5928 if (!isGFX10Plus())
5929 return TokError("wavefront_size=5 is only allowed on GFX10+");
5930 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5931 return TokError("wavefront_size=5 requires +WavefrontSize32");
5932 } else if (Header.wavefront_size == 6) {
5933 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5934 return TokError("wavefront_size=6 requires +WavefrontSize64");
5935 }
5936 }
5937
5938 if (ID == "enable_wgp_mode") {
5939 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5940 !isGFX10Plus())
5941 return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5942 }
5943
5944 if (ID == "enable_mem_ordered") {
5945 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5946 !isGFX10Plus())
5947 return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5948 }
5949
5950 if (ID == "enable_fwd_progress") {
5951 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5952 !isGFX10Plus())
5953 return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5954 }
5955
5956 return false;
5957}
5958
5959bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5960 amd_kernel_code_t Header;
5961 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5962
5963 while (true) {
5964 // Lex EndOfStatement. This is in a while loop, because lexing a comment
5965 // will set the current token to EndOfStatement.
5966 while(trySkipToken(AsmToken::EndOfStatement));
5967
5968 StringRef ID;
5969 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5970 return true;
5971
5972 if (ID == ".end_amd_kernel_code_t")
5973 break;
5974
5975 if (ParseAMDKernelCodeTValue(ID, Header))
5976 return true;
5977 }
5978
5979 getTargetStreamer().EmitAMDKernelCodeT(Header);
5980
5981 return false;
5982}
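// A minimal sketch of the legacy block this directive parses (field names and
// values below are illustrative amd_kernel_code_t fields, not a complete
// example):
//
//   .amd_kernel_code_t
//     enable_sgpr_kernarg_segment_ptr = 1
//     granulated_workitem_vgpr_count = 1
//     wavefront_size = 6
//   .end_amd_kernel_code_t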
5983
5984bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5985 StringRef KernelName;
5986 if (!parseId(KernelName, "expected symbol name"))
5987 return true;
5988
5989 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5990 ELF::STT_AMDGPU_HSA_KERNEL);
5991
5992 KernelScope.initialize(getContext());
5993 return false;
5994}
5995
5996bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5997 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5998 return Error(getLoc(),
5999 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6000 "architectures");
6001 }
6002
6003 auto TargetIDDirective = getLexer().getTok().getStringContents();
6004 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6005 return Error(getParser().getTok().getLoc(), "target id must match options");
6006
6007 getTargetStreamer().EmitISAVersion();
6008 Lex();
6009
6010 return false;
6011}
6012
6013bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6014 assert(isHsaAbi(getSTI()));
6015
6016 std::string HSAMetadataString;
6017 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6018 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6019 return true;
6020
6021 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6022 return Error(getLoc(), "invalid HSA metadata");
6023
6024 return false;
6025}
6026
6027/// Common code to parse out a block of text (typically YAML) between start and
6028/// end directives.
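///
/// For instance (illustrative), the HSA metadata parser above passes
/// ".amdgpu_metadata" / ".end_amdgpu_metadata", so everything between
///
///   .amdgpu_metadata
///   amdhsa.version: [ 1, 2 ]
///   .end_amdgpu_metadata
///
/// is collected verbatim into CollectString.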
6029bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6030 const char *AssemblerDirectiveEnd,
6031 std::string &CollectString) {
6032
6033 raw_string_ostream CollectStream(CollectString);
6034
6035 getLexer().setSkipSpace(false);
6036
6037 bool FoundEnd = false;
6038 while (!isToken(AsmToken::Eof)) {
6039 while (isToken(AsmToken::Space)) {
6040 CollectStream << getTokenStr();
6041 Lex();
6042 }
6043
6044 if (trySkipId(AssemblerDirectiveEnd)) {
6045 FoundEnd = true;
6046 break;
6047 }
6048
6049 CollectStream << Parser.parseStringToEndOfStatement()
6050 << getContext().getAsmInfo()->getSeparatorString();
6051
6052 Parser.eatToEndOfStatement();
6053 }
6054
6055 getLexer().setSkipSpace(true);
6056
6057 if (isToken(AsmToken::Eof) && !FoundEnd) {
6058 return TokError(Twine("expected directive ") +
6059 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6060 }
6061
6062 CollectStream.flush();
6063 return false;
6064}
6065
6066/// Parse the assembler directive for new MsgPack-format PAL metadata.
6067bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6068 std::string String;
6069 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6070 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
6071 return true;
6072
6073 auto PALMetadata = getTargetStreamer().getPALMetadata();
6074 if (!PALMetadata->setFromString(String))
6075 return Error(getLoc(), "invalid PAL metadata");
6076 return false;
6077}
6078
6079/// Parse the assembler directive for old linear-format PAL metadata.
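/// The legacy form is a flat list of register/value pairs, e.g. (values are
/// illustrative):
///
///   .amd_amdgpu_pal_metadata 0x2c0a, 0x0, 0x2c0b, 0x42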
6080bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6081 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6082 return Error(getLoc(),
6083 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6084 "not available on non-amdpal OSes")).str());
6085 }
6086
6087 auto PALMetadata = getTargetStreamer().getPALMetadata();
6088 PALMetadata->setLegacy();
6089 for (;;) {
6090 uint32_t Key, Value;
6091 if (ParseAsAbsoluteExpression(Key)) {
6092 return TokError(Twine("invalid value in ") +
6093 Twine(PALMD::AssemblerDirective));
6094 }
6095 if (!trySkipToken(AsmToken::Comma)) {
6096 return TokError(Twine("expected an even number of values in ") +
6097 Twine(PALMD::AssemblerDirective));
6098 }
6099 if (ParseAsAbsoluteExpression(Value)) {
6100 return TokError(Twine("invalid value in ") +
6101 Twine(PALMD::AssemblerDirective));
6102 }
6103 PALMetadata->setRegister(Key, Value);
6104 if (!trySkipToken(AsmToken::Comma))
6105 break;
6106 }
6107 return false;
6108}
6109
6110/// ParseDirectiveAMDGPULDS
6111/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
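/// e.g. (illustrative): ".amdgpu_lds my_lds_var, 4096, 16" declares a
/// 4096-byte LDS allocation for my_lds_var aligned to 16 bytes.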
6112bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6113 if (getParser().checkForValidSection())
6114 return true;
6115
6116 StringRef Name;
6117 SMLoc NameLoc = getLoc();
6118 if (getParser().parseIdentifier(Name))
6119 return TokError("expected identifier in directive");
6120
6121 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6122 if (getParser().parseComma())
6123 return true;
6124
6125 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6126
6127 int64_t Size;
6128 SMLoc SizeLoc = getLoc();
6129 if (getParser().parseAbsoluteExpression(Size))
6130 return true;
6131 if (Size < 0)
6132 return Error(SizeLoc, "size must be non-negative");
6133 if (Size > LocalMemorySize)
6134 return Error(SizeLoc, "size is too large");
6135
6136 int64_t Alignment = 4;
6137 if (trySkipToken(AsmToken::Comma)) {
6138 SMLoc AlignLoc = getLoc();
6139 if (getParser().parseAbsoluteExpression(Alignment))
6140 return true;
6141 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6142 return Error(AlignLoc, "alignment must be a power of two");
6143
6144 // Alignment larger than the size of LDS is possible in theory, as long
6145 // as the linker manages to place the symbol at address 0, but we do want
6146 // to make sure the alignment fits nicely into a 32-bit integer.
6147 if (Alignment >= 1u << 31)
6148 return Error(AlignLoc, "alignment is too large");
6149 }
6150
6151 if (parseEOL())
6152 return true;
6153
6154 Symbol->redefineIfPossible();
6155 if (!Symbol->isUndefined())
6156 return Error(NameLoc, "invalid symbol redefinition");
6157
6158 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6159 return false;
6160}
6161
6162bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6163 StringRef IDVal = DirectiveID.getString();
6164
6165 if (isHsaAbi(getSTI())) {
6166 if (IDVal == ".amdhsa_kernel")
6167 return ParseDirectiveAMDHSAKernel();
6168
6169 if (IDVal == ".amdhsa_code_object_version")
6170 return ParseDirectiveAMDHSACodeObjectVersion();
6171
6172 // TODO: Restructure/combine with PAL metadata directive.
6173 if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
6174 return ParseDirectiveHSAMetadata();
6175 } else {
6176 if (IDVal == ".amd_kernel_code_t")
6177 return ParseDirectiveAMDKernelCodeT();
6178
6179 if (IDVal == ".amdgpu_hsa_kernel")
6180 return ParseDirectiveAMDGPUHsaKernel();
6181
6182 if (IDVal == ".amd_amdgpu_isa")
6183 return ParseDirectiveISAVersion();
6184
6185 if (IDVal == HSAMD::AssemblerDirectiveBegin) {
6186 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6187 Twine(" directive is "
6188 "not available on non-amdhsa OSes"))
6189 .str());
6190 }
6191 }
6192
6193 if (IDVal == ".amdgcn_target")
6194 return ParseDirectiveAMDGCNTarget();
6195
6196 if (IDVal == ".amdgpu_lds")
6197 return ParseDirectiveAMDGPULDS();
6198
6199 if (IDVal == PALMD::AssemblerDirectiveBegin)
6200 return ParseDirectivePALMetadataBegin();
6201
6202 if (IDVal == PALMD::AssemblerDirective)
6203 return ParseDirectivePALMetadata();
6204
6205 return true;
6206}
6207
6208bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6209 unsigned RegNo) {
6210
6211 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
6212 return isGFX9Plus();
6213
6214 // GFX10+ has 2 more SGPRs 104 and 105.
6215 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
6216 return hasSGPR104_SGPR105();
6217
6218 switch (RegNo) {
6219 case AMDGPU::SRC_SHARED_BASE_LO:
6220 case AMDGPU::SRC_SHARED_BASE:
6221 case AMDGPU::SRC_SHARED_LIMIT_LO:
6222 case AMDGPU::SRC_SHARED_LIMIT:
6223 case AMDGPU::SRC_PRIVATE_BASE_LO:
6224 case AMDGPU::SRC_PRIVATE_BASE:
6225 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
6226 case AMDGPU::SRC_PRIVATE_LIMIT:
6227 return isGFX9Plus();
6228 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
6229 return isGFX9Plus() && !isGFX11Plus();
6230 case AMDGPU::TBA:
6231 case AMDGPU::TBA_LO:
6232 case AMDGPU::TBA_HI:
6233 case AMDGPU::TMA:
6234 case AMDGPU::TMA_LO:
6235 case AMDGPU::TMA_HI:
6236 return !isGFX9Plus();
6237 case AMDGPU::XNACK_MASK:
6238 case AMDGPU::XNACK_MASK_LO:
6239 case AMDGPU::XNACK_MASK_HI:
6240 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6241 case AMDGPU::SGPR_NULL:
6242 return isGFX10Plus();
6243 default:
6244 break;
6245 }
6246
6247 if (isCI())
6248 return true;
6249
6250 if (isSI() || isGFX10Plus()) {
6251 // No flat_scr on SI.
6252 // On GFX10Plus flat scratch is not a valid register operand and can only be
6253 // accessed with s_setreg/s_getreg.
6254 switch (RegNo) {
6255 case AMDGPU::FLAT_SCR:
6256 case AMDGPU::FLAT_SCR_LO:
6257 case AMDGPU::FLAT_SCR_HI:
6258 return false;
6259 default:
6260 return true;
6261 }
6262 }
6263
6264 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6265 // SI/CI have.
6266 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
6267 return hasSGPR102_SGPR103();
6268
6269 return true;
6270}
6271
6272ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6273 StringRef Mnemonic,
6274 OperandMode Mode) {
6275 ParseStatus Res = parseVOPD(Operands);
6276 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6277 return Res;
6278
6279 // Try to parse with a custom parser
6280 Res = MatchOperandParserImpl(Operands, Mnemonic);
6281
6282 // If we successfully parsed the operand or if there was an error parsing,
6283 // we are done.
6284 //
6285 // If we are parsing after we reach EndOfStatement then this means we
6286 // are appending default values to the Operands list. This is only done
6287 // by custom parser, so we shouldn't continue on to the generic parsing.
6288 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6289 return Res;
6290
6291 SMLoc RBraceLoc;
6292 SMLoc LBraceLoc = getLoc();
6293 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6294 unsigned Prefix = Operands.size();
6295
6296 for (;;) {
6297 auto Loc = getLoc();
6298 Res = parseReg(Operands);
6299 if (Res.isNoMatch())
6300 Error(Loc, "expected a register");
6301 if (!Res.isSuccess())
6302 return ParseStatus::Failure;
6303
6304 RBraceLoc = getLoc();
6305 if (trySkipToken(AsmToken::RBrac))
6306 break;
6307
6308 if (!skipToken(AsmToken::Comma,
6309 "expected a comma or a closing square bracket"))
6310 return ParseStatus::Failure;
6311 }
6312
6313 if (Operands.size() - Prefix > 1) {
6314 Operands.insert(Operands.begin() + Prefix,
6315 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6316 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6317 }
6318
6319 return ParseStatus::Success;
6320 }
6321
6322 return parseRegOrImm(Operands);
6323}
6324
6325StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6326 // Clear any forced encodings from the previous instruction.
6327 setForcedEncodingSize(0);
6328 setForcedDPP(false);
6329 setForcedSDWA(false);
6330
6331 if (Name.ends_with("_e64_dpp")) {
6332 setForcedDPP(true);
6333 setForcedEncodingSize(64);
6334 return Name.substr(0, Name.size() - 8);
6335 } else if (Name.ends_with("_e64")) {
6336 setForcedEncodingSize(64);
6337 return Name.substr(0, Name.size() - 4);
6338 } else if (Name.ends_with("_e32")) {
6339 setForcedEncodingSize(32);
6340 return Name.substr(0, Name.size() - 4);
6341 } else if (Name.ends_with("_dpp")) {
6342 setForcedDPP(true);
6343 return Name.substr(0, Name.size() - 4);
6344 } else if (Name.ends_with("_sdwa")) {
6345 setForcedSDWA(true);
6346 return Name.substr(0, Name.size() - 5);
6347 }
6348 return Name;
6349}
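// Illustrative examples of the suffix handling above (ordinary AMDGPU
// mnemonics, not specific to this file): "v_add_f32_e64" forces the 64-bit
// VOP3 encoding, "v_add_f32_e32" the 32-bit VOP2 encoding, "v_mov_b32_sdwa"
// the SDWA encoding, and "v_add_f32_e64_dpp" the 64-bit DPP encoding; the
// suffix is stripped before the mnemonic is matched.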
6350
6351static void applyMnemonicAliases(StringRef &Mnemonic,
6352 const FeatureBitset &Features,
6353 unsigned VariantID);
6354
6355bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
6356 StringRef Name,
6357 SMLoc NameLoc, OperandVector &Operands) {
6358 // Add the instruction mnemonic
6359 Name = parseMnemonicSuffix(Name);
6360
6361 // If the target architecture uses MnemonicAlias, call it here to parse
6362 // operands correctly.
6363 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6364
6365 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6366
6367 bool IsMIMG = Name.starts_with("image_");
6368
6369 while (!trySkipToken(AsmToken::EndOfStatement)) {
6370 OperandMode Mode = OperandMode_Default;
6371 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6372 Mode = OperandMode_NSA;
6373 ParseStatus Res = parseOperand(Operands, Name, Mode);
6374
6375 if (!Res.isSuccess()) {
6376 checkUnsupportedInstruction(Name, NameLoc);
6377 if (!Parser.hasPendingError()) {
6378 // FIXME: use real operand location rather than the current location.
6379 StringRef Msg = Res.isFailure() ? "failed parsing operand."
6380 : "not a valid operand.";
6381 Error(getLoc(), Msg);
6382 }
6383 while (!trySkipToken(AsmToken::EndOfStatement)) {
6384 lex();
6385 }
6386 return true;
6387 }
6388
6389 // Eat the comma or space if there is one.
6390 trySkipToken(AsmToken::Comma);
6391 }
6392
6393 return false;
6394}
6395
6396//===----------------------------------------------------------------------===//
6397// Utility functions
6398//===----------------------------------------------------------------------===//
6399
6400ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6401 OperandVector &Operands) {
6402 SMLoc S = getLoc();
6403 if (!trySkipId(Name))
6404 return ParseStatus::NoMatch;
6405
6406 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6407 return ParseStatus::Success;
6408}
6409
6410ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6411 int64_t &IntVal) {
6412
6413 if (!trySkipId(Prefix, AsmToken::Colon))
6414 return ParseStatus::NoMatch;
6415
6416 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6417}
6418
6419ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6420 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6421 std::function<bool(int64_t &)> ConvertResult) {
6422 SMLoc S = getLoc();
6423 int64_t Value = 0;
6424
6425 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6426 if (!Res.isSuccess())
6427 return Res;
6428
6429 if (ConvertResult && !ConvertResult(Value)) {
6430 Error(S, "invalid " + StringRef(Prefix) + " value.");
6431 }
6432
6433 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6434 return ParseStatus::Success;
6435}
6436
6437ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6438 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6439 bool (*ConvertResult)(int64_t &)) {
6440 SMLoc S = getLoc();
6441 if (!trySkipId(Prefix, AsmToken::Colon))
6442 return ParseStatus::NoMatch;
6443
6444 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
6445 return ParseStatus::Failure;
6446
6447 unsigned Val = 0;
6448 const unsigned MaxSize = 4;
6449
6450 // FIXME: How to verify the number of elements matches the number of src
6451 // operands?
6452 for (int I = 0; ; ++I) {
6453 int64_t Op;
6454 SMLoc Loc = getLoc();
6455 if (!parseExpr(Op))
6456 return ParseStatus::Failure;
6457
6458 if (Op != 0 && Op != 1)
6459 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
6460
6461 Val |= (Op << I);
6462
6463 if (trySkipToken(AsmToken::RBrac))
6464 break;
6465
6466 if (I + 1 == MaxSize)
6467 return Error(getLoc(), "expected a closing square bracket");
6468
6469 if (!skipToken(AsmToken::Comma, "expected a comma"))
6470 return ParseStatus::Failure;
6471 }
6472
6473 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
6474 return ParseStatus::Success;
6475}
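// e.g. (illustrative) this accepts array operands such as op_sel:[1,0] or
// neg_lo:[0,1,1], packing each 0/1 element into one bit of the resulting
// immediate.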
6476
6477ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
6478 OperandVector &Operands,
6479 AMDGPUOperand::ImmTy ImmTy) {
6480 int64_t Bit;
6481 SMLoc S = getLoc();
6482
6483 if (trySkipId(Name)) {
6484 Bit = 1;
6485 } else if (trySkipId("no", Name)) {
6486 Bit = 0;
6487 } else {
6488 return ParseStatus::NoMatch;
6489 }
6490
6491 if (Name == "r128" && !hasMIMG_R128())
6492 return Error(S, "r128 modifier is not supported on this GPU");
6493 if (Name == "a16" && !hasA16())
6494 return Error(S, "a16 modifier is not supported on this GPU");
6495
6496 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6497 ImmTy = AMDGPUOperand::ImmTyR128A16;
6498
6499 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
6500 return ParseStatus::Success;
6501}
6502
6503unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
6504 bool &Disabling) const {
6505 Disabling = Id.consume_front("no");
6506
6507 if (isGFX940() && !Mnemo.starts_with("s_")) {
6508 return StringSwitch<unsigned>(Id)
6509 .Case("nt", AMDGPU::CPol::NT)
6510 .Case("sc0", AMDGPU::CPol::SC0)
6511 .Case("sc1", AMDGPU::CPol::SC1)
6512 .Default(0);
6513 }
6514
6515 return StringSwitch<unsigned>(Id)
6516 .Case("dlc", AMDGPU::CPol::DLC)
6517 .Case("glc", AMDGPU::CPol::GLC)
6518 .Case("scc", AMDGPU::CPol::SCC)
6519 .Case("slc", AMDGPU::CPol::SLC)
6520 .Default(0);
6521}
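// Illustrative cache-policy spellings handled here and in parseCPol below:
// pre-GFX12 instructions take bare keywords such as "glc slc dlc" (with "no"
// prefixes like "noglc" to clear a bit), gfx940 non-scalar instructions use
// "sc0"/"sc1"/"nt", and GFX12+ uses the th:/scope: forms, e.g.
// "th:TH_LOAD_NT scope:SCOPE_SYS".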
6522
6523ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
6524 if (isGFX12Plus()) {
6525 SMLoc StringLoc = getLoc();
6526
6527 int64_t CPolVal = 0;
6528 ParseStatus ResTH = ParseStatus::NoMatch;
6529 ParseStatus ResScope = ParseStatus::NoMatch;
6530
6531 for (;;) {
6532 if (ResTH.isNoMatch()) {
6533 int64_t TH;
6534 ResTH = parseTH(Operands, TH);
6535 if (ResTH.isFailure())
6536 return ResTH;
6537 if (ResTH.isSuccess()) {
6538 CPolVal |= TH;
6539 continue;
6540 }
6541 }
6542
6543 if (ResScope.isNoMatch()) {
6544 int64_t Scope;
6545 ResScope = parseScope(Operands, Scope);
6546 if (ResScope.isFailure())
6547 return ResScope;
6548 if (ResScope.isSuccess()) {
6549 CPolVal |= Scope;
6550 continue;
6551 }
6552 }
6553
6554 break;
6555 }
6556
6557 if (ResTH.isNoMatch() && ResScope.isNoMatch())
6558 return ParseStatus::NoMatch;
6559
6560 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
6561 AMDGPUOperand::ImmTyCPol));
6562 return ParseStatus::Success;
6563 }
6564
6565 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
6566 SMLoc OpLoc = getLoc();
6567 unsigned Enabled = 0, Seen = 0;
6568 for (;;) {
6569 SMLoc S = getLoc();
6570 bool Disabling;
6571 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
6572 if (!CPol)
6573 break;
6574
6575 lex();
6576
6577 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
6578 return Error(S, "dlc modifier is not supported on this GPU");
6579
6580 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
6581 return Error(S, "scc modifier is not supported on this GPU");
6582
6583 if (Seen & CPol)
6584 return Error(S, "duplicate cache policy modifier");
6585
6586 if (!Disabling)
6587 Enabled |= CPol;
6588
6589 Seen |= CPol;
6590 }
6591
6592 if (!Seen)
6593 return ParseStatus::NoMatch;
6594
6595 Operands.push_back(
6596 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
6597 return ParseStatus::Success;
6598}
6599
6600ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
6601 int64_t &Scope) {
6602 Scope = AMDGPU::CPol::SCOPE_CU; // default;
6603
6604 StringRef Value;
6605 SMLoc StringLoc;
6606 ParseStatus Res;
6607
6608 Res = parseStringWithPrefix("scope", Value, StringLoc);
6609 if (!Res.isSuccess())
6610 return Res;
6611
6613 .Case("SCOPE_CU", AMDGPU::CPol::SCOPE_CU)
6614 .Case("SCOPE_SE", AMDGPU::CPol::SCOPE_SE)
6615 .Case("SCOPE_DEV", AMDGPU::CPol::SCOPE_DEV)
6616 .Case("SCOPE_SYS", AMDGPU::CPol::SCOPE_SYS)
6617 .Default(0xffffffff);
6618
6619 if (Scope == 0xffffffff)
6620 return Error(StringLoc, "invalid scope value");
6621
6622 return ParseStatus::Success;
6623}
6624
6625ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
6626 TH = AMDGPU::CPol::TH_RT; // default
6627
6628 StringRef Value;
6629 SMLoc StringLoc;
6630 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
6631 if (!Res.isSuccess())
6632 return Res;
6633
6634 if (Value == "TH_DEFAULT")
6636 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" ||
6637 Value == "TH_LOAD_NT_WB") {
6638 return Error(StringLoc, "invalid th value");
6639 } else if (Value.consume_front("TH_ATOMIC_")) {
6641 } else if (Value.consume_front("TH_LOAD_")) {
6643 } else if (Value.consume_front("TH_STORE_")) {
6645 } else {
6646 return Error(StringLoc, "invalid th value");
6647 }
6648
6649 if (Value == "BYPASS")
6651
6652 if (TH != 0) {
6659 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
6662 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
6664 .Default(0xffffffff);
6665 else
6671 .Case("RT_WB", AMDGPU::CPol::TH_RT_WB)
6672 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
6673 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
6674 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
6675 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
6676 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
6677 .Default(0xffffffff);
6678 }
6679
6680 if (TH == 0xffffffff)
6681 return Error(StringLoc, "invalid th value");
6682
6683 return ParseStatus::Success;
6684}
6685
6686static void addOptionalImmOperand(
6687 MCInst& Inst, const OperandVector& Operands,
6688 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6689 AMDGPUOperand::ImmTy ImmT,
6690 int64_t Default = 0) {
6691 auto i = OptionalIdx.find(ImmT);
6692 if (i != OptionalIdx.end()) {
6693 unsigned Idx = i->second;
6694 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
6695 } else {
6696 Inst.addOperand(MCOperand::createImm(Default));
6697 }
6698}
6699
6700ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6701 StringRef &Value,
6702 SMLoc &StringLoc) {
6703 if (!trySkipId(Prefix, AsmToken::Colon))
6704 return ParseStatus::NoMatch;
6705
6706 StringLoc = getLoc();
6707 return parseId(Value, "expected an identifier") ? ParseStatus::Success
6708 : ParseStatus::Failure;
6709}
6710
6711//===----------------------------------------------------------------------===//
6712// MTBUF format
6713//===----------------------------------------------------------------------===//
6714
6715bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6716 int64_t MaxVal,
6717 int64_t &Fmt) {
6718 int64_t Val;
6719 SMLoc Loc = getLoc();
6720
6721 auto Res = parseIntWithPrefix(Pref, Val);
6722 if (Res.isFailure())
6723 return false;
6724 if (Res.isNoMatch())
6725 return true;
6726
6727 if (Val < 0 || Val > MaxVal) {
6728 Error(Loc, Twine("out of range ", StringRef(Pref)));
6729 return false;
6730 }
6731
6732 Fmt = Val;
6733 return true;
6734}
6735
6736ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
6737 AMDGPUOperand::ImmTy ImmTy) {
6738 const char *Pref = "index_key";
6739 int64_t ImmVal = 0;
6740 SMLoc Loc = getLoc();
6741 auto Res = parseIntWithPrefix(Pref, ImmVal);
6742 if (!Res.isSuccess())
6743 return Res;
6744
6745 if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
6746 return Error(Loc, Twine("out of range ", StringRef(Pref)));
6747
6748 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
6749 return Error(Loc, Twine("out of range ", StringRef(Pref)));
6750
6751 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
6752 return ParseStatus::Success;
6753}
6754
6755ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
6756 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
6757}
6758
6759ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
6760 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
6761}
6762
6763// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6764// values to live in a joint format operand in the MCInst encoding.
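// e.g. (illustrative) a pre-GFX10 instruction such as
//   tbuffer_load_format_x v0, off, s[4:7], dfmt:4, nfmt:7, 0
// carries both fields in the single FORMAT operand built below.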
6765ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6766 using namespace llvm::AMDGPU::MTBUFFormat;
6767
6768 int64_t Dfmt = DFMT_UNDEF;
6769 int64_t Nfmt = NFMT_UNDEF;
6770
6771 // dfmt and nfmt can appear in either order, and each is optional.
6772 for (int I = 0; I < 2; ++I) {
6773 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6774 return ParseStatus::Failure;
6775
6776 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
6777 return ParseStatus::Failure;
6778
6779 // Skip optional comma between dfmt/nfmt
6780 // but guard against 2 commas following each other.
6781 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6782 !peekToken().is(AsmToken::Comma)) {
6783 trySkipToken(AsmToken::Comma);
6784 }
6785 }
6786
6787 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6788 return ParseStatus::NoMatch;
6789
6790 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6791 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6792
6793 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6794 return ParseStatus::Success;
6795}
6796
6797ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6798 using namespace llvm::AMDGPU::MTBUFFormat;
6799
6800 int64_t Fmt = UFMT_UNDEF;
6801
6802 if (!tryParseFmt("format", UFMT_MAX, Fmt))
6803 return ParseStatus::Failure;
6804
6805 if (Fmt == UFMT_UNDEF)
6806 return ParseStatus::NoMatch;
6807
6808 Format = Fmt;
6809 return ParseStatus::Success;
6810}
6811
6812bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6813 int64_t &Nfmt,
6814 StringRef FormatStr,
6815 SMLoc Loc) {
6816 using namespace llvm::AMDGPU::MTBUFFormat;
6817 int64_t Format;
6818
6819 Format = getDfmt(FormatStr);
6820 if (Format != DFMT_UNDEF) {
6821 Dfmt = Format;
6822 return true;
6823 }
6824
6825 Format = getNfmt(FormatStr, getSTI());
6826 if (Format != NFMT_UNDEF) {
6827 Nfmt = Format;
6828 return true;
6829 }
6830
6831 Error(Loc, "unsupported format");
6832 return false;
6833}
6834
6835ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6836 SMLoc FormatLoc,
6837 int64_t &Format) {
6838 using namespace llvm::AMDGPU::MTBUFFormat;
6839
6840 int64_t Dfmt = DFMT_UNDEF;
6841 int64_t Nfmt = NFMT_UNDEF;
6842 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6843 return ParseStatus::Failure;
6844
6845 if (trySkipToken(AsmToken::Comma)) {
6846 StringRef Str;
6847 SMLoc Loc = getLoc();
6848 if (!parseId(Str, "expected a format string") ||
6849 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
6850 return ParseStatus::Failure;
6851 if (Dfmt == DFMT_UNDEF)
6852 return Error(Loc, "duplicate numeric format");
6853 if (Nfmt == NFMT_UNDEF)
6854 return Error(Loc, "duplicate data format");
6855 }
6856
6857 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6858 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6859
6860 if (isGFX10Plus()) {
6861 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6862 if (Ufmt == UFMT_UNDEF)
6863 return Error(FormatLoc, "unsupported format");
6864 Format = Ufmt;
6865 } else {
6866 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6867 }
6868
6869 return ParseStatus::Success;
6870}
6871
6872ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6873 SMLoc Loc,
6874 int64_t &Format) {
6875 using namespace llvm::AMDGPU::MTBUFFormat;
6876
6877 auto Id = getUnifiedFormat(FormatStr, getSTI());
6878 if (Id == UFMT_UNDEF)
6879 return ParseStatus::NoMatch;
6880
6881 if (!isGFX10Plus())
6882 return Error(Loc, "unified format is not supported on this GPU");
6883
6884 Format = Id;
6885 return ParseStatus::Success;
6886}
6887
6888ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6889 using namespace llvm::AMDGPU::MTBUFFormat;
6890 SMLoc Loc = getLoc();
6891
6892 if (!parseExpr(Format))
6893 return ParseStatus::Failure;
6894 if (!isValidFormatEncoding(Format, getSTI()))
6895 return Error(Loc, "out of range format");
6896
6897 return ParseStatus::Success;
6898}
6899
6900ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6901 using namespace llvm::AMDGPU::MTBUFFormat;
6902
6903 if (!trySkipId("format", AsmToken::Colon))
6904 return ParseStatus::NoMatch;
6905
6906 if (trySkipToken(AsmToken::LBrac)) {
6907 StringRef FormatStr;
6908 SMLoc Loc = getLoc();
6909 if (!parseId(FormatStr, "expected a format string"))
6910 return ParseStatus::Failure;
6911
6912 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6913 if (Res.isNoMatch())
6914 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6915 if (!Res.isSuccess())
6916 return Res;
6917
6918 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6919 return ParseStatus::Failure;
6920
6921 return ParseStatus::Success;
6922 }
6923
6924 return parseNumericFormat(Format);
6925}
6926
6927ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6928 using namespace llvm::AMDGPU::MTBUFFormat;
6929
6930 int64_t Format = getDefaultFormatEncoding(getSTI());
6931 ParseStatus Res;
6932 SMLoc Loc = getLoc();
6933
6934 // Parse legacy format syntax.
6935 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6936 if (Res.isFailure())
6937 return Res;
6938
6939 bool FormatFound = Res.isSuccess();
6940
6941 Operands.push_back(
6942 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6943
6944 if (FormatFound)
6945 trySkipToken(AsmToken::Comma);
6946
6947 if (isToken(AsmToken::EndOfStatement)) {
6948 // We are expecting an soffset operand,
6949 // but let the matcher handle the error.
6950 return ParseStatus::Success;
6951 }
6952
6953 // Parse soffset.
6954 Res = parseRegOrImm(Operands);
6955 if (!Res.isSuccess())
6956 return Res;
6957
6958 trySkipToken(AsmToken::Comma);
6959
6960 if (!FormatFound) {
6961 Res = parseSymbolicOrNumericFormat(Format);
6962 if (Res.isFailure())
6963 return Res;
6964 if (Res.isSuccess()) {
6965 auto Size = Operands.size();
6966 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6967 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6968 Op.setImm(Format);
6969 }
6970 return ParseStatus::Success;
6971 }
6972
6973 if (isId("format") && peekToken().is(AsmToken::Colon))
6974 return Error(getLoc(), "duplicate format");
6975 return ParseStatus::Success;
6976}
6977
6978ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
6979 ParseStatus Res =
6980 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
6981 if (Res.isNoMatch()) {
6982 Res = parseIntWithPrefix("inst_offset", Operands,
6983 AMDGPUOperand::ImmTyInstOffset);
6984 }
6985 return Res;
6986}
6987
6988ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
6989 ParseStatus Res =
6990 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
6991 if (Res.isNoMatch())
6992 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
6993 return Res;
6994}
6995
6996ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
6997 ParseStatus Res =
6998 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
6999 if (Res.isNoMatch()) {
7000 Res =
7001 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7002 }
7003 return Res;
7004}
7005
7006//===----------------------------------------------------------------------===//
7007// Exp
7008//===----------------------------------------------------------------------===//
7009
7010void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7011 OptionalImmIndexMap OptionalIdx;
7012
7013 unsigned OperandIdx[4];
7014 unsigned EnMask = 0;
7015 int SrcIdx = 0;
7016
7017 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7018 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7019
7020 // Add the register arguments
7021 if (Op.isReg()) {
7022 assert(SrcIdx < 4);
7023 OperandIdx[SrcIdx] = Inst.size();
7024 Op.addRegOperands(Inst, 1);
7025 ++SrcIdx;
7026 continue;
7027 }
7028
7029 if (Op.isOff()) {
7030 assert(SrcIdx < 4);
7031 OperandIdx[SrcIdx] = Inst.size();
7032 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
7033 ++SrcIdx;
7034 continue;
7035 }
7036
7037 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7038 Op.addImmOperands(Inst, 1);
7039 continue;
7040 }
7041
7042 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7043 continue;
7044
7045 // Handle optional arguments
7046 OptionalIdx[Op.getImmTy()] = i;
7047 }
7048
7049 assert(SrcIdx == 4);
7050
7051 bool Compr = false;
7052 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7053 Compr = true;
7054 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7055 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
7056 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
7057 }
7058
7059 for (auto i = 0; i < SrcIdx; ++i) {
7060 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
7061 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7062 }
7063 }
7064
7065 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7066 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7067
7068 Inst.addOperand(MCOperand::createImm(EnMask));
7069}
7070
7071//===----------------------------------------------------------------------===//
7072// s_waitcnt
7073//===----------------------------------------------------------------------===//
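// The parsers below accept either a raw immediate or named counters, e.g.
// (illustrative): "s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)"; the *_sat
// variants clamp an oversized value instead of reporting an error.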
7074
7075static bool
7077 const AMDGPU::IsaVersion ISA,
7078 int64_t &IntVal,
7079 int64_t CntVal,
7080 bool Saturate,
7081 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7082 unsigned (*decode)(const IsaVersion &Version, unsigned))
7083{
7084 bool Failed = false;
7085
7086 IntVal = encode(ISA, IntVal, CntVal);
7087 if (CntVal != decode(ISA, IntVal)) {
7088 if (Saturate) {
7089 IntVal = encode(ISA, IntVal, -1);
7090 } else {
7091 Failed = true;
7092 }
7093 }
7094 return Failed;
7095}
7096
7097bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7098
7099 SMLoc CntLoc = getLoc();
7100 StringRef CntName = getTokenStr();
7101
7102 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7103 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7104 return false;
7105
7106 int64_t CntVal;
7107 SMLoc ValLoc = getLoc();
7108 if (!parseExpr(CntVal))
7109 return false;
7110
7111 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7112
7113 bool Failed = true;
7114 bool Sat = CntName.ends_with("_sat");
7115
7116 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7117 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7118 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7119 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7120 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7121 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7122 } else {
7123 Error(CntLoc, "invalid counter name " + CntName);
7124 return false;
7125 }
7126
7127 if (Failed) {
7128 Error(ValLoc, "too large value for " + CntName);
7129 return false;
7130 }
7131
7132 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7133 return false;
7134
7135 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7136 if (isToken(AsmToken::EndOfStatement)) {
7137 Error(getLoc(), "expected a counter name");
7138 return false;
7139 }
7140 }
7141
7142 return true;
7143}
7144
7145ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7146 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7147 int64_t Waitcnt = getWaitcntBitMask(ISA);
7148 SMLoc S = getLoc();
7149
7150 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7151 while (!isToken(AsmToken::EndOfStatement)) {
7152 if (!parseCnt(Waitcnt))
7153 return ParseStatus::Failure;
7154 }
7155 } else {
7156 if (!parseExpr(Waitcnt))
7157 return ParseStatus::Failure;
7158 }
7159
7160 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7161 return ParseStatus::Success;
7162}
7163
7164bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7165 SMLoc FieldLoc = getLoc();
7166 StringRef FieldName = getTokenStr();
7167 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7168 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7169 return false;
7170
7171 SMLoc ValueLoc = getLoc();
7172 StringRef ValueName = getTokenStr();
7173 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7174 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7175 return false;
7176
7177 unsigned Shift;
7178 if (FieldName == "instid0") {
7179 Shift = 0;
7180 } else if (FieldName == "instskip") {
7181 Shift = 4;
7182 } else if (FieldName == "instid1") {
7183 Shift = 7;
7184 } else {
7185 Error(FieldLoc, "invalid field name " + FieldName);
7186 return false;
7187 }
7188
7189 int Value;
7190 if (Shift == 4) {
7191 // Parse values for instskip.
7193 .Case("SAME", 0)
7194 .Case("NEXT", 1)
7195 .Case("SKIP_1", 2)
7196 .Case("SKIP_2", 3)
7197 .Case("SKIP_3", 4)
7198 .Case("SKIP_4", 5)
7199 .Default(-1);
7200 } else {
7201 // Parse values for instid0 and instid1.
7203 .Case("NO_DEP", 0)
7204 .Case("VALU_DEP_1", 1)
7205 .Case("VALU_DEP_2", 2)
7206 .Case("VALU_DEP_3", 3)
7207 .Case("VALU_DEP_4", 4)
7208 .Case("TRANS32_DEP_1", 5)
7209 .Case("TRANS32_DEP_2", 6)
7210 .Case("TRANS32_DEP_3", 7)
7211 .Case("FMA_ACCUM_CYCLE_1", 8)
7212 .Case("SALU_CYCLE_1", 9)
7213 .Case("SALU_CYCLE_2", 10)
7214 .Case("SALU_CYCLE_3", 11)
7215 .Default(-1);
7216 }
7217 if (Value < 0) {
7218 Error(ValueLoc, "invalid value name " + ValueName);
7219 return false;
7220 }
7221
7222 Delay |= Value << Shift;
7223 return true;
7224}
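// An example of the full operand syntax assembled from the fields parsed
// above (illustrative):
//   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)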
7225
7226ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7227 int64_t Delay = 0;
7228 SMLoc S = getLoc();
7229
7230 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7231 do {
7232 if (!parseDelay(Delay))
7233 return ParseStatus::Failure;
7234 } while (trySkipToken(AsmToken::Pipe));
7235 } else {
7236 if (!parseExpr(Delay))
7237 return ParseStatus::Failure;
7238 }
7239
7240 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7241 return ParseStatus::Success;
7242}
7243
7244bool
7245AMDGPUOperand::isSWaitCnt() const {
7246 return isImm();
7247}
7248
7249bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7250
7251//===----------------------------------------------------------------------===//
7252// DepCtr
7253//===----------------------------------------------------------------------===//
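// The parsers below handle s_waitcnt_depctr operands, e.g. (illustrative;
// the depctr_* counter names depend on the subtarget):
//   s_waitcnt_depctr depctr_va_vdst(0) depctr_vm_vsrc(0)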
7254
7255void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7256 StringRef DepCtrName) {
7257 switch (ErrorId) {
7258 case OPR_ID_UNKNOWN:
7259 Error(Loc, Twine("invalid counter name ", DepCtrName));
7260 return;
7261 case OPR_ID_UNSUPPORTED:
7262 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7263 return;
7264 case OPR_ID_DUPLICATE:
7265 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7266 return;
7267 case OPR_VAL_INVALID:
7268 Error(Loc, Twine("invalid value for ", DepCtrName));
7269 return;
7270 default:
7271 assert(false);
7272 }
7273}
7274
7275bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7276
7277 using namespace llvm::AMDGPU::DepCtr;
7278
7279 SMLoc DepCtrLoc = getLoc();
7280 StringRef DepCtrName = getTokenStr();
7281
7282 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7283 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7284 return false;
7285
7286 int64_t ExprVal;
7287 if (!parseExpr(ExprVal))
7288 return false;
7289
7290 unsigned PrevOprMask = UsedOprMask;
7291 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7292
7293 if (CntVal < 0) {
7294 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7295 return false;
7296 }
7297
7298 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7299 return false;
7300
7301 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7302 if (isToken(AsmToken::EndOfStatement)) {
7303 Error(getLoc(), "expected a counter name");
7304 return false;
7305 }
7306 }
7307
7308 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7309 DepCtr = (DepCtr & ~CntValMask) | CntVal;
7310 return true;
7311}
7312
7313ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7314 using namespace llvm::AMDGPU::DepCtr;
7315
7316 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
7317 SMLoc Loc = getLoc();
7318
7319 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7320 unsigned UsedOprMask = 0;
7321 while (!isToken(AsmToken::EndOfStatement)) {
7322 if (!parseDepCtr(DepCtr, UsedOprMask))
7323 return ParseStatus::Failure;
7324 }
7325 } else {
7326 if (!parseExpr(DepCtr))
7327 return ParseStatus::Failure;
7328 }
7329
7330 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
7331 return ParseStatus::Success;
7332}
7333
7334bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
7335
7336//===----------------------------------------------------------------------===//
7337// hwreg
7338//===----------------------------------------------------------------------===//
7339
7340ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
7341 OperandInfoTy &Offset,
7342 OperandInfoTy &Width) {
7343 using namespace llvm::AMDGPU::Hwreg;
7344
7345 if (!trySkipId("hwreg", AsmToken::LParen))
7346 return ParseStatus::NoMatch;
7347
7348 // The register may be specified by name or using a numeric code
7349 HwReg.Loc = getLoc();
7350 if (isToken(AsmToken::Identifier) &&
7351 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7352 HwReg.IsSymbolic = true;
7353 lex(); // skip register name
7354 } else if (!parseExpr(HwReg.Val, "a register name")) {
7355 return ParseStatus::Failure;
7356 }
7357
7358 if (trySkipToken(AsmToken::RParen))
7359 return ParseStatus::Success;
7360
7361 // parse optional params
7362 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
7363 return ParseStatus::Failure;
7364
7365 Offset.Loc = getLoc();
7366 if (!parseExpr(Offset.Val))
7367 return ParseStatus::Failure;
7368
7369 if (!skipToken(AsmToken::Comma, "expected a comma"))
7370 return ParseStatus::Failure;
7371
7372 Width.Loc = getLoc();
7373 if (!parseExpr(Width.Val) ||
7374 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7375 return ParseStatus::Failure;
7376
7377 return ParseStatus::Success;
7378}
7379
7380ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
7381 using namespace llvm::AMDGPU::Hwreg;
7382
7383 int64_t ImmVal = 0;
7384 SMLoc Loc = getLoc();
7385
7386 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
7387 HwregId::Default);
7388 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
7389 HwregOffset::Default);
7390 struct : StructuredOpField {
7391 using StructuredOpField::StructuredOpField;
7392 bool validate(AMDGPUAsmParser &Parser) const override {
7393 if (!isUIntN(Width, Val - 1))
7394 return Error(Parser, "only values from 1 to 32 are legal");
7395 return true;
7396 }
7397 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
7398 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
7399
7400 if (Res.isNoMatch())
7401 Res = parseHwregFunc(HwReg, Offset, Width);
7402
7403 if (Res.isSuccess()) {
7404 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
7405 return ParseStatus::Failure;
7406 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
7407 }
7408
7409 if (Res.isNoMatch() &&
7410 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
7411    Res = ParseStatus::Success;
7412
7413 if (!Res.isSuccess())
7414 return ParseStatus::Failure;
7415
7416 if (!isUInt<16>(ImmVal))
7417 return Error(Loc, "invalid immediate: only 16-bit values are legal");
7418 Operands.push_back(
7419 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
7420 return ParseStatus::Success;
7421}
7422
7423bool AMDGPUOperand::isHwreg() const {
7424 return isImmTy(ImmTyHwreg);
7425}
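// Illustrative hwreg operand forms accepted by parseHwreg (register names and
// values are examples only):
//   s_getreg_b32 s0, hwreg(HW_REG_MODE)            // whole register
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 32)     // explicit offset and width
//   s_getreg_b32 s0, {id: 1, offset: 0, size: 32}  // structured form
//   s_getreg_b32 s0, 0x1881                        // raw 16-bit immediate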
7426
7427//===----------------------------------------------------------------------===//
7428// sendmsg
7429//===----------------------------------------------------------------------===//
7430
7431bool
7432AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
7433 OperandInfoTy &Op,
7434 OperandInfoTy &Stream) {
7435 using namespace llvm::AMDGPU::SendMsg;
7436
7437 Msg.Loc = getLoc();
7438 if (isToken(AsmToken::Identifier) &&
7439 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7440 Msg.IsSymbolic = true;
7441 lex(); // skip message name
7442 } else if (!parseExpr(Msg.Val, "a message name")) {
7443 return false;
7444 }
7445
7446 if (trySkipToken(AsmToken::Comma)) {
7447 Op.IsDefined = true;
7448 Op.Loc = getLoc();
7449 if (isToken(AsmToken::Identifier) &&
7450 (Op.Val = getMsgOpId(Msg.Val, getTokenStr())) >= 0) {
7451 lex(); // skip operation name
7452 } else if (!parseExpr(Op.Val, "an operation name")) {
7453 return false;
7454 }
7455
7456 if (trySkipToken(AsmToken::Comma)) {
7457 Stream.IsDefined = true;
7458 Stream.Loc = getLoc();
7459 if (!parseExpr(Stream.Val))
7460 return false;
7461 }
7462 }
7463
7464 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
7465}
7466
7467bool
7468AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
7469 const OperandInfoTy &Op,
7470 const OperandInfoTy &Stream) {
7471 using namespace llvm::AMDGPU::SendMsg;
7472
7473  // Validation strictness depends on whether the message is specified
7474  // in symbolic or in numeric form. In the latter case,
7475  // only whether the value can be encoded is checked.
7476 bool Strict = Msg.IsSymbolic;
7477
7478 if (Strict) {
7479 if (Msg.Val == OPR_ID_UNSUPPORTED) {
7480 Error(Msg.Loc, "specified message id is not supported on this GPU");
7481 return false;
7482 }
7483 } else {
7484 if (!isValidMsgId(Msg.Val, getSTI())) {
7485 Error(Msg.Loc, "invalid message id");
7486 return false;
7487 }
7488 }
7489 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
7490 if (Op.IsDefined) {
7491 Error(Op.Loc, "message does not support operations");
7492 } else {
7493 Error(Msg.Loc, "missing message operation");
7494 }
7495 return false;
7496 }
7497 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
7498 Error(Op.Loc, "invalid operation id");
7499 return false;
7500 }
7501 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
7502 Stream.IsDefined) {
7503 Error(Stream.Loc, "message operation does not support streams");
7504 return false;
7505 }
7506 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
7507 Error(Stream.Loc, "invalid message stream id");
7508 return false;
7509 }
7510 return true;
7511}
7512
7513ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
7514 using namespace llvm::AMDGPU::SendMsg;
7515
7516 int64_t ImmVal = 0;
7517 SMLoc Loc = getLoc();
7518
7519 if (trySkipId("sendmsg", AsmToken::LParen)) {
7520 OperandInfoTy Msg(OPR_ID_UNKNOWN);
7521 OperandInfoTy Op(OP_NONE_);
7522 OperandInfoTy Stream(STREAM_ID_NONE_);
7523 if (parseSendMsgBody(Msg, Op, Stream) &&
7524 validateSendMsg(Msg, Op, Stream)) {
7525 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
7526 } else {
7527 return ParseStatus::Failure;
7528 }
7529 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
7530 if (ImmVal < 0 || !isUInt<16>(ImmVal))
7531 return Error(Loc, "invalid immediate: only 16-bit values are legal");
7532 } else {
7533 return ParseStatus::Failure;
7534 }
7535
7536 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
7537 return ParseStatus::Success;
7538}
7539
7540bool AMDGPUOperand::isSendMsg() const {
7541 return isImmTy(ImmTySendMsg);
7542}
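// Illustrative sendmsg operand forms accepted by parseSendMsg (message and
// operation names are examples; availability depends on the target):
//   s_sendmsg sendmsg(MSG_INTERRUPT)
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
//   s_sendmsg 0x1                      // raw 16-bit immediate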
7543
7544//===----------------------------------------------------------------------===//
7545// v_interp
7546//===----------------------------------------------------------------------===//
7547
7548ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
7549 StringRef Str;
7550 SMLoc S = getLoc();
7551
7552 if (!parseId(Str))
7553 return ParseStatus::NoMatch;
7554
7555 int Slot = StringSwitch<int>(Str)
7556 .Case("p10", 0)
7557 .Case("p20", 1)
7558 .Case("p0", 2)
7559 .Default(-1);
7560
7561 if (Slot == -1)
7562 return Error(S, "invalid interpolation slot");
7563
7564 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
7565 AMDGPUOperand::ImmTyInterpSlot));
7566 return ParseStatus::Success;
7567}
7568
7569ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
7570 StringRef Str;
7571 SMLoc S = getLoc();
7572
7573 if (!parseId(Str))
7574 return ParseStatus::NoMatch;
7575
7576 if (!Str.starts_with("attr"))
7577 return Error(S, "invalid interpolation attribute");
7578
7579 StringRef Chan = Str.take_back(2);
7580 int AttrChan = StringSwitch<int>(Chan)
7581 .Case(".x", 0)
7582 .Case(".y", 1)
7583 .Case(".z", 2)
7584 .Case(".w", 3)
7585 .Default(-1);
7586 if (AttrChan == -1)
7587 return Error(S, "invalid or missing interpolation attribute channel");
7588
7589 Str = Str.drop_back(2).drop_front(4);
7590
7591 uint8_t Attr;
7592 if (Str.getAsInteger(10, Attr))
7593 return Error(S, "invalid or missing interpolation attribute number");
7594
7595 if (Attr > 32)
7596 return Error(S, "out of bounds interpolation attribute number");
7597
7598 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
7599
7600 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
7601 AMDGPUOperand::ImmTyInterpAttr));
7602 Operands.push_back(AMDGPUOperand::CreateImm(
7603 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
7604 return ParseStatus::Success;
7605}
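// Illustrative interpolation operands handled by the two parsers above
// (slot is p0, p10 or p20; attribute is attrN.{x,y,z,w}):
//   v_interp_p1_f32 v0, v1, attr0.x
//   v_interp_mov_f32 v0, p10, attr0.y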
7606
7607//===----------------------------------------------------------------------===//
7608// exp
7609//===----------------------------------------------------------------------===//
7610
7611ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
7612 using namespace llvm::AMDGPU::Exp;
7613
7614 StringRef Str;
7615 SMLoc S = getLoc();
7616
7617 if (!parseId(Str))
7618 return ParseStatus::NoMatch;
7619
7620 unsigned Id = getTgtId(Str);
7621 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
7622 return Error(S, (Id == ET_INVALID)
7623 ? "invalid exp target"
7624 : "exp target is not supported on this GPU");
7625
7626 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
7627 AMDGPUOperand::ImmTyExpTgt));
7628 return ParseStatus::Success;
7629}
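// Illustrative exp target syntax handled above (target availability varies by GPU):
//   exp mrt0 v0, v1, v2, v3 done vm
//   exp pos0 v4, v5, v6, v7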
7630
7631//===----------------------------------------------------------------------===//
7632// parser helpers
7633//===----------------------------------------------------------------------===//
7634
7635bool
7636AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
7637 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
7638}
7639
7640bool
7641AMDGPUAsmParser::isId(const StringRef Id) const {
7642 return isId(getToken(), Id);
7643}
7644
7645bool
7646AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
7647 return getTokenKind() == Kind;
7648}
7649
7650StringRef AMDGPUAsmParser::getId() const {
7651 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
7652}
7653
7654bool
7655AMDGPUAsmParser::trySkipId(const StringRef Id) {
7656 if (isId(Id)) {
7657 lex();
7658 return true;
7659 }
7660 return false;
7661}
7662
7663bool
7664AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7665 if (isToken(AsmToken::Identifier)) {
7666 StringRef Tok = getTokenStr();
7667 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
7668 lex();
7669 return true;
7670 }
7671 }
7672 return false;
7673}
7674
7675bool
7676AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7677 if (isId(Id) && peekToken().is(Kind)) {
7678 lex();
7679 lex();
7680 return true;
7681 }
7682 return false;
7683}
7684
7685bool
7686AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7687 if (isToken(Kind)) {
7688 lex();
7689 return true;
7690 }
7691 return false;
7692}
7693
7694bool
7695AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7696 const StringRef ErrMsg) {
7697 if (!trySkipToken(Kind)) {
7698 Error(getLoc(), ErrMsg);
7699 return false;
7700 }
7701 return true;
7702}
7703
7704bool
7705AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7706 SMLoc S = getLoc();
7707
7708 const MCExpr *Expr;
7709 if (Parser.parseExpression(Expr))
7710 return false;
7711
7712 if (Expr->evaluateAsAbsolute(Imm))
7713 return true;
7714
7715 if (Expected.empty()) {
7716 Error(S, "expected absolute expression");
7717 } else {
7718 Error(S, Twine("expected ", Expected) +
7719 Twine(" or an absolute expression"));
7720 }
7721 return false;
7722}
7723
7724bool
7725AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7726 SMLoc S = getLoc();
7727
7728 const MCExpr *Expr;
7729 if (Parser.parseExpression(Expr))
7730 return false;
7731
7732 int64_t IntVal;
7733 if (Expr->evaluateAsAbsolute(IntVal)) {
7734 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7735 } else {
7736 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7737 }
7738 return true;
7739}
7740
7741bool
7742AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7743 if (isToken(AsmToken::String)) {
7744 Val = getToken().getStringContents();
7745 lex();
7746 return true;
7747 } else {
7748 Error(getLoc(), ErrMsg);
7749 return false;
7750 }
7751}
7752
7753bool
7754AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7755 if (isToken(AsmToken::Identifier)) {
7756 Val = getTokenStr();
7757 lex();
7758 return true;
7759 } else {
7760 if (!ErrMsg.empty())
7761 Error(getLoc(), ErrMsg);
7762 return false;
7763 }
7764}
7765
7766AsmToken
7767AMDGPUAsmParser::getToken() const {
7768 return Parser.getTok();
7769}
7770
7771AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7772 return isToken(AsmToken::EndOfStatement)
7773 ? getToken()
7774 : getLexer().peekTok(ShouldSkipSpace);
7775}
7776
7777void
7778AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7779 auto TokCount = getLexer().peekTokens(Tokens);
7780
7781 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7782 Tokens[Idx] = AsmToken(AsmToken::Error, "");
7783}
7784
7785AsmToken::TokenKind
7786AMDGPUAsmParser::getTokenKind() const {
7787 return getLexer().getKind();
7788}
7789
7790SMLoc
7791AMDGPUAsmParser::getLoc() const {
7792 return getToken().getLoc();
7793}
7794
7795StringRef
7796AMDGPUAsmParser::getTokenStr() const {
7797 return getToken().getString();
7798}
7799
7800void
7801AMDGPUAsmParser::lex() {
7802 Parser.Lex();
7803}
7804
7805SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
7806 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7807}
7808
7809SMLoc
7810AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7811 const OperandVector &Operands) const {
7812 for (unsigned i = Operands.size() - 1; i > 0; --i) {
7813 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7814 if (Test(Op))
7815 return Op.getStartLoc();
7816 }
7817 return getInstLoc(Operands);
7818}
7819
7820SMLoc
7821AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7822 const OperandVector &Operands) const {
7823 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7824 return getOperandLoc(Test, Operands);
7825}
7826
7827SMLoc
7828AMDGPUAsmParser::getRegLoc(unsigned Reg,
7829 const OperandVector &Operands) const {
7830 auto Test = [=](const AMDGPUOperand& Op) {
7831 return Op.isRegKind() && Op.getReg() == Reg;
7832 };
7833 return getOperandLoc(Test, Operands);
7834}
7835
7836SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
7837 bool SearchMandatoryLiterals) const {
7838 auto Test = [](const AMDGPUOperand& Op) {
7839 return Op.IsImmKindLiteral() || Op.isExpr();
7840 };
7841 SMLoc Loc = getOperandLoc(Test, Operands);
7842 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
7843 Loc = getMandatoryLitLoc(Operands);
7844 return Loc;
7845}
7846
7847SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
7848 auto Test = [](const AMDGPUOperand &Op) {
7849 return Op.IsImmKindMandatoryLiteral();
7850 };
7851 return getOperandLoc(Test, Operands);
7852}
7853
7854SMLoc
7855AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7856 auto Test = [](const AMDGPUOperand& Op) {
7857 return Op.isImmKindConst();
7858 };
7859 return getOperandLoc(Test, Operands);
7860}
7861
7862ParseStatus
7863AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
7864 if (!trySkipToken(AsmToken::LCurly))
7865 return ParseStatus::NoMatch;
7866
7867 bool First = true;
7868 while (!trySkipToken(AsmToken::RCurly)) {
7869 if (!First &&
7870 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
7871 return ParseStatus::Failure;
7872
7873 StringRef Id = getTokenStr();
7874 SMLoc IdLoc = getLoc();
7875 if (!skipToken(AsmToken::Identifier, "field name expected") ||
7876 !skipToken(AsmToken::Colon, "colon expected"))
7877 return ParseStatus::Failure;
7878
7879 auto I =
7880 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
7881 if (I == Fields.end())
7882 return Error(IdLoc, "unknown field");
7883 if ((*I)->IsDefined)
7884 return Error(IdLoc, "duplicate field");
7885
7886 // TODO: Support symbolic values.
7887 (*I)->Loc = getLoc();
7888 if (!parseExpr((*I)->Val))
7889 return ParseStatus::Failure;
7890 (*I)->IsDefined = true;
7891
7892 First = false;
7893 }
7894 return ParseStatus::Success;
7895}
7896
7897bool AMDGPUAsmParser::validateStructuredOpFields(
7898    ArrayRef<const StructuredOpField *> Fields) {
7899  return all_of(Fields, [this](const StructuredOpField *F) {
7900 return F->validate(*this);
7901 });
7902}
7903
7904//===----------------------------------------------------------------------===//
7905// swizzle
7906//===----------------------------------------------------------------------===//
7907
7909static unsigned
7910encodeBitmaskPerm(const unsigned AndMask,
7911 const unsigned OrMask,
7912 const unsigned XorMask) {
7913 using namespace llvm::AMDGPU::Swizzle;
7914
7915 return BITMASK_PERM_ENC |
7916 (AndMask << BITMASK_AND_SHIFT) |
7917 (OrMask << BITMASK_OR_SHIFT) |
7918 (XorMask << BITMASK_XOR_SHIFT);
7919}
7920
7921bool
7922AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7923 const unsigned MinVal,
7924 const unsigned MaxVal,
7925 const StringRef ErrMsg,
7926 SMLoc &Loc) {
7927 if (!skipToken(AsmToken::Comma, "expected a comma")) {
7928 return false;
7929 }
7930 Loc = getLoc();
7931 if (!parseExpr(Op)) {
7932 return false;
7933 }
7934 if (Op < MinVal || Op > MaxVal) {
7935 Error(Loc, ErrMsg);
7936 return false;
7937 }
7938
7939 return true;
7940}
7941
7942bool
7943AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7944 const unsigned MinVal,
7945 const unsigned MaxVal,
7946 const StringRef ErrMsg) {
7947 SMLoc Loc;
7948 for (unsigned i = 0; i < OpNum; ++i) {
7949 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7950 return false;
7951 }
7952
7953 return true;
7954}
7955
7956bool
7957AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7958 using namespace llvm::AMDGPU::Swizzle;
7959
7960 int64_t Lane[LANE_NUM];
7961 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7962 "expected a 2-bit lane id")) {
7963    Imm = 0;
7964    for (unsigned I = 0; I < LANE_NUM; ++I) {
7965 Imm |= Lane[I] << (LANE_SHIFT * I);
7966 }
7967 return true;
7968 }
7969 return false;
7970}
7971
7972bool
7973AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7974 using namespace llvm::AMDGPU::Swizzle;
7975
7976 SMLoc Loc;
7977 int64_t GroupSize;
7978 int64_t LaneIdx;
7979
7980 if (!parseSwizzleOperand(GroupSize,
7981 2, 32,
7982 "group size must be in the interval [2,32]",
7983 Loc)) {
7984 return false;
7985 }
7986 if (!isPowerOf2_64(GroupSize)) {
7987 Error(Loc, "group size must be a power of two");
7988 return false;
7989 }
7990 if (parseSwizzleOperand(LaneIdx,
7991 0, GroupSize - 1,
7992 "lane id must be in the interval [0,group size - 1]",
7993 Loc)) {
7994 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7995 return true;
7996 }
7997 return false;
7998}
7999
8000bool
8001AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8002 using namespace llvm::AMDGPU::Swizzle;
8003
8004 SMLoc Loc;
8005 int64_t GroupSize;
8006
8007 if (!parseSwizzleOperand(GroupSize,
8008 2, 32,
8009 "group size must be in the interval [2,32]",
8010 Loc)) {
8011 return false;
8012 }
8013 if (!isPowerOf2_64(GroupSize)) {
8014 Error(Loc, "group size must be a power of two");
8015 return false;
8016 }
8017
8018 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8019 return true;
8020}
8021
8022bool
8023AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8024 using namespace llvm::AMDGPU::Swizzle;
8025
8026 SMLoc Loc;
8027 int64_t GroupSize;
8028
8029 if (!parseSwizzleOperand(GroupSize,
8030 1, 16,
8031 "group size must be in the interval [1,16]",
8032 Loc)) {
8033 return false;
8034 }
8035 if (!isPowerOf2_64(GroupSize)) {
8036 Error(Loc, "group size must be a power of two");
8037 return false;
8038 }
8039
8040 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8041 return true;
8042}
8043
8044bool
8045AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8046 using namespace llvm::AMDGPU::Swizzle;
8047
8048 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8049 return false;
8050 }
8051
8052 StringRef Ctl;
8053 SMLoc StrLoc = getLoc();
8054 if (!parseString(Ctl)) {
8055 return false;
8056 }
8057 if (Ctl.size() != BITMASK_WIDTH) {
8058 Error(StrLoc, "expected a 5-character mask");
8059 return false;
8060 }
8061
8062 unsigned AndMask = 0;
8063 unsigned OrMask = 0;
8064 unsigned XorMask = 0;
8065
8066 for (size_t i = 0; i < Ctl.size(); ++i) {
8067 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8068 switch(Ctl[i]) {
8069 default:
8070 Error(StrLoc, "invalid mask");
8071 return false;
8072 case '0':
8073 break;
8074 case '1':
8075 OrMask |= Mask;
8076 break;
8077 case 'p':
8078 AndMask |= Mask;
8079 break;
8080 case 'i':
8081 AndMask |= Mask;
8082 XorMask |= Mask;
8083 break;
8084 }
8085 }
8086
8087 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8088 return true;
8089}
8090
8091bool
8092AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8093
8094 SMLoc OffsetLoc = getLoc();
8095
8096 if (!parseExpr(Imm, "a swizzle macro")) {
8097 return false;
8098 }
8099 if (!isUInt<16>(Imm)) {
8100 Error(OffsetLoc, "expected a 16-bit offset");
8101 return false;
8102 }
8103 return true;
8104}
8105
8106bool
8107AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8108 using namespace llvm::AMDGPU::Swizzle;
8109
8110  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
8111
8112 SMLoc ModeLoc = getLoc();
8113 bool Ok = false;
8114
8115 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8116 Ok = parseSwizzleQuadPerm(Imm);
8117 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8118 Ok = parseSwizzleBitmaskPerm(Imm);
8119 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8120 Ok = parseSwizzleBroadcast(Imm);
8121 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8122 Ok = parseSwizzleSwap(Imm);
8123 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8124 Ok = parseSwizzleReverse(Imm);
8125 } else {
8126 Error(ModeLoc, "expected a swizzle mode");
8127 }
8128
8129    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
8130 }
8131
8132 return false;
8133}
8134
8135ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8136 SMLoc S = getLoc();
8137 int64_t Imm = 0;
8138
8139 if (trySkipId("offset")) {
8140
8141 bool Ok = false;
8142 if (skipToken(AsmToken::Colon, "expected a colon")) {
8143 if (trySkipId("swizzle")) {
8144 Ok = parseSwizzleMacro(Imm);
8145 } else {
8146 Ok = parseSwizzleOffset(Imm);
8147 }
8148 }
8149
8150 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8151
8152    return Ok ? ParseStatus::Success : ParseStatus::Failure;
8153 }
8154 return ParseStatus::NoMatch;
8155}
8156
8157bool
8158AMDGPUOperand::isSwizzle() const {
8159 return isImmTy(ImmTySwizzle);
8160}
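// Illustrative ds_swizzle_b32 offsets built by the swizzle parsers above
// (all values are examples):
//   ds_swizzle_b32 v5, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v5, v1 offset:swizzle(BITMASK_PERM, "01pi0")
//   ds_swizzle_b32 v5, v1 offset:swizzle(BROADCAST, 8, 0)
//   ds_swizzle_b32 v5, v1 offset:swizzle(SWAP, 16)
//   ds_swizzle_b32 v5, v1 offset:swizzle(REVERSE, 4)
//   ds_swizzle_b32 v5, v1 offset:0xffff           // raw 16-bit offset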
8161
8162//===----------------------------------------------------------------------===//
8163// VGPR Index Mode
8164//===----------------------------------------------------------------------===//
8165
8166int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8167
8168 using namespace llvm::AMDGPU::VGPRIndexMode;
8169
8170 if (trySkipToken(AsmToken::RParen)) {
8171 return OFF;
8172 }
8173
8174 int64_t Imm = 0;
8175
8176 while (true) {
8177 unsigned Mode = 0;
8178 SMLoc S = getLoc();
8179
8180 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8181 if (trySkipId(IdSymbolic[ModeId])) {
8182 Mode = 1 << ModeId;
8183 break;
8184 }
8185 }
8186
8187 if (Mode == 0) {
8188 Error(S, (Imm == 0)?
8189 "expected a VGPR index mode or a closing parenthesis" :
8190 "expected a VGPR index mode");
8191 return UNDEF;
8192 }
8193
8194 if (Imm & Mode) {
8195 Error(S, "duplicate VGPR index mode");
8196 return UNDEF;
8197 }
8198 Imm |= Mode;
8199
8200 if (trySkipToken(AsmToken::RParen))
8201 break;
8202 if (!skipToken(AsmToken::Comma,
8203 "expected a comma or a closing parenthesis"))
8204 return UNDEF;
8205 }
8206
8207 return Imm;
8208}
8209
8210ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8211
8212 using namespace llvm::AMDGPU::VGPRIndexMode;
8213
8214 int64_t Imm = 0;
8215 SMLoc S = getLoc();
8216
8217 if (trySkipId("gpr_idx", AsmToken::LParen)) {
8218 Imm = parseGPRIdxMacro();
8219 if (Imm == UNDEF)
8220 return ParseStatus::Failure;
8221 } else {
8222 if (getParser().parseAbsoluteExpression(Imm))
8223 return ParseStatus::Failure;
8224 if (Imm < 0 || !isUInt<4>(Imm))
8225 return Error(S, "invalid immediate: only 4-bit values are legal");
8226 }
8227
8228 Operands.push_back(
8229 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8230 return ParseStatus::Success;
8231}
8232
8233bool AMDGPUOperand::isGPRIdxMode() const {
8234 return isImmTy(ImmTyGprIdxMode);
8235}
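// Illustrative gpr_idx operand forms handled above:
//   s_set_gpr_idx_on s0, gpr_idx(SRC0)
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, SRC1, DST)
//   s_set_gpr_idx_on s0, 0x1          // raw 4-bit immediate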
8236
8237//===----------------------------------------------------------------------===//
8238// sopp branch targets
8239//===----------------------------------------------------------------------===//
8240
8241ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
8242
8243 // Make sure we are not parsing something
8244 // that looks like a label or an expression but is not.
8245 // This will improve error messages.
8246 if (isRegister() || isModifier())
8247 return ParseStatus::NoMatch;
8248
8249 if (!parseExpr(Operands))
8250 return ParseStatus::Failure;
8251
8252 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
8253 assert(Opr.isImm() || Opr.isExpr());
8254 SMLoc Loc = Opr.getStartLoc();
8255
8256 // Currently we do not support arbitrary expressions as branch targets.
8257 // Only labels and absolute expressions are accepted.
8258 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8259 Error(Loc, "expected an absolute expression or a label");
8260 } else if (Opr.isImm() && !Opr.isS16Imm()) {
8261 Error(Loc, "expected a 16-bit signed jump offset");
8262 }
8263
8264 return ParseStatus::Success;
8265}
8266
8267//===----------------------------------------------------------------------===//
8268// Boolean holding registers
8269//===----------------------------------------------------------------------===//
8270
8271ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
8272 return parseReg(Operands);
8273}
8274
8275//===----------------------------------------------------------------------===//
8276// mubuf
8277//===----------------------------------------------------------------------===//
8278
8279void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
8280 const OperandVector &Operands,
8281 bool IsAtomic) {
8282 OptionalImmIndexMap OptionalIdx;
8283 unsigned FirstOperandIdx = 1;
8284 bool IsAtomicReturn = false;
8285
8286 if (IsAtomic) {
8287 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
8288                     SIInstrFlags::IsAtomicRet;
8289  }
8290
8291 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
8292 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8293
8294 // Add the register arguments
8295 if (Op.isReg()) {
8296 Op.addRegOperands(Inst, 1);
8297 // Insert a tied src for atomic return dst.
8298 // This cannot be postponed as subsequent calls to
8299 // addImmOperands rely on correct number of MC operands.
8300 if (IsAtomicReturn && i == FirstOperandIdx)
8301 Op.addRegOperands(Inst, 1);
8302 continue;
8303 }
8304
8305 // Handle the case where soffset is an immediate
8306 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
8307 Op.addImmOperands(Inst, 1);
8308 continue;
8309 }
8310
8311 // Handle tokens like 'offen' which are sometimes hard-coded into the
8312 // asm string. There are no MCInst operands for these.
8313 if (Op.isToken()) {
8314 continue;
8315 }
8316 assert(Op.isImm());
8317
8318 // Handle optional arguments
8319 OptionalIdx[Op.getImmTy()] = i;
8320 }
8321
8322 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
8323 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
8324}
8325
8326//===----------------------------------------------------------------------===//
8327// smrd
8328//===----------------------------------------------------------------------===//
8329
8330bool AMDGPUOperand::isSMRDOffset8() const {
8331 return isImmLiteral() && isUInt<8>(getImm());
8332}
8333
8334bool AMDGPUOperand::isSMEMOffset() const {
8335 // Offset range is checked later by validator.
8336 return isImmLiteral();
8337}
8338
8339bool AMDGPUOperand::isSMRDLiteralOffset() const {
8340 // 32-bit literals are only supported on CI and we only want to use them
8341  // when the offset does not fit in 8 bits.
8342 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
8343}
8344
8345//===----------------------------------------------------------------------===//
8346// vop3
8347//===----------------------------------------------------------------------===//
8348
8349static bool ConvertOmodMul(int64_t &Mul) {
8350 if (Mul != 1 && Mul != 2 && Mul != 4)
8351 return false;
8352
8353 Mul >>= 1;
8354 return true;
8355}
8356
8357static bool ConvertOmodDiv(int64_t &Div) {
8358 if (Div == 1) {
8359 Div = 0;
8360 return true;
8361 }
8362
8363 if (Div == 2) {
8364 Div = 3;
8365 return true;
8366 }
8367
8368 return false;
8369}
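// Illustrative VOP3 output-modifier syntax that these converters map onto the
// omod encoding (0 = none, 1 = mul:2, 2 = mul:4, 3 = div:2):
//   v_add_f32_e64 v0, v1, v2 mul:2
//   v_add_f32_e64 v0, v1, v2 div:2 clamp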
8370
8371// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
8372// This is intentional and ensures compatibility with sp3.
8373// See bug 35397 for details.
8374bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
8375 if (BoundCtrl == 0 || BoundCtrl == 1) {
8376 if (!isGFX11Plus())
8377 BoundCtrl = 1;
8378 return true;
8379 }
8380 return false;
8381}
8382
8383void AMDGPUAsmParser::onBeginOfFile() {
8384 if (!getParser().getStreamer().getTargetStreamer() ||
8385 getSTI().getTargetTriple().getArch() == Triple::r600)
8386 return;
8387
8388 if (!getTargetStreamer().getTargetID())
8389 getTargetStreamer().initializeTargetID(getSTI(),
8390 getSTI().getFeatureString());
8391
8392 if (isHsaAbi(getSTI()))
8393 getTargetStreamer().EmitDirectiveAMDGCNTarget();
8394}
8395
8396/// Parse AMDGPU specific expressions.
8397///
8398/// expr ::= or(expr, ...) |
8399/// max(expr, ...)
8400///
8401bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
8402  using AGVK = AMDGPUVariadicMCExpr::VariadicKind;
8403
8404 if (isToken(AsmToken::Identifier)) {
8405 StringRef TokenId = getTokenStr();
8406 AGVK VK = StringSwitch<AGVK>(TokenId)
8407 .Case("max", AGVK::AGVK_Max)
8408 .Case("or", AGVK::AGVK_Or)
8409 .Default(AGVK::AGVK_None);
8410
8411 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
8412      SmallVector<const MCExpr *, 4> Exprs;
8413      uint64_t CommaCount = 0;
8414 lex(); // Eat 'max'/'or'
8415 lex(); // Eat '('
8416 while (true) {
8417 if (trySkipToken(AsmToken::RParen)) {
8418 if (Exprs.empty()) {
8419 Error(getToken().getLoc(),
8420 "empty " + Twine(TokenId) + " expression");
8421 return true;
8422 }
8423 if (CommaCount + 1 != Exprs.size()) {
8424 Error(getToken().getLoc(),
8425 "mismatch of commas in " + Twine(TokenId) + " expression");
8426 return true;
8427 }
8428 Res = AMDGPUVariadicMCExpr::create(VK, Exprs, getContext());
8429 return false;
8430 }
8431 const MCExpr *Expr;
8432 if (getParser().parseExpression(Expr, EndLoc))
8433 return true;
8434 Exprs.push_back(Expr);
8435 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
8436 if (LastTokenWasComma)
8437 CommaCount++;
8438 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
8439 Error(getToken().getLoc(),
8440 "unexpected token in " + Twine(TokenId) + " expression");
8441 return true;
8442 }
8443 }
8444 }
8445 }
8446 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
8447}
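// Illustrative uses of the variadic expressions parsed above (symbol names
// are examples only):
//   .set total_vgpr, max(callee1.num_vgpr, callee2.num_vgpr)
//   .set any_uses_vcc, or(f1.uses_vcc, f2.uses_vcc)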
8448
8449ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
8450 StringRef Name = getTokenStr();
8451 if (Name == "mul") {
8452 return parseIntWithPrefix("mul", Operands,
8453 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8454 }
8455
8456 if (Name == "div") {
8457 return parseIntWithPrefix("div", Operands,
8458 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8459 }
8460
8461 return ParseStatus::NoMatch;
8462}
8463
8464// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8465// the number of src operands present, then copies that bit into src0_modifiers.
8466static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
8467 int Opc = Inst.getOpcode();
8468 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8469 if (OpSelIdx == -1)
8470 return;
8471
8472 int SrcNum;
8473 const int Ops[] = { AMDGPU::OpName::src0,
8474 AMDGPU::OpName::src1,
8475 AMDGPU::OpName::src2 };
8476 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
8477 ++SrcNum)
8478 ;
8479 assert(SrcNum > 0);
8480
8481 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8482
8483 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
8484 if (DstIdx == -1)
8485 return;
8486
8487 const MCOperand &DstOp = Inst.getOperand(DstIdx);
8488 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8489 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8490 if (DstOp.isReg() &&
8491 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
8492 if (AMDGPU::isHi(DstOp.getReg(), MRI))
8493 ModVal |= SISrcMods::DST_OP_SEL;
8494 } else {
8495 if ((OpSel & (1 << SrcNum)) != 0)
8496 ModVal |= SISrcMods::DST_OP_SEL;
8497 }
8498 Inst.getOperand(ModIdx).setImm(ModVal);
8499}
8500
8501void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
8502 const OperandVector &Operands) {
8503 cvtVOP3P(Inst, Operands);
8504 cvtVOP3DstOpSelOnly(Inst, *getMRI());
8505}
8506
8507void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
8508 OptionalImmIndexMap &OptionalIdx) {
8509 cvtVOP3P(Inst, Operands, OptionalIdx);
8510 cvtVOP3DstOpSelOnly(Inst, *getMRI());
8511}
8512
8513static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8514 return
8515    // 1. This operand is an input modifiers operand
8516    Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
8517    // 2. This is not the last operand
8518 && Desc.NumOperands > (OpNum + 1)
8519 // 3. Next operand is register class
8520 && Desc.operands()[OpNum + 1].RegClass != -1
8521 // 4. Next register is not tied to any other operand
8522 && Desc.getOperandConstraint(OpNum + 1,
8523 MCOI::OperandConstraint::TIED_TO) == -1;
8524}
8525
8526void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8527{
8528 OptionalImmIndexMap OptionalIdx;
8529 unsigned Opc = Inst.getOpcode();
8530
8531 unsigned I = 1;
8532 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8533 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8534 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8535 }
8536
8537 for (unsigned E = Operands.size(); I != E; ++I) {
8538 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8539    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8540      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8541 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
8542 Op.isInterpAttrChan()) {
8543 Inst.addOperand(MCOperand::createImm(Op.getImm()));
8544 } else if (Op.isImmModifier()) {
8545 OptionalIdx[Op.getImmTy()] = I;
8546 } else {
8547 llvm_unreachable("unhandled operand type");
8548 }
8549 }
8550
8551 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
8552 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8553 AMDGPUOperand::ImmTyHigh);
8554
8555 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8556 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8557 AMDGPUOperand::ImmTyClampSI);
8558
8559 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8560 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8561 AMDGPUOperand::ImmTyOModSI);
8562}
8563
8564void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8565{
8566 OptionalImmIndexMap OptionalIdx;
8567 unsigned Opc = Inst.getOpcode();
8568
8569 unsigned I = 1;
8570 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8571 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8572 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8573 }
8574
8575 for (unsigned E = Operands.size(); I != E; ++I) {
8576 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8577    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8578      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8579 } else if (Op.isImmModifier()) {
8580 OptionalIdx[Op.getImmTy()] = I;
8581 } else {
8582 llvm_unreachable("unhandled operand type");
8583 }
8584 }
8585
8586 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8587
8588 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8589 if (OpSelIdx != -1)
8590 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8591
8592 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8593
8594 if (OpSelIdx == -1)
8595 return;
8596
8597 const int Ops[] = { AMDGPU::OpName::src0,
8598 AMDGPU::OpName::src1,
8599 AMDGPU::OpName::src2 };
8600 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8601 AMDGPU::OpName::src1_modifiers,
8602 AMDGPU::OpName::src2_modifiers };
8603
8604 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8605
8606 for (int J = 0; J < 3; ++J) {
8607 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8608 if (OpIdx == -1)
8609 break;
8610
8611 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8612 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8613
8614 if ((OpSel & (1 << J)) != 0)
8615 ModVal |= SISrcMods::OP_SEL_0;
8616 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8617 (OpSel & (1 << 3)) != 0)
8618 ModVal |= SISrcMods::DST_OP_SEL;
8619
8620 Inst.getOperand(ModIdx).setImm(ModVal);
8621 }
8622}
8623
8624void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8625 OptionalImmIndexMap &OptionalIdx) {
8626 unsigned Opc = Inst.getOpcode();
8627
8628 unsigned I = 1;
8629 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8630 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8631 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8632 }
8633
8634 for (unsigned E = Operands.size(); I != E; ++I) {
8635 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8636    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8637      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8638 } else if (Op.isImmModifier()) {
8639 OptionalIdx[Op.getImmTy()] = I;
8640 } else if (Op.isRegOrImm()) {
8641 Op.addRegOrImmOperands(Inst, 1);
8642 } else {
8643 llvm_unreachable("unhandled operand type");
8644 }
8645 }
8646
8647 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8648 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8649 AMDGPUOperand::ImmTyClampSI);
8650
8651 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8652 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8653 AMDGPUOperand::ImmTyOModSI);
8654
8655 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8656  // they have a src2 register operand that is tied to the dst operand.
8657  // We don't allow modifiers for this operand in the assembler, so
8658  // src2_modifiers should be 0.
8659 if (isMAC(Opc)) {
8660 auto it = Inst.begin();
8661 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8662 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8663 ++it;
8664 // Copy the operand to ensure it's not invalidated when Inst grows.
8665 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8666 }
8667}
8668
8669void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8670 OptionalImmIndexMap OptionalIdx;
8671 cvtVOP3(Inst, Operands, OptionalIdx);
8672}
8673
8674void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8675 OptionalImmIndexMap &OptIdx) {
8676 const int Opc = Inst.getOpcode();
8677 const MCInstrDesc &Desc = MII.get(Opc);
8678
8679 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8680
8681 if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8682 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
8683 Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_gfx12 ||
8684 Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_gfx12) {
8685 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
8686 Inst.addOperand(Inst.getOperand(0));
8687 }
8688
8689 // Adding vdst_in operand is already covered for these DPP instructions in
8690 // cvtVOP3DPP.
8691 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
8692 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
8693 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
8694 Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
8695 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12)) {
8696 assert(!IsPacked);
8697 Inst.addOperand(Inst.getOperand(0));
8698 }
8699
8700 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
8701 // instruction, and then figure out where to actually put the modifiers
8702
8703 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8704 if (OpSelIdx != -1) {
8705 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8706 }
8707
8708 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8709 if (OpSelHiIdx != -1) {
8710 int DefaultVal = IsPacked ? -1 : 0;
8711 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8712 DefaultVal);
8713 }
8714
8715 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8716 if (NegLoIdx != -1)
8717 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8718
8719 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8720 if (NegHiIdx != -1)
8721 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8722
8723 const int Ops[] = { AMDGPU::OpName::src0,
8724 AMDGPU::OpName::src1,
8725 AMDGPU::OpName::src2 };
8726 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8727 AMDGPU::OpName::src1_modifiers,
8728 AMDGPU::OpName::src2_modifiers };
8729
8730 unsigned OpSel = 0;
8731 unsigned OpSelHi = 0;
8732 unsigned NegLo = 0;
8733 unsigned NegHi = 0;
8734
8735 if (OpSelIdx != -1)
8736 OpSel = Inst.getOperand(OpSelIdx).getImm();
8737
8738 if (OpSelHiIdx != -1)
8739 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8740
8741 if (NegLoIdx != -1)
8742 NegLo = Inst.getOperand(NegLoIdx).getImm();
8743
8744 if (NegHiIdx != -1)
8745 NegHi = Inst.getOperand(NegHiIdx).getImm();
8746
8747 for (int J = 0; J < 3; ++J) {
8748 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8749 if (OpIdx == -1)
8750 break;
8751
8752 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8753
8754 if (ModIdx == -1)
8755 continue;
8756
8757 uint32_t ModVal = 0;
8758
8759 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
8760 if (SrcOp.isReg() && getMRI()
8761 ->getRegClass(AMDGPU::VGPR_16RegClassID)
8762 .contains(SrcOp.getReg())) {
8763 bool VGPRSuffixIsHi = AMDGPU::isHi(SrcOp.getReg(), *getMRI());
8764 if (VGPRSuffixIsHi)
8765 ModVal |= SISrcMods::OP_SEL_0;
8766 } else {
8767 if ((OpSel & (1 << J)) != 0)
8768 ModVal |= SISrcMods::OP_SEL_0;
8769 }
8770
8771 if ((OpSelHi & (1 << J)) != 0)
8772 ModVal |= SISrcMods::OP_SEL_1;
8773
8774 if ((NegLo & (1 << J)) != 0)
8775 ModVal |= SISrcMods::NEG;
8776
8777 if ((NegHi & (1 << J)) != 0)
8778 ModVal |= SISrcMods::NEG_HI;
8779
8780 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8781 }
8782}
8783
8784void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8785 OptionalImmIndexMap OptIdx;
8786 cvtVOP3(Inst, Operands, OptIdx);
8787 cvtVOP3P(Inst, Operands, OptIdx);
8788}
8789
8790static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
8791                                  unsigned i, unsigned Opc, unsigned OpName) {
8792 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
8793 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
8794 else
8795 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
8796}
8797
8798void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
8799 unsigned Opc = Inst.getOpcode();
8800
8801 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
8802 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
8803 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
8804 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
8805 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
8806
8807 OptionalImmIndexMap OptIdx;
8808 for (unsigned i = 5; i < Operands.size(); ++i) {
8809 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8810 OptIdx[Op.getImmTy()] = i;
8811 }
8812
8813 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
8814 addOptionalImmOperand(Inst, Operands, OptIdx,
8815 AMDGPUOperand::ImmTyIndexKey8bit);
8816
8817 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
8818 addOptionalImmOperand(Inst, Operands, OptIdx,
8819 AMDGPUOperand::ImmTyIndexKey16bit);
8820
8821 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8822 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClampSI);
8823
8824 cvtVOP3P(Inst, Operands, OptIdx);
8825}
8826
8827//===----------------------------------------------------------------------===//
8828// VOPD
8829//===----------------------------------------------------------------------===//
8830
8831ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
8832 if (!hasVOPD(getSTI()))
8833 return ParseStatus::NoMatch;
8834
8835 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8836 SMLoc S = getLoc();
8837 lex();
8838 lex();
8839 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8840 SMLoc OpYLoc = getLoc();
8841 StringRef OpYName;
8842 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
8843 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
8844 return ParseStatus::Success;
8845 }
8846 return Error(OpYLoc, "expected a VOPDY instruction after ::");
8847 }
8848 return ParseStatus::NoMatch;
8849}
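// Illustrative VOPD (dual-issue) syntax recognized above (GFX11+; opcodes and
// registers are examples):
//   v_dual_mov_b32 v0, v1 :: v_dual_mov_b32 v3, v4
//   v_dual_add_f32 v4, v5, v6 :: v_dual_mul_f32 v7, v8, v9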
8850
8851// Create VOPD MCInst operands using parsed assembler operands.
8852void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
8853 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
8854 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
8855 if (Op.isReg()) {
8856 Op.addRegOperands(Inst, 1);
8857 return;
8858 }
8859 if (Op.isImm()) {
8860 Op.addImmOperands(Inst, 1);
8861 return;
8862 }
8863 llvm_unreachable("Unhandled operand type in cvtVOPD");
8864 };
8865
8866 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
8867
8868 // MCInst operands are ordered as follows:
8869 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
8870
8871 for (auto CompIdx : VOPD::COMPONENTS) {
8872 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
8873 }
8874
8875 for (auto CompIdx : VOPD::COMPONENTS) {
8876 const auto &CInfo = InstInfo[CompIdx];
8877 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
8878 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
8879 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
8880 if (CInfo.hasSrc2Acc())
8881 addOp(CInfo.getIndexOfDstInParsedOperands());
8882 }
8883}
8884
8885//===----------------------------------------------------------------------===//
8886// dpp
8887//===----------------------------------------------------------------------===//
8888
8889bool AMDGPUOperand::isDPP8() const {
8890 return isImmTy(ImmTyDPP8);
8891}
8892
8893bool AMDGPUOperand::isDPPCtrl() const {
8894 using namespace AMDGPU::DPP;
8895
8896 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8897 if (result) {
8898 int64_t Imm = getImm();
8899 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8900 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8901 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8902 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8903 (Imm == DppCtrl::WAVE_SHL1) ||
8904 (Imm == DppCtrl::WAVE_ROL1) ||
8905 (Imm == DppCtrl::WAVE_SHR1) ||
8906 (Imm == DppCtrl::WAVE_ROR1) ||
8907 (Imm == DppCtrl::ROW_MIRROR) ||
8908 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8909 (Imm == DppCtrl::BCAST15) ||
8910 (Imm == DppCtrl::BCAST31) ||
8911 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8912 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8913 }
8914 return false;
8915}
8916
8917//===----------------------------------------------------------------------===//
8918// mAI
8919//===----------------------------------------------------------------------===//
8920
8921bool AMDGPUOperand::isBLGP() const {
8922 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8923}
8924
8925bool AMDGPUOperand::isCBSZ() const {
8926 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8927}
8928
8929bool AMDGPUOperand::isABID() const {
8930 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8931}
8932
8933bool AMDGPUOperand::isS16Imm() const {
8934 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8935}
8936
8937bool AMDGPUOperand::isU16Imm() const {
8938 return isImmLiteral() && isUInt<16>(getImm());
8939}
8940
8941//===----------------------------------------------------------------------===//
8942// dim
8943//===----------------------------------------------------------------------===//
8944
8945bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8946 // We want to allow "dim:1D" etc.,
8947 // but the initial 1 is tokenized as an integer.
8948 std::string Token;
8949 if (isToken(AsmToken::Integer)) {
8950 SMLoc Loc = getToken().getEndLoc();
8951 Token = std::string(getTokenStr());
8952 lex();
8953 if (getLoc() != Loc)
8954 return false;
8955 }
8956
8957 StringRef Suffix;
8958 if (!parseId(Suffix))
8959 return false;
8960 Token += Suffix;
8961
8962 StringRef DimId = Token;
8963 if (DimId.starts_with("SQ_RSRC_IMG_"))
8964 DimId = DimId.drop_front(12);
8965
8966  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8967  if (!DimInfo)
8968 return false;
8969
8970 Encoding = DimInfo->Encoding;
8971 return true;
8972}
8973
8974ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8975 if (!isGFX10Plus())
8976 return ParseStatus::NoMatch;
8977
8978 SMLoc S = getLoc();
8979
8980 if (!trySkipId("dim", AsmToken::Colon))
8981 return ParseStatus::NoMatch;
8982
8983 unsigned Encoding;
8984 SMLoc Loc = getLoc();
8985 if (!parseDimId(Encoding))
8986 return Error(Loc, "invalid dim value");
8987
8988 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8989 AMDGPUOperand::ImmTyDim));
8990 return ParseStatus::Success;
8991}
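// Illustrative dim operand forms accepted above (GFX10+; the SQ_RSRC_IMG_
// prefix is optional, and the instructions shown are examples):
//   image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D
//   image_load v[0:3], [v0, v1], s[0:7] dmask:0xf dim:2D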
8992
8993//===----------------------------------------------------------------------===//
8994// dpp
8995//===----------------------------------------------------------------------===//
8996
8997ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8998 SMLoc S = getLoc();
8999
9000 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9001 return ParseStatus::NoMatch;
9002
9003 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
9004
9005 int64_t Sels[8];
9006
9007 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9008 return ParseStatus::Failure;
9009
9010 for (size_t i = 0; i < 8; ++i) {
9011 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9012 return ParseStatus::Failure;
9013
9014 SMLoc Loc = getLoc();
9015 if (getParser().parseAbsoluteExpression(Sels[i]))
9016 return ParseStatus::Failure;
9017 if (0 > Sels[i] || 7 < Sels[i])
9018 return Error(Loc, "expected a 3-bit value");
9019 }
9020
9021 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9022 return ParseStatus::Failure;
9023
9024 unsigned DPP8 = 0;
9025 for (size_t i = 0; i < 8; ++i)
9026 DPP8 |= (Sels[i] << (i * 3));
9027
9028 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9029 return ParseStatus::Success;
9030}
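// Illustrative dpp8 operands handled above (GFX10+):
//   v_mov_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
//   v_mov_b32_dpp v5, v1 dpp8:[0,1,2,3,4,5,6,7] fi:1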
9031
9032bool
9033AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9034 const OperandVector &Operands) {
9035 if (Ctrl == "row_newbcast")
9036 return isGFX90A();
9037
9038 if (Ctrl == "row_share" ||
9039 Ctrl == "row_xmask")
9040 return isGFX10Plus();
9041
9042 if (Ctrl == "wave_shl" ||
9043 Ctrl == "wave_shr" ||
9044 Ctrl == "wave_rol" ||
9045 Ctrl == "wave_ror" ||
9046 Ctrl == "row_bcast")
9047 return isVI() || isGFX9();
9048
9049 return Ctrl == "row_mirror" ||
9050 Ctrl == "row_half_mirror" ||
9051 Ctrl == "quad_perm" ||
9052 Ctrl == "row_shl" ||
9053 Ctrl == "row_shr" ||
9054 Ctrl == "row_ror";
9055}
9056
9057int64_t
9058AMDGPUAsmParser::parseDPPCtrlPerm() {
9059 // quad_perm:[%d,%d,%d,%d]
9060
9061 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9062 return -1;
9063
9064 int64_t Val = 0;
9065 for (int i = 0; i < 4; ++i) {
9066 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9067 return -1;
9068
9069 int64_t Temp;
9070 SMLoc Loc = getLoc();
9071 if (getParser().parseAbsoluteExpression(Temp))
9072 return -1;
9073 if (Temp < 0 || Temp > 3) {
9074 Error(Loc, "expected a 2-bit value");
9075 return -1;
9076 }
9077
9078 Val += (Temp << i * 2);
9079 }
9080
9081 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9082 return -1;
9083
9084 return Val;
9085}
9086
9087int64_t
9088AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
9089 using namespace AMDGPU::DPP;
9090
9091 // sel:%d
9092
9093 int64_t Val;
9094 SMLoc Loc = getLoc();
9095
9096 if (getParser().parseAbsoluteExpression(Val))
9097 return -1;
9098
9099 struct DppCtrlCheck {
9100 int64_t Ctrl;
9101 int Lo;
9102 int Hi;
9103 };
9104
9105 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
9106 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
9107 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
9108 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
9109 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
9110 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
9111 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
9112 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
9113 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
9114 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
9115 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
9116 .Default({-1, 0, 0});
9117
9118 bool Valid;
9119 if (Check.Ctrl == -1) {
9120 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
9121 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
9122 } else {
9123 Valid = Check.Lo <= Val && Val <= Check.Hi;
9124 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
9125 }
9126
9127 if (!Valid) {
9128 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
9129 return -1;
9130 }
9131
9132 return Val;
9133}
9134
9135ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
9136 using namespace AMDGPU::DPP;
9137
9138 if (!isToken(AsmToken::Identifier) ||
9139 !isSupportedDPPCtrl(getTokenStr(), Operands))
9140 return ParseStatus::NoMatch;
9141
9142 SMLoc S = getLoc();
9143 int64_t Val = -1;
9144  StringRef Ctrl;
9145
9146 parseId(Ctrl);
9147
9148 if (Ctrl == "row_mirror") {
9149 Val = DppCtrl::ROW_MIRROR;
9150 } else if (Ctrl == "row_half_mirror") {
9151 Val = DppCtrl::ROW_HALF_MIRROR;
9152 } else {
9153 if (skipToken(AsmToken::Colon, "expected a colon")) {
9154 if (Ctrl == "quad_perm") {
9155 Val = parseDPPCtrlPerm();
9156 } else {
9157 Val = parseDPPCtrlSel(Ctrl);
9158 }
9159 }
9160 }
9161
9162 if (Val == -1)
9163 return ParseStatus::Failure;
9164
9165 Operands.push_back(
9166 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
9167 return ParseStatus::Success;
9168}
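// Illustrative dpp control operands handled above (availability depends on
// the target, see isSupportedDPPCtrl):
//   v_mov_b32_dpp v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
//   v_mov_b32_dpp v0, v1 row_shl:1
//   v_mov_b32_dpp v0, v1 row_mirror bound_ctrl:0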
9169
9170void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
9171 bool IsDPP8) {
9172 OptionalImmIndexMap OptionalIdx;
9173 unsigned Opc = Inst.getOpcode();
9174 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9175
9176  // MAC instructions are special because they have an 'old'
9177  // operand which is not tied to dst (but assumed to be).
9178 // They also have dummy unused src2_modifiers.
9179 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
9180 int Src2ModIdx =
9181 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
9182 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
9183 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
9184
9185 unsigned I = 1;
9186 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9187 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9188 }
9189
9190 int Fi = 0;
9191 for (unsigned E = Operands.size(); I != E; ++I) {
9192
9193 if (IsMAC) {
9194 int NumOperands = Inst.getNumOperands();
9195 if (OldIdx == NumOperands) {
9196 // Handle old operand
9197 constexpr int DST_IDX = 0;
9198 Inst.addOperand(Inst.getOperand(DST_IDX));
9199 } else if (Src2ModIdx == NumOperands) {
9200 // Add unused dummy src2_modifiers
9201        Inst.addOperand(MCOperand::createImm(0));
9202      }
9203 }
9204
9205 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
9206 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
9207 Inst.addOperand(Inst.getOperand(0));
9208 }
9209
9210 bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 ||
9211 Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12 ||
9212 Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 ||
9213 Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12;
9214 if (IsVOP3CvtSrDpp) {
9215 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
9216 Inst.addOperand(MCOperand::createImm(0));
9217 Inst.addOperand(MCOperand::createReg(0));
9218 }
9219 }
9220
9221 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9222 MCOI::TIED_TO);
9223 if (TiedTo != -1) {
9224 assert((unsigned)TiedTo < Inst.getNumOperands());
9225 // handle tied old or src2 for MAC instructions
9226 Inst.addOperand(Inst.getOperand(TiedTo));
9227 }
9228 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9229 // Add the register arguments
9230 if (IsDPP8 && Op.isDppFI()) {
9231 Fi = Op.getImm();
9232 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9233 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9234 } else if (Op.isReg()) {
9235 Op.addRegOperands(Inst, 1);
9236 } else if (Op.isImm() &&
9237 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
9238 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
9239 Op.addImmOperands(Inst, 1);
9240 } else if (Op.isImm()) {
9241 OptionalIdx[Op.getImmTy()] = I;
9242 } else {
9243 llvm_unreachable("unhandled operand type");
9244 }
9245 }
9246 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9247 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
9248
9249 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9250 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
9251
9252 if (Desc.TSFlags & SIInstrFlags::VOP3P)
9253 cvtVOP3P(Inst, Operands, OptionalIdx);
9254 else if (Desc.TSFlags & SIInstrFlags::VOP3)
9255 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
9256 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9257 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9258 }
9259
9260 if (IsDPP8) {
9261 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
9262 using namespace llvm::AMDGPU::DPP;
9263 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9264 } else {
9265 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
9266 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9267 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9268 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9269
9270 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
9271 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9272 AMDGPUOperand::ImmTyDppFI);
9273 }
9274}
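// Note (illustrative summary): operands left unspecified default to
// dpp_ctrl = 0xe4 (the identity quad_perm) and row_mask = bank_mask = 0xf
// (all rows/banks enabled); for DPP8 the FI field is emitted as DPP8_FI_1
// only when a nonzero fi operand was parsed.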
9275
9276void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
9277 OptionalImmIndexMap OptionalIdx;
9278
9279 unsigned I = 1;
9280 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9281 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9282 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9283 }
9284
9285 int Fi = 0;
9286 for (unsigned E = Operands.size(); I != E; ++I) {
9287 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9288 MCOI::TIED_TO);
9289 if (TiedTo != -1) {
9290 assert((unsigned)TiedTo < Inst.getNumOperands());
9291 // handle tied old or src2 for MAC instructions
9292 Inst.addOperand(Inst.getOperand(TiedTo));
9293 }
9294 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9295 // Add the register arguments
9296 if (Op.isReg() && validateVccOperand(Op.getReg())) {
9297 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
9298 // Skip it.
9299 continue;
9300 }
9301
9302 if (IsDPP8) {
9303 if (Op.isDPP8()) {
9304 Op.addImmOperands(Inst, 1);
9305 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9306 Op.addRegWithFPInputModsOperands(Inst, 2);
9307 } else if (Op.isDppFI()) {
9308 Fi = Op.getImm();
9309 } else if (Op.isReg()) {
9310 Op.addRegOperands(Inst, 1);
9311 } else {
9312 llvm_unreachable("Invalid operand type");
9313 }
9314 } else {
9315 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9316 Op.addRegWithFPInputModsOperands(Inst, 2);
9317 } else if (Op.isReg()) {
9318 Op.addRegOperands(Inst, 1);
9319 } else if (Op.isDPPCtrl()) {
9320 Op.addImmOperands(Inst, 1);
9321 } else if (Op.isImm()) {
9322 // Handle optional arguments
9323 OptionalIdx[Op.getImmTy()] = I;
9324 } else {
9325 llvm_unreachable("Invalid operand type");
9326 }
9327 }
9328 }
9329
9330 if (IsDPP8) {
9331 using namespace llvm::AMDGPU::DPP;
9332 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9333 } else {
9334 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9335 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9336 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9337 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
9338 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9339 AMDGPUOperand::ImmTyDppFI);
9340 }
9341 }
9342}
9343
9344//===----------------------------------------------------------------------===//
9345// sdwa
9346//===----------------------------------------------------------------------===//
9347
9348ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
9349 StringRef Prefix,
9350 AMDGPUOperand::ImmTy Type) {
9351 using namespace llvm::AMDGPU::SDWA;
9352
9353 SMLoc S = getLoc();
9354 StringRef Value;
9355
9356 SMLoc StringLoc;
9357 ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc);
9358 if (!Res.isSuccess())
9359 return Res;
9360
9361 int64_t Int;
9362 Int = StringSwitch<int64_t>(Value)
9363 .Case("BYTE_0", SdwaSel::BYTE_0)
9364 .Case("BYTE_1", SdwaSel::BYTE_1)
9365 .Case("BYTE_2", SdwaSel::BYTE_2)
9366 .Case("BYTE_3", SdwaSel::BYTE_3)
9367 .Case("WORD_0", SdwaSel::WORD_0)
9368 .Case("WORD_1", SdwaSel::WORD_1)
9369 .Case("DWORD", SdwaSel::DWORD)
9370 .Default(0xffffffff);
9371
9372 if (Int == 0xffffffff)
9373 return Error(StringLoc, "invalid " + Twine(Prefix) + " value");
9374
9375 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
9376 return ParseStatus::Success;
9377}
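// Illustrative usage (prefix names are those conventionally passed in, e.g.
// "dst_sel", "src0_sel", "src1_sel"): an operand such as "src0_sel:BYTE_0"
// or "dst_sel:WORD_1" maps onto the SdwaSel enumerators above; any other
// name is reported as an "invalid <prefix> value".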
9378
9379ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
9380 using namespace llvm::AMDGPU::SDWA;
9381
9382 SMLoc S = getLoc();
9383 StringRef Value;
9384
9385 SMLoc StringLoc;
9386 ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc);
9387 if (!Res.isSuccess())
9388 return Res;
9389
9390 int64_t Int;
9391 Int = StringSwitch<int64_t>(Value)
9392 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
9393 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
9394 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
9395 .Default(0xffffffff);
9396
9397 if (Int == 0xffffffff)
9398 return Error(StringLoc, "invalid dst_unused value");
9399
9400 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused));
9401 return ParseStatus::Success;
9402}
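// Illustrative usage: parses "dst_unused:UNUSED_PAD", "dst_unused:UNUSED_SEXT"
// or "dst_unused:UNUSED_PRESERVE" into the corresponding DstUnused value.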
9403
9404void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
9405 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
9406}
9407
9408void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
9409 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
9410}
9411
9412void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
9413 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
9414}
9415
9416void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
9417 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
9418}
9419
9420void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
9421 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
9422}
9423
9424void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
9425 uint64_t BasicInstType,
9426 bool SkipDstVcc,
9427 bool SkipSrcVcc) {
9428 using namespace llvm::AMDGPU::SDWA;
9429
9430 OptionalImmIndexMap OptionalIdx;
9431 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
9432 bool SkippedVcc = false;
9433
9434 unsigned I = 1;
9435 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9436 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9437 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9438 }
9439
9440 for (unsigned E = Operands.size(); I != E; ++I) {
9441 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9442 if (SkipVcc && !SkippedVcc && Op.isReg() &&
9443 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
9444 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
9445 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
9446 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
9447 // Skip VCC only if we didn't skip it on previous iteration.
9448 // Note that src0 and src1 occupy 2 slots each because of modifiers.
9449 if (BasicInstType == SIInstrFlags::VOP2 &&
9450 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
9451 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
9452 SkippedVcc = true;
9453 continue;
9454 } else if (BasicInstType == SIInstrFlags::VOPC &&
9455 Inst.getNumOperands() == 0) {
9456 SkippedVcc = true;
9457 continue;
9458 }
9459 }
9460 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9461 Op.addRegOrImmWithInputModsOperands(Inst, 2);
9462 } else if (Op.isImm()) {
9463 // Handle optional arguments
9464 OptionalIdx[Op.getImmTy()] = I;
9465 } else {
9466 llvm_unreachable("Invalid operand type");
9467 }
9468 SkippedVcc = false;
9469 }
9470
9471 const unsigned Opc = Inst.getOpcode();
9472 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
9473 Opc != AMDGPU::V_NOP_sdwa_vi) {
9474 // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
9475 switch (BasicInstType) {
9476 case SIInstrFlags::VOP1:
9477 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9478 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9479 AMDGPUOperand::ImmTyClampSI, 0);
9480
9481 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9482 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9483 AMDGPUOperand::ImmTyOModSI, 0);
9484
9485 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
9486 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9487 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9488
9489 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
9490 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9491 AMDGPUOperand::ImmTySDWADstUnused,
9492 DstUnused::UNUSED_PRESERVE);
9493
9494 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9495 break;
9496
9497 case SIInstrFlags::VOP2:
9498 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9499
9500 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
9501 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
9502
9503 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9504 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
9505 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9506 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9507 break;
9508
9509 case SIInstrFlags::VOPC:
9510 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
9511 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9512 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9513 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9514 break;
9515
9516 default:
9517 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9518 }
9519 }
9520
9521 // special case v_mac_{f16, f32}:
9522 // it has src2 register operand that is tied to dst operand
9523 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9524 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
9525 auto it = Inst.begin();
9526 std::advance(
9527 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
9528 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
9529 }
9530}
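// Summary (illustrative): defs are copied first, a "vcc" register operand is
// skipped for VOP2b/VOPC forms when requested, remaining sources are appended,
// and omitted optional operands receive defaults (clamp/omod = 0, all *_sel =
// DWORD, dst_unused = UNUSED_PRESERVE); v_mac_f16/f32 then get src2 tied to
// the destination.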
9531
9532/// Force static initialization.
9533extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
9534 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
9535 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
9536}
9537
9538#define GET_REGISTER_MATCHER
9539#define GET_MATCHER_IMPLEMENTATION
9540#define GET_MNEMONIC_SPELL_CHECKER
9541#define GET_MNEMONIC_CHECKER
9542#include "AMDGPUGenAsmMatcher.inc"
9543
9544ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
9545 unsigned MCK) {
9546 switch (MCK) {
9547 case MCK_addr64:
9548 return parseTokenOp("addr64", Operands);
9549 case MCK_done:
9550 return parseTokenOp("done", Operands);
9551 case MCK_idxen:
9552 return parseTokenOp("idxen", Operands);
9553 case MCK_lds:
9554 return parseTokenOp("lds", Operands);
9555 case MCK_offen:
9556 return parseTokenOp("offen", Operands);
9557 case MCK_off:
9558 return parseTokenOp("off", Operands);
9559 case MCK_row_95_en:
9560 return parseTokenOp("row_en", Operands);
9561 case MCK_gds:
9562 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
9563 case MCK_tfe:
9564 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
9565 }
9566 return tryCustomParseOperand(Operands, MCK);
9567}
9568
9569// This function should be defined after auto-generated include so that we have
9570// MatchClassKind enum defined
9571unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
9572 unsigned Kind) {
9573 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
9574 // But MatchInstructionImpl() expects a token and fails to validate the
9575 // operand. This method checks whether we were given an immediate operand
9576 // where the corresponding token is expected.
9577 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9578 switch (Kind) {
9579 case MCK_addr64:
9580 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9581 case MCK_gds:
9582 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9583 case MCK_lds:
9584 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9585 case MCK_idxen:
9586 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9587 case MCK_offen:
9588 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9589 case MCK_tfe:
9590 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
9591 case MCK_SSrc_b32:
9592 // When operands have expression values, they will return true for isToken,
9593 // because it is not possible to distinguish between a token and an
9594 // expression at parse time. MatchInstructionImpl() will always try to
9595 // match an operand as a token, when isToken returns true, and when the
9596 // name of the expression is not a valid token, the match will fail,
9597 // so we need to handle it here.
9598 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
9599 case MCK_SSrc_f32:
9600 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
9601 case MCK_SOPPBrTarget:
9602 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
9603 case MCK_VReg32OrOff:
9604 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9605 case MCK_InterpSlot:
9606 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9607 case MCK_InterpAttr:
9608 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9609 case MCK_InterpAttrChan:
9610 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
9611 case MCK_SReg_64:
9612 case MCK_SReg_64_XEXEC:
9613 // Null is defined as a 32-bit register but
9614 // it should also be enabled with 64-bit operands.
9615 // The following code enables it for SReg_64 operands
9616 // used as source and destination. Remaining source
9617 // operands are handled in isInlinableImm.
9618 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9619 default:
9620 return Match_InvalidOperand;
9621 }
9622}
9623
9624//===----------------------------------------------------------------------===//
9625// endpgm
9626//===----------------------------------------------------------------------===//
9627
9628ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
9629 SMLoc S = getLoc();
9630 int64_t Imm = 0;
9631
9632 if (!parseExpr(Imm)) {
9633 // The operand is optional, if not present default to 0
9634 Imm = 0;
9635 }
9636
9637 if (!isUInt<16>(Imm))
9638 return Error(S, "expected a 16-bit value");
9639
9640 Operands.push_back(
9641 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9642 return ParseStatus::Success;
9643}
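// Illustrative usage: "s_endpgm" parses with the default immediate 0, while
// "s_endpgm 5" attaches 5; any value outside the unsigned 16-bit range is
// rejected with "expected a 16-bit value".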
9644
9645bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9646
9647//===----------------------------------------------------------------------===//
9648// LDSDIR
9649//===----------------------------------------------------------------------===//
9650
9651bool AMDGPUOperand::isWaitVDST() const {
9652 return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
9653}
9654
9655bool AMDGPUOperand::isWaitVAVDst() const {
9656 return isImmTy(ImmTyWaitVAVDst) && isUInt<4>(getImm());
9657}
9658
9659bool AMDGPUOperand::isWaitVMVSrc() const {
9660 return isImmTy(ImmTyWaitVMVSrc) && isUInt<1>(getImm());
9661}
9662
9663//===----------------------------------------------------------------------===//
9664// VINTERP
9665//===----------------------------------------------------------------------===//
9666
9667bool AMDGPUOperand::isWaitEXP() const {
9668 return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
9669}
9670
9671//===----------------------------------------------------------------------===//
9672// Split Barrier
9673//===----------------------------------------------------------------------===//
9674
9675bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }