1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
12#include "SIDefines.h"
13#include "SIInstrInfo.h"
14#include "SIRegisterInfo.h"
19#include "llvm/ADT/APFloat.h"
21#include "llvm/ADT/StringSet.h"
22#include "llvm/ADT/Twine.h"
25#include "llvm/MC/MCAsmInfo.h"
26#include "llvm/MC/MCContext.h"
27#include "llvm/MC/MCExpr.h"
28#include "llvm/MC/MCInst.h"
29#include "llvm/MC/MCInstrDesc.h"
34#include "llvm/MC/MCSymbol.h"
41#include <optional>
42
43using namespace llvm;
44using namespace llvm::AMDGPU;
45using namespace llvm::amdhsa;
46
47namespace {
48
49class AMDGPUAsmParser;
50
51enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
52
53//===----------------------------------------------------------------------===//
54// Operand
55//===----------------------------------------------------------------------===//
56
57class AMDGPUOperand : public MCParsedAsmOperand {
58 enum KindTy {
59 Token,
60 Immediate,
 61    Register,
 62    Expression
 63  } Kind;
64
65 SMLoc StartLoc, EndLoc;
66 const AMDGPUAsmParser *AsmParser;
67
68public:
69 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
70 : Kind(Kind_), AsmParser(AsmParser_) {}
71
72 using Ptr = std::unique_ptr<AMDGPUOperand>;
73
74 struct Modifiers {
75 bool Abs = false;
76 bool Neg = false;
77 bool Sext = false;
78 bool Lit = false;
79
80 bool hasFPModifiers() const { return Abs || Neg; }
81 bool hasIntModifiers() const { return Sext; }
82 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
83
84 int64_t getFPModifiersOperand() const {
85 int64_t Operand = 0;
86 Operand |= Abs ? SISrcMods::ABS : 0u;
87 Operand |= Neg ? SISrcMods::NEG : 0u;
88 return Operand;
89 }
90
91 int64_t getIntModifiersOperand() const {
92 int64_t Operand = 0;
93 Operand |= Sext ? SISrcMods::SEXT : 0u;
94 return Operand;
95 }
96
97 int64_t getModifiersOperand() const {
98 assert(!(hasFPModifiers() && hasIntModifiers())
99 && "fp and int modifiers should not be used simultaneously");
100 if (hasFPModifiers()) {
101 return getFPModifiersOperand();
102 } else if (hasIntModifiers()) {
103 return getIntModifiersOperand();
104 } else {
105 return 0;
106 }
107 }
108
109 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
110 };
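  // Illustrative sketch (not part of the original source): how source modifiers
  // are expected to pack into a src_modifiers operand. Parsing "-|v0|" would set
  // Neg and Abs, so getModifiersOperand() returns SISrcMods::NEG | SISrcMods::ABS,
  // while "sext(v0)" sets only Sext and returns SISrcMods::SEXT. FP and integer
  // modifiers are mutually exclusive, as the assert in getModifiersOperand()
  // enforces.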
111
112 enum ImmTy {
113 ImmTyNone,
114 ImmTyGDS,
115 ImmTyLDS,
116 ImmTyOffen,
117 ImmTyIdxen,
118 ImmTyAddr64,
119 ImmTyOffset,
120 ImmTyInstOffset,
121 ImmTyOffset0,
122 ImmTyOffset1,
123 ImmTySMEMOffsetMod,
124 ImmTyCPol,
125 ImmTyTFE,
126 ImmTyD16,
127 ImmTyClampSI,
128 ImmTyOModSI,
129 ImmTySDWADstSel,
130 ImmTySDWASrc0Sel,
131 ImmTySDWASrc1Sel,
132 ImmTySDWADstUnused,
133 ImmTyDMask,
134 ImmTyDim,
135 ImmTyUNorm,
136 ImmTyDA,
137 ImmTyR128A16,
138 ImmTyA16,
139 ImmTyLWE,
140 ImmTyExpTgt,
141 ImmTyExpCompr,
142 ImmTyExpVM,
143 ImmTyFORMAT,
144 ImmTyHwreg,
145 ImmTyOff,
146 ImmTySendMsg,
147 ImmTyInterpSlot,
148 ImmTyInterpAttr,
149 ImmTyInterpAttrChan,
150 ImmTyOpSel,
151 ImmTyOpSelHi,
152 ImmTyNegLo,
153 ImmTyNegHi,
154 ImmTyDPP8,
155 ImmTyDppCtrl,
156 ImmTyDppRowMask,
157 ImmTyDppBankMask,
158 ImmTyDppBoundCtrl,
159 ImmTyDppFI,
160 ImmTySwizzle,
161 ImmTyGprIdxMode,
162 ImmTyHigh,
163 ImmTyBLGP,
164 ImmTyCBSZ,
165 ImmTyABID,
166 ImmTyEndpgm,
167 ImmTyWaitVDST,
168 ImmTyWaitEXP,
169 };
170
171 // Immediate operand kind.
172 // It helps to identify the location of an offending operand after an error.
173 // Note that regular literals and mandatory literals (KImm) must be handled
174 // differently. When looking for an offending operand, we should usually
175 // ignore mandatory literals because they are part of the instruction and
176 // cannot be changed. Report location of mandatory operands only for VOPD,
177 // when both OpX and OpY have a KImm and there are no other literals.
178 enum ImmKindTy {
179 ImmKindTyNone,
180 ImmKindTyLiteral,
181 ImmKindTyMandatoryLiteral,
182 ImmKindTyConst,
183 };
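  // Illustrative note (not part of the original source): when diagnosing an
  // invalid literal, the parser walks the operand list looking for an operand
  // whose kind is ImmKindTyLiteral and reports its location; operands marked
  // ImmKindTyMandatoryLiteral are normally skipped because a KImm is a fixed
  // part of the instruction, except in the VOPD case described above.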
184
185private:
186 struct TokOp {
187 const char *Data;
188 unsigned Length;
189 };
190
191 struct ImmOp {
192 int64_t Val;
193 ImmTy Type;
194 bool IsFPImm;
195 mutable ImmKindTy Kind;
196 Modifiers Mods;
197 };
198
199 struct RegOp {
200 unsigned RegNo;
201 Modifiers Mods;
202 };
203
204 union {
205 TokOp Tok;
206 ImmOp Imm;
207 RegOp Reg;
208 const MCExpr *Expr;
209 };
210
211public:
212 bool isToken() const override { return Kind == Token; }
213
214 bool isSymbolRefExpr() const {
215 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
216 }
217
218 bool isImm() const override {
219 return Kind == Immediate;
220 }
221
222 void setImmKindNone() const {
223 assert(isImm());
224 Imm.Kind = ImmKindTyNone;
225 }
226
227 void setImmKindLiteral() const {
228 assert(isImm());
229 Imm.Kind = ImmKindTyLiteral;
230 }
231
232 void setImmKindMandatoryLiteral() const {
233 assert(isImm());
234 Imm.Kind = ImmKindTyMandatoryLiteral;
235 }
236
237 void setImmKindConst() const {
238 assert(isImm());
239 Imm.Kind = ImmKindTyConst;
240 }
241
242 bool IsImmKindLiteral() const {
243 return isImm() && Imm.Kind == ImmKindTyLiteral;
244 }
245
246 bool IsImmKindMandatoryLiteral() const {
247 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
248 }
249
250 bool isImmKindConst() const {
251 return isImm() && Imm.Kind == ImmKindTyConst;
252 }
253
254 bool isInlinableImm(MVT type) const;
255 bool isLiteralImm(MVT type) const;
256
257 bool isRegKind() const {
258 return Kind == Register;
259 }
260
261 bool isReg() const override {
262 return isRegKind() && !hasModifiers();
263 }
264
265 bool isRegOrInline(unsigned RCID, MVT type) const {
266 return isRegClass(RCID) || isInlinableImm(type);
267 }
268
269 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
270 return isRegOrInline(RCID, type) || isLiteralImm(type);
271 }
272
273 bool isRegOrImmWithInt16InputMods() const {
274 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
275 }
276
277 bool isRegOrImmWithIntT16InputMods() const {
278 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
279 }
280
281 bool isRegOrImmWithInt32InputMods() const {
282 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
283 }
284
285 bool isRegOrInlineImmWithInt16InputMods() const {
286 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
287 }
288
289 bool isRegOrInlineImmWithInt32InputMods() const {
290 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
291 }
292
293 bool isRegOrImmWithInt64InputMods() const {
294 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
295 }
296
297 bool isRegOrImmWithFP16InputMods() const {
298 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
299 }
300
301 bool isRegOrImmWithFPT16InputMods() const {
302 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
303 }
304
305 bool isRegOrImmWithFP32InputMods() const {
306 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
307 }
308
309 bool isRegOrImmWithFP64InputMods() const {
310 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
311 }
312
313 bool isRegOrInlineImmWithFP16InputMods() const {
314 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
315 }
316
317 bool isRegOrInlineImmWithFP32InputMods() const {
318 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
319 }
320
321
322 bool isVReg() const {
323 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
324 isRegClass(AMDGPU::VReg_64RegClassID) ||
325 isRegClass(AMDGPU::VReg_96RegClassID) ||
326 isRegClass(AMDGPU::VReg_128RegClassID) ||
327 isRegClass(AMDGPU::VReg_160RegClassID) ||
328 isRegClass(AMDGPU::VReg_192RegClassID) ||
329 isRegClass(AMDGPU::VReg_256RegClassID) ||
330 isRegClass(AMDGPU::VReg_512RegClassID) ||
331 isRegClass(AMDGPU::VReg_1024RegClassID);
332 }
333
334 bool isVReg32() const {
335 return isRegClass(AMDGPU::VGPR_32RegClassID);
336 }
337
338 bool isVReg32OrOff() const {
339 return isOff() || isVReg32();
340 }
341
342 bool isNull() const {
343 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
344 }
345
346 bool isVRegWithInputMods() const;
347 bool isT16VRegWithInputMods() const;
348
349 bool isSDWAOperand(MVT type) const;
350 bool isSDWAFP16Operand() const;
351 bool isSDWAFP32Operand() const;
352 bool isSDWAInt16Operand() const;
353 bool isSDWAInt32Operand() const;
354
355 bool isImmTy(ImmTy ImmT) const {
356 return isImm() && Imm.Type == ImmT;
357 }
358
359 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
360
361 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
362
363 bool isImmModifier() const {
364 return isImm() && Imm.Type != ImmTyNone;
365 }
366
367 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
368 bool isDMask() const { return isImmTy(ImmTyDMask); }
369 bool isDim() const { return isImmTy(ImmTyDim); }
370 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
371 bool isOff() const { return isImmTy(ImmTyOff); }
372 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
373 bool isOffen() const { return isImmTy(ImmTyOffen); }
374 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
375 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
376 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
377 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
378 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
379 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
380 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
381 bool isGDS() const { return isImmTy(ImmTyGDS); }
382 bool isLDS() const { return isImmTy(ImmTyLDS); }
383 bool isCPol() const { return isImmTy(ImmTyCPol); }
384 bool isTFE() const { return isImmTy(ImmTyTFE); }
385 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
386 bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); }
387 bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); }
388 bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
389 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
390 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
391 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
392 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
393 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
394 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
395 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
396 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
397 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
398 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
399 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
400 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
401
402 bool isRegOrImm() const {
403 return isReg() || isImm();
404 }
405
406 bool isRegClass(unsigned RCID) const;
407
408 bool isInlineValue() const;
409
410 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
411 return isRegOrInline(RCID, type) && !hasModifiers();
412 }
413
414 bool isSCSrcB16() const {
415 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
416 }
417
418 bool isSCSrcV2B16() const {
419 return isSCSrcB16();
420 }
421
422 bool isSCSrcB32() const {
423 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
424 }
425
426 bool isSCSrcB64() const {
427 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
428 }
429
430 bool isBoolReg() const;
431
432 bool isSCSrcF16() const {
433 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
434 }
435
436 bool isSCSrcV2F16() const {
437 return isSCSrcF16();
438 }
439
440 bool isSCSrcF32() const {
441 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
442 }
443
444 bool isSCSrcF64() const {
445 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
446 }
447
448 bool isSSrcB32() const {
449 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
450 }
451
452 bool isSSrcB16() const {
453 return isSCSrcB16() || isLiteralImm(MVT::i16);
454 }
455
456 bool isSSrcV2B16() const {
457 llvm_unreachable("cannot happen");
458 return isSSrcB16();
459 }
460
461 bool isSSrcB64() const {
462 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
463 // See isVSrc64().
464 return isSCSrcB64() || isLiteralImm(MVT::i64);
465 }
466
467 bool isSSrcF32() const {
468 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
469 }
470
471 bool isSSrcF64() const {
472 return isSCSrcB64() || isLiteralImm(MVT::f64);
473 }
474
475 bool isSSrcF16() const {
476 return isSCSrcB16() || isLiteralImm(MVT::f16);
477 }
478
479 bool isSSrcV2F16() const {
480 llvm_unreachable("cannot happen");
481 return isSSrcF16();
482 }
483
484 bool isSSrcV2FP32() const {
485 llvm_unreachable("cannot happen");
486 return isSSrcF32();
487 }
488
489 bool isSCSrcV2FP32() const {
490 llvm_unreachable("cannot happen");
491 return isSCSrcF32();
492 }
493
494 bool isSSrcV2INT32() const {
495 llvm_unreachable("cannot happen");
496 return isSSrcB32();
497 }
498
499 bool isSCSrcV2INT32() const {
500 llvm_unreachable("cannot happen");
501 return isSCSrcB32();
502 }
503
504 bool isSSrcOrLdsB32() const {
505 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
506 isLiteralImm(MVT::i32) || isExpr();
507 }
508
509 bool isVCSrcB32() const {
510 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
511 }
512
513 bool isVCSrcB64() const {
514 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
515 }
516
517 bool isVCSrcTB16() const {
518 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
519 }
520
521 bool isVCSrcTB16_Lo128() const {
522 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
523 }
524
525 bool isVCSrcFake16B16_Lo128() const {
526 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
527 }
528
529 bool isVCSrcB16() const {
530 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
531 }
532
533 bool isVCSrcV2B16() const {
534 return isVCSrcB16();
535 }
536
537 bool isVCSrcF32() const {
538 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
539 }
540
541 bool isVCSrcF64() const {
542 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
543 }
544
545 bool isVCSrcTF16() const {
546 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
547 }
548
549 bool isVCSrcTF16_Lo128() const {
550 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
551 }
552
553 bool isVCSrcFake16F16_Lo128() const {
554 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
555 }
556
557 bool isVCSrcF16() const {
558 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
559 }
560
561 bool isVCSrcV2F16() const {
562 return isVCSrcF16();
563 }
564
565 bool isVSrcB32() const {
566 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
567 }
568
569 bool isVSrcB64() const {
570 return isVCSrcF64() || isLiteralImm(MVT::i64);
571 }
572
573 bool isVSrcTB16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
574
575 bool isVSrcTB16_Lo128() const {
576 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
577 }
578
579 bool isVSrcFake16B16_Lo128() const {
580 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
581 }
582
583 bool isVSrcB16() const {
584 return isVCSrcB16() || isLiteralImm(MVT::i16);
585 }
586
587 bool isVSrcV2B16() const {
588 return isVSrcB16() || isLiteralImm(MVT::v2i16);
589 }
590
591 bool isVCSrcV2FP32() const {
592 return isVCSrcF64();
593 }
594
595 bool isVSrcV2FP32() const {
596 return isVSrcF64() || isLiteralImm(MVT::v2f32);
597 }
598
599 bool isVCSrcV2INT32() const {
600 return isVCSrcB64();
601 }
602
603 bool isVSrcV2INT32() const {
604 return isVSrcB64() || isLiteralImm(MVT::v2i32);
605 }
606
607 bool isVSrcF32() const {
608 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
609 }
610
611 bool isVSrcF64() const {
612 return isVCSrcF64() || isLiteralImm(MVT::f64);
613 }
614
615 bool isVSrcTF16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
616
617 bool isVSrcTF16_Lo128() const {
618 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
619 }
620
621 bool isVSrcFake16F16_Lo128() const {
622 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
623 }
624
625 bool isVSrcF16() const {
626 return isVCSrcF16() || isLiteralImm(MVT::f16);
627 }
628
629 bool isVSrcV2F16() const {
630 return isVSrcF16() || isLiteralImm(MVT::v2f16);
631 }
632
633 bool isVISrcB32() const {
634 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
635 }
636
637 bool isVISrcB16() const {
638 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
639 }
640
641 bool isVISrcV2B16() const {
642 return isVISrcB16();
643 }
644
645 bool isVISrcF32() const {
646 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
647 }
648
649 bool isVISrcF16() const {
650 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
651 }
652
653 bool isVISrcV2F16() const {
654 return isVISrcF16() || isVISrcB32();
655 }
656
657 bool isVISrc_64B64() const {
658 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
659 }
660
661 bool isVISrc_64F64() const {
662 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
663 }
664
665 bool isVISrc_64V2FP32() const {
666 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
667 }
668
669 bool isVISrc_64V2INT32() const {
670 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
671 }
672
673 bool isVISrc_256B64() const {
674 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
675 }
676
677 bool isVISrc_256F64() const {
678 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
679 }
680
681 bool isVISrc_128B16() const {
682 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
683 }
684
685 bool isVISrc_128V2B16() const {
686 return isVISrc_128B16();
687 }
688
689 bool isVISrc_128B32() const {
690 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
691 }
692
693 bool isVISrc_128F32() const {
694 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
695 }
696
697 bool isVISrc_256V2FP32() const {
698 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
699 }
700
701 bool isVISrc_256V2INT32() const {
702 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
703 }
704
705 bool isVISrc_512B32() const {
706 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
707 }
708
709 bool isVISrc_512B16() const {
710 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
711 }
712
713 bool isVISrc_512V2B16() const {
714 return isVISrc_512B16();
715 }
716
717 bool isVISrc_512F32() const {
718 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
719 }
720
721 bool isVISrc_512F16() const {
722 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
723 }
724
725 bool isVISrc_512V2F16() const {
726 return isVISrc_512F16() || isVISrc_512B32();
727 }
728
729 bool isVISrc_1024B32() const {
730 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
731 }
732
733 bool isVISrc_1024B16() const {
734 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
735 }
736
737 bool isVISrc_1024V2B16() const {
738 return isVISrc_1024B16();
739 }
740
741 bool isVISrc_1024F32() const {
742 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
743 }
744
745 bool isVISrc_1024F16() const {
746 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
747 }
748
749 bool isVISrc_1024V2F16() const {
750 return isVISrc_1024F16() || isVISrc_1024B32();
751 }
752
753 bool isAISrcB32() const {
754 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
755 }
756
757 bool isAISrcB16() const {
758 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
759 }
760
761 bool isAISrcV2B16() const {
762 return isAISrcB16();
763 }
764
765 bool isAISrcF32() const {
766 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
767 }
768
769 bool isAISrcF16() const {
770 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
771 }
772
773 bool isAISrcV2F16() const {
774 return isAISrcF16() || isAISrcB32();
775 }
776
777 bool isAISrc_64B64() const {
778 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
779 }
780
781 bool isAISrc_64F64() const {
782 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
783 }
784
785 bool isAISrc_128B32() const {
786 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
787 }
788
789 bool isAISrc_128B16() const {
790 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
791 }
792
793 bool isAISrc_128V2B16() const {
794 return isAISrc_128B16();
795 }
796
797 bool isAISrc_128F32() const {
798 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
799 }
800
801 bool isAISrc_128F16() const {
802 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
803 }
804
805 bool isAISrc_128V2F16() const {
806 return isAISrc_128F16() || isAISrc_128B32();
807 }
808
809 bool isVISrc_128F16() const {
810 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
811 }
812
813 bool isVISrc_128V2F16() const {
814 return isVISrc_128F16() || isVISrc_128B32();
815 }
816
817 bool isAISrc_256B64() const {
818 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
819 }
820
821 bool isAISrc_256F64() const {
822 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
823 }
824
825 bool isAISrc_512B32() const {
826 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
827 }
828
829 bool isAISrc_512B16() const {
830 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
831 }
832
833 bool isAISrc_512V2B16() const {
834 return isAISrc_512B16();
835 }
836
837 bool isAISrc_512F32() const {
838 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
839 }
840
841 bool isAISrc_512F16() const {
842 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
843 }
844
845 bool isAISrc_512V2F16() const {
846 return isAISrc_512F16() || isAISrc_512B32();
847 }
848
849 bool isAISrc_1024B32() const {
850 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
851 }
852
853 bool isAISrc_1024B16() const {
854 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
855 }
856
857 bool isAISrc_1024V2B16() const {
858 return isAISrc_1024B16();
859 }
860
861 bool isAISrc_1024F32() const {
862 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
863 }
864
865 bool isAISrc_1024F16() const {
866 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
867 }
868
869 bool isAISrc_1024V2F16() const {
870 return isAISrc_1024F16() || isAISrc_1024B32();
871 }
872
873 bool isKImmFP32() const {
874 return isLiteralImm(MVT::f32);
875 }
876
877 bool isKImmFP16() const {
878 return isLiteralImm(MVT::f16);
879 }
880
881 bool isMem() const override {
882 return false;
883 }
884
885 bool isExpr() const {
886 return Kind == Expression;
887 }
888
889 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
890
891 bool isSWaitCnt() const;
892 bool isDepCtr() const;
893 bool isSDelayALU() const;
894 bool isHwreg() const;
895 bool isSendMsg() const;
896 bool isSwizzle() const;
897 bool isSMRDOffset8() const;
898 bool isSMEMOffset() const;
899 bool isSMRDLiteralOffset() const;
900 bool isDPP8() const;
901 bool isDPPCtrl() const;
902 bool isBLGP() const;
903 bool isCBSZ() const;
904 bool isABID() const;
905 bool isGPRIdxMode() const;
906 bool isS16Imm() const;
907 bool isU16Imm() const;
908 bool isEndpgm() const;
909 bool isWaitVDST() const;
910 bool isWaitEXP() const;
911
912 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
913 return std::bind(P, *this);
914 }
915
916 StringRef getToken() const {
917 assert(isToken());
918 return StringRef(Tok.Data, Tok.Length);
919 }
920
921 int64_t getImm() const {
922 assert(isImm());
923 return Imm.Val;
924 }
925
926 void setImm(int64_t Val) {
927 assert(isImm());
928 Imm.Val = Val;
929 }
930
931 ImmTy getImmTy() const {
932 assert(isImm());
933 return Imm.Type;
934 }
935
936 unsigned getReg() const override {
937 assert(isRegKind());
938 return Reg.RegNo;
939 }
940
941 SMLoc getStartLoc() const override {
942 return StartLoc;
943 }
944
945 SMLoc getEndLoc() const override {
946 return EndLoc;
947 }
948
949 SMRange getLocRange() const {
950 return SMRange(StartLoc, EndLoc);
951 }
952
953 Modifiers getModifiers() const {
954 assert(isRegKind() || isImmTy(ImmTyNone));
955 return isRegKind() ? Reg.Mods : Imm.Mods;
956 }
957
958 void setModifiers(Modifiers Mods) {
959 assert(isRegKind() || isImmTy(ImmTyNone));
960 if (isRegKind())
961 Reg.Mods = Mods;
962 else
963 Imm.Mods = Mods;
964 }
965
966 bool hasModifiers() const {
967 return getModifiers().hasModifiers();
968 }
969
970 bool hasFPModifiers() const {
971 return getModifiers().hasFPModifiers();
972 }
973
974 bool hasIntModifiers() const {
975 return getModifiers().hasIntModifiers();
976 }
977
978 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
979
980 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
981
982 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
983
984 void addRegOperands(MCInst &Inst, unsigned N) const;
985
986 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
987 if (isRegKind())
988 addRegOperands(Inst, N);
989 else
990 addImmOperands(Inst, N);
991 }
992
993 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
994 Modifiers Mods = getModifiers();
995 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
996 if (isRegKind()) {
997 addRegOperands(Inst, N);
998 } else {
999 addImmOperands(Inst, N, false);
1000 }
1001 }
1002
1003 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1004 assert(!hasIntModifiers());
1005 addRegOrImmWithInputModsOperands(Inst, N);
1006 }
1007
1008 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1009 assert(!hasFPModifiers());
1010 addRegOrImmWithInputModsOperands(Inst, N);
1011 }
1012
1013 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1014 Modifiers Mods = getModifiers();
1015 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1016 assert(isRegKind());
1017 addRegOperands(Inst, N);
1018 }
1019
1020 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1021 assert(!hasIntModifiers());
1022 addRegWithInputModsOperands(Inst, N);
1023 }
1024
1025 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1026 assert(!hasFPModifiers());
1027 addRegWithInputModsOperands(Inst, N);
1028 }
1029
1030 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1031 switch (Type) {
1032 case ImmTyNone: OS << "None"; break;
1033 case ImmTyGDS: OS << "GDS"; break;
1034 case ImmTyLDS: OS << "LDS"; break;
1035 case ImmTyOffen: OS << "Offen"; break;
1036 case ImmTyIdxen: OS << "Idxen"; break;
1037 case ImmTyAddr64: OS << "Addr64"; break;
1038 case ImmTyOffset: OS << "Offset"; break;
1039 case ImmTyInstOffset: OS << "InstOffset"; break;
1040 case ImmTyOffset0: OS << "Offset0"; break;
1041 case ImmTyOffset1: OS << "Offset1"; break;
1042 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1043 case ImmTyCPol: OS << "CPol"; break;
1044 case ImmTyTFE: OS << "TFE"; break;
1045 case ImmTyD16: OS << "D16"; break;
1046 case ImmTyFORMAT: OS << "FORMAT"; break;
1047 case ImmTyClampSI: OS << "ClampSI"; break;
1048 case ImmTyOModSI: OS << "OModSI"; break;
1049 case ImmTyDPP8: OS << "DPP8"; break;
1050 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1051 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1052 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1053 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1054 case ImmTyDppFI: OS << "DppFI"; break;
1055 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1056 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1057 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1058 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1059 case ImmTyDMask: OS << "DMask"; break;
1060 case ImmTyDim: OS << "Dim"; break;
1061 case ImmTyUNorm: OS << "UNorm"; break;
1062 case ImmTyDA: OS << "DA"; break;
1063 case ImmTyR128A16: OS << "R128A16"; break;
1064 case ImmTyA16: OS << "A16"; break;
1065 case ImmTyLWE: OS << "LWE"; break;
1066 case ImmTyOff: OS << "Off"; break;
1067 case ImmTyExpTgt: OS << "ExpTgt"; break;
1068 case ImmTyExpCompr: OS << "ExpCompr"; break;
1069 case ImmTyExpVM: OS << "ExpVM"; break;
1070 case ImmTyHwreg: OS << "Hwreg"; break;
1071 case ImmTySendMsg: OS << "SendMsg"; break;
1072 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1073 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1074 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1075 case ImmTyOpSel: OS << "OpSel"; break;
1076 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1077 case ImmTyNegLo: OS << "NegLo"; break;
1078 case ImmTyNegHi: OS << "NegHi"; break;
1079 case ImmTySwizzle: OS << "Swizzle"; break;
1080 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1081 case ImmTyHigh: OS << "High"; break;
1082 case ImmTyBLGP: OS << "BLGP"; break;
1083 case ImmTyCBSZ: OS << "CBSZ"; break;
1084 case ImmTyABID: OS << "ABID"; break;
1085 case ImmTyEndpgm: OS << "Endpgm"; break;
1086 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1087 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1088 }
1089 }
1090
1091 void print(raw_ostream &OS) const override {
1092 switch (Kind) {
1093 case Register:
1094 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1095 break;
1096 case Immediate:
1097 OS << '<' << getImm();
1098 if (getImmTy() != ImmTyNone) {
1099 OS << " type: "; printImmTy(OS, getImmTy());
1100 }
1101 OS << " mods: " << Imm.Mods << '>';
1102 break;
1103 case Token:
1104 OS << '\'' << getToken() << '\'';
1105 break;
1106 case Expression:
1107 OS << "<expr " << *Expr << '>';
1108 break;
1109 }
1110 }
1111
1112 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1113 int64_t Val, SMLoc Loc,
1114 ImmTy Type = ImmTyNone,
1115 bool IsFPImm = false) {
1116 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1117 Op->Imm.Val = Val;
1118 Op->Imm.IsFPImm = IsFPImm;
1119 Op->Imm.Kind = ImmKindTyNone;
1120 Op->Imm.Type = Type;
1121 Op->Imm.Mods = Modifiers();
1122 Op->StartLoc = Loc;
1123 Op->EndLoc = Loc;
1124 return Op;
1125 }
1126
1127 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1128 StringRef Str, SMLoc Loc,
1129 bool HasExplicitEncodingSize = true) {
1130 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1131 Res->Tok.Data = Str.data();
1132 Res->Tok.Length = Str.size();
1133 Res->StartLoc = Loc;
1134 Res->EndLoc = Loc;
1135 return Res;
1136 }
1137
1138 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1139 unsigned RegNo, SMLoc S,
1140 SMLoc E) {
1141 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1142 Op->Reg.RegNo = RegNo;
1143 Op->Reg.Mods = Modifiers();
1144 Op->StartLoc = S;
1145 Op->EndLoc = E;
1146 return Op;
1147 }
1148
1149 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1150 const class MCExpr *Expr, SMLoc S) {
1151 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1152 Op->Expr = Expr;
1153 Op->StartLoc = S;
1154 Op->EndLoc = S;
1155 return Op;
1156 }
1157};
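// Illustrative sketch (not part of the original source): operands are built via
// the factory helpers above. For example, an immediate offset operand could be
// created as
//   Operands.push_back(
//       AMDGPUOperand::CreateImm(this, 16, Loc, AMDGPUOperand::ImmTyOffset));
// and a register operand as
//   Operands.push_back(AMDGPUOperand::CreateReg(this, AMDGPU::VGPR0, S, E));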
1158
1159raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1160 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1161 return OS;
1162}
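// Illustrative example (not part of the original source): with print() and the
// operator<< above, a dumped register operand looks roughly like
//   <register 1234 mods: abs:1 neg: 0 sext:0>
// and an immediate with a known type like
//   <16 type: Offset mods: abs:0 neg: 0 sext:0>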
1163
1164//===----------------------------------------------------------------------===//
1165// AsmParser
1166//===----------------------------------------------------------------------===//
1167
1168// Holds info related to the current kernel, e.g. count of SGPRs used.
1169// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1170// .amdgpu_hsa_kernel or at EOF.
1171class KernelScopeInfo {
1172 int SgprIndexUnusedMin = -1;
1173 int VgprIndexUnusedMin = -1;
1174 int AgprIndexUnusedMin = -1;
1175 MCContext *Ctx = nullptr;
1176 MCSubtargetInfo const *MSTI = nullptr;
1177
1178 void usesSgprAt(int i) {
1179 if (i >= SgprIndexUnusedMin) {
1180 SgprIndexUnusedMin = ++i;
1181 if (Ctx) {
1182 MCSymbol* const Sym =
1183 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1184 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1185 }
1186 }
1187 }
1188
1189 void usesVgprAt(int i) {
1190 if (i >= VgprIndexUnusedMin) {
1191 VgprIndexUnusedMin = ++i;
1192 if (Ctx) {
1193 MCSymbol* const Sym =
1194 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1195 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1196 VgprIndexUnusedMin);
1197 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1198 }
1199 }
1200 }
1201
1202 void usesAgprAt(int i) {
 1203    // Instructions using AGPRs will error in AMDGPUAsmParser::MatchAndEmitInstruction.
1204 if (!hasMAIInsts(*MSTI))
1205 return;
1206
1207 if (i >= AgprIndexUnusedMin) {
1208 AgprIndexUnusedMin = ++i;
1209 if (Ctx) {
1210 MCSymbol* const Sym =
1211 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1212 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1213
1214 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1215 MCSymbol* const vSym =
1216 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1217 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1218 VgprIndexUnusedMin);
1219 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1220 }
1221 }
1222 }
1223
1224public:
1225 KernelScopeInfo() = default;
1226
1227 void initialize(MCContext &Context) {
1228 Ctx = &Context;
1229 MSTI = Ctx->getSubtargetInfo();
1230
1231 usesSgprAt(SgprIndexUnusedMin = -1);
1232 usesVgprAt(VgprIndexUnusedMin = -1);
1233 if (hasMAIInsts(*MSTI)) {
1234 usesAgprAt(AgprIndexUnusedMin = -1);
1235 }
1236 }
1237
1238 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1239 unsigned RegWidth) {
1240 switch (RegKind) {
1241 case IS_SGPR:
1242 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1243 break;
1244 case IS_AGPR:
1245 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1246 break;
1247 case IS_VGPR:
1248 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1249 break;
1250 default:
1251 break;
1252 }
1253 }
1254};
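// Illustrative example (not part of the original source): given the tracking
// above, parsing a use of s[10:11] leads to
//   usesRegister(IS_SGPR, /*DwordRegIndex=*/10, /*RegWidth=*/64)
// which calls usesSgprAt(10 + ceil(64/32) - 1) = usesSgprAt(11), so
// SgprIndexUnusedMin becomes 12 and the .kernel.sgpr_count symbol is updated to
// 12 (registers s0..s11 are considered used).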
1255
1256class AMDGPUAsmParser : public MCTargetAsmParser {
1257 MCAsmParser &Parser;
1258
1259 unsigned ForcedEncodingSize = 0;
1260 bool ForcedDPP = false;
1261 bool ForcedSDWA = false;
1262 KernelScopeInfo KernelScope;
1263
1264 /// @name Auto-generated Match Functions
1265 /// {
1266
1267#define GET_ASSEMBLER_HEADER
1268#include "AMDGPUGenAsmMatcher.inc"
1269
1270 /// }
1271
1272private:
1273 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1274 bool OutOfRangeError(SMRange Range);
1275 /// Calculate VGPR/SGPR blocks required for given target, reserved
1276 /// registers, and user-specified NextFreeXGPR values.
1277 ///
1278 /// \param Features [in] Target features, used for bug corrections.
1279 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1280 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1281 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1282 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1283 /// descriptor field, if valid.
1284 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1285 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1286 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1287 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1288 /// \param VGPRBlocks [out] Result VGPR block count.
1289 /// \param SGPRBlocks [out] Result SGPR block count.
1290 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1291 bool FlatScrUsed, bool XNACKUsed,
1292 std::optional<bool> EnableWavefrontSize32,
1293 unsigned NextFreeVGPR, SMRange VGPRRange,
1294 unsigned NextFreeSGPR, SMRange SGPRRange,
1295 unsigned &VGPRBlocks, unsigned &SGPRBlocks);
1296 bool ParseDirectiveAMDGCNTarget();
1297 bool ParseDirectiveAMDHSAKernel();
1298 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1299 bool ParseDirectiveHSACodeObjectVersion();
1300 bool ParseDirectiveHSACodeObjectISA();
1301 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1302 bool ParseDirectiveAMDKernelCodeT();
1303 // TODO: Possibly make subtargetHasRegister const.
1304 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1305 bool ParseDirectiveAMDGPUHsaKernel();
1306
1307 bool ParseDirectiveISAVersion();
1308 bool ParseDirectiveHSAMetadata();
1309 bool ParseDirectivePALMetadataBegin();
1310 bool ParseDirectivePALMetadata();
1311 bool ParseDirectiveAMDGPULDS();
1312
1313 /// Common code to parse out a block of text (typically YAML) between start and
1314 /// end directives.
1315 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1316 const char *AssemblerDirectiveEnd,
1317 std::string &CollectString);
1318
1319 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1320 RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1321 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1322 unsigned &RegNum, unsigned &RegWidth,
1323 bool RestoreOnFailure = false);
1324 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
 1325                            unsigned &RegNum, unsigned &RegWidth,
 1326                            SmallVectorImpl<AsmToken> &Tokens);
1327 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
 1328                            unsigned &RegWidth,
 1329                            SmallVectorImpl<AsmToken> &Tokens);
1330 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
 1331                            unsigned &RegWidth,
 1332                            SmallVectorImpl<AsmToken> &Tokens);
1333 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1334 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1335 bool ParseRegRange(unsigned& Num, unsigned& Width);
1336 unsigned getRegularReg(RegisterKind RegKind,
1337 unsigned RegNum,
1338 unsigned RegWidth,
1339 SMLoc Loc);
1340
1341 bool isRegister();
1342 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1343 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1344 void initializeGprCountSymbol(RegisterKind RegKind);
1345 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1346 unsigned RegWidth);
1347 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1348 bool IsAtomic);
1349
1350public:
1351 enum AMDGPUMatchResultTy {
1352 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1353 };
1354 enum OperandMode {
1355 OperandMode_Default,
1356 OperandMode_NSA,
1357 };
1358
1359 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1360
1361 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1362 const MCInstrInfo &MII,
1363 const MCTargetOptions &Options)
1364 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1366
1367 if (getFeatureBits().none()) {
1368 // Set default features.
1369 copySTI().ToggleFeature("southern-islands");
1370 }
1371
1372 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1373
1374 {
 1375      // TODO: make these pre-defined variables read-only.
 1376      // Currently there is no suitable machinery in core llvm-mc for this.
 1377      // MCSymbol::isRedefinable is intended for another purpose, and
 1378      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
 1379      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
 1380      MCContext &Ctx = getContext();
1381 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1382 MCSymbol *Sym =
1383 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1384 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1385 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1386 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1387 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1388 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1389 } else {
1390 MCSymbol *Sym =
1391 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1392 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1393 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1394 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1395 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1396 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1397 }
1398 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1399 initializeGprCountSymbol(IS_VGPR);
1400 initializeGprCountSymbol(IS_SGPR);
1401 } else
1402 KernelScope.initialize(getContext());
1403 }
1404 }
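  // Illustrative note (not part of the original source): the symbols defined
  // above are ordinary variable symbols, so assembly input can test them in
  // expressions, e.g.
  //   .if .amdgcn.gfx_generation_number >= 10
  //   ...
  //   .endif
  // (or .option.machine_version_major on the non-HSA path).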
1405
1406 bool hasMIMG_R128() const {
1407 return AMDGPU::hasMIMG_R128(getSTI());
1408 }
1409
1410 bool hasPackedD16() const {
1411 return AMDGPU::hasPackedD16(getSTI());
1412 }
1413
1414 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1415
1416 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1417
1418 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1419
1420 bool isSI() const {
1421 return AMDGPU::isSI(getSTI());
1422 }
1423
1424 bool isCI() const {
1425 return AMDGPU::isCI(getSTI());
1426 }
1427
1428 bool isVI() const {
1429 return AMDGPU::isVI(getSTI());
1430 }
1431
1432 bool isGFX9() const {
1433 return AMDGPU::isGFX9(getSTI());
1434 }
1435
 1436  // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1437 bool isGFX90A() const {
1438 return AMDGPU::isGFX90A(getSTI());
1439 }
1440
1441 bool isGFX940() const {
1442 return AMDGPU::isGFX940(getSTI());
1443 }
1444
1445 bool isGFX9Plus() const {
1446 return AMDGPU::isGFX9Plus(getSTI());
1447 }
1448
1449 bool isGFX10() const {
1450 return AMDGPU::isGFX10(getSTI());
1451 }
1452
1453 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1454
1455 bool isGFX11() const {
1456 return AMDGPU::isGFX11(getSTI());
1457 }
1458
1459 bool isGFX11Plus() const {
1460 return AMDGPU::isGFX11Plus(getSTI());
1461 }
1462
1463 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1464
1465 bool isGFX10_BEncoding() const {
 1466    return AMDGPU::isGFX10_BEncoding(getSTI());
 1467  }
1468
1469 bool hasInv2PiInlineImm() const {
1470 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1471 }
1472
1473 bool hasFlatOffsets() const {
1474 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1475 }
1476
1477 bool hasArchitectedFlatScratch() const {
1478 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1479 }
1480
1481 bool hasSGPR102_SGPR103() const {
1482 return !isVI() && !isGFX9();
1483 }
1484
1485 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1486
1487 bool hasIntClamp() const {
1488 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1489 }
1490
1491 bool hasPartialNSAEncoding() const {
1492 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1493 }
1494
1495 unsigned getNSAMaxSize() const {
1496 return AMDGPU::getNSAMaxSize(getSTI());
1497 }
1498
1499 unsigned getMaxNumUserSGPRs() const {
1501 }
1502
1503 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1504
1505 AMDGPUTargetStreamer &getTargetStreamer() {
 1506    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
 1507    return static_cast<AMDGPUTargetStreamer &>(TS);
1508 }
1509
1510 const MCRegisterInfo *getMRI() const {
1511 // We need this const_cast because for some reason getContext() is not const
1512 // in MCAsmParser.
1513 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1514 }
1515
1516 const MCInstrInfo *getMII() const {
1517 return &MII;
1518 }
1519
1520 const FeatureBitset &getFeatureBits() const {
1521 return getSTI().getFeatureBits();
1522 }
1523
1524 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1525 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1526 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1527
1528 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1529 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1530 bool isForcedDPP() const { return ForcedDPP; }
1531 bool isForcedSDWA() const { return ForcedSDWA; }
1532 ArrayRef<unsigned> getMatchedVariants() const;
1533 StringRef getMatchedVariantName() const;
1534
1535 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1536 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1537 bool RestoreOnFailure);
1538 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
 1539  ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
 1540                               SMLoc &EndLoc) override;
1541 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
 1542  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
 1543                                      unsigned Kind) override;
1544 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
 1545                               OperandVector &Operands, MCStreamer &Out,
 1546                               uint64_t &ErrorInfo,
 1547                               bool MatchingInlineAsm) override;
1548 bool ParseDirective(AsmToken DirectiveID) override;
1549 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1550 OperandMode Mode = OperandMode_Default);
1551 StringRef parseMnemonicSuffix(StringRef Name);
 1552  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
 1553                        SMLoc NameLoc, OperandVector &Operands) override;
1554 //bool ProcessInstruction(MCInst &Inst);
1555
1557
1558 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1559
 1560  ParseStatus
 1561  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1562 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1563 std::function<bool(int64_t &)> ConvertResult = nullptr);
1564
1565 ParseStatus parseOperandArrayWithPrefix(
1566 const char *Prefix, OperandVector &Operands,
1567 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1568 bool (*ConvertResult)(int64_t &) = nullptr);
1569
 1570  ParseStatus
 1571  parseNamedBit(StringRef Name, OperandVector &Operands,
1572 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1573 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1575 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1576 SMLoc &StringLoc);
1577
1578 bool isModifier();
1579 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1580 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1581 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1582 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1583 bool parseSP3NegModifier();
1584 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1585 bool HasLit = false);
1587 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1588 bool HasLit = false);
1589 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1590 bool AllowImm = true);
1591 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1592 bool AllowImm = true);
1593 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1594 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1595 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1596 ParseStatus parseDfmtNfmt(int64_t &Format);
1597 ParseStatus parseUfmt(int64_t &Format);
1598 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1599 int64_t &Format);
1600 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1601 int64_t &Format);
1602 ParseStatus parseFORMAT(OperandVector &Operands);
1603 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1604 ParseStatus parseNumericFormat(int64_t &Format);
1605 ParseStatus parseFlatOffset(OperandVector &Operands);
1606 ParseStatus parseR128A16(OperandVector &Operands);
1608 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1609 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1610
1611 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1612
1613 bool parseCnt(int64_t &IntVal);
1614 ParseStatus parseSWaitCnt(OperandVector &Operands);
1615
1616 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1617 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1618 ParseStatus parseDepCtr(OperandVector &Operands);
1619
1620 bool parseDelay(int64_t &Delay);
1621 ParseStatus parseSDelayALU(OperandVector &Operands);
1622
1623 ParseStatus parseHwreg(OperandVector &Operands);
1624
1625private:
1626 struct OperandInfoTy {
1627 SMLoc Loc;
1628 int64_t Id;
1629 bool IsSymbolic = false;
1630 bool IsDefined = false;
1631
1632 OperandInfoTy(int64_t Id_) : Id(Id_) {}
1633 };
1634
1635 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1636 bool validateSendMsg(const OperandInfoTy &Msg,
1637 const OperandInfoTy &Op,
1638 const OperandInfoTy &Stream);
1639
1640 bool parseHwregBody(OperandInfoTy &HwReg,
1641 OperandInfoTy &Offset,
1642 OperandInfoTy &Width);
1643 bool validateHwreg(const OperandInfoTy &HwReg,
1644 const OperandInfoTy &Offset,
1645 const OperandInfoTy &Width);
1646
1647 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1648 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1649 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1650
1651 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1652 const OperandVector &Operands) const;
1653 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1654 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1655 SMLoc getLitLoc(const OperandVector &Operands,
1656 bool SearchMandatoryLiterals = false) const;
1657 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1658 SMLoc getConstLoc(const OperandVector &Operands) const;
1659 SMLoc getInstLoc(const OperandVector &Operands) const;
1660
1661 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1662 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1663 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1664 bool validateSOPLiteral(const MCInst &Inst) const;
1665 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1666 bool validateVOPDRegBankConstraints(const MCInst &Inst,
1667 const OperandVector &Operands);
1668 bool validateIntClampSupported(const MCInst &Inst);
1669 bool validateMIMGAtomicDMask(const MCInst &Inst);
1670 bool validateMIMGGatherDMask(const MCInst &Inst);
1671 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1672 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1673 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1674 bool validateMIMGD16(const MCInst &Inst);
1675 bool validateMIMGMSAA(const MCInst &Inst);
1676 bool validateOpSel(const MCInst &Inst);
1677 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1678 bool validateVccOperand(unsigned Reg) const;
1679 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1680 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1681 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1682 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1683 bool validateAGPRLdSt(const MCInst &Inst) const;
1684 bool validateVGPRAlign(const MCInst &Inst) const;
1685 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1686 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1687 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1688 bool validateDivScale(const MCInst &Inst);
1689 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1690 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1691 const SMLoc &IDLoc);
1692 bool validateExeczVcczOperands(const OperandVector &Operands);
1693 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1694 std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1695 unsigned getConstantBusLimit(unsigned Opcode) const;
1696 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1697 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1698 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1699
1700 bool isSupportedMnemo(StringRef Mnemo,
1701 const FeatureBitset &FBS);
1702 bool isSupportedMnemo(StringRef Mnemo,
1703 const FeatureBitset &FBS,
1704 ArrayRef<unsigned> Variants);
1705 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1706
1707 bool isId(const StringRef Id) const;
1708 bool isId(const AsmToken &Token, const StringRef Id) const;
1709 bool isToken(const AsmToken::TokenKind Kind) const;
1710 StringRef getId() const;
1711 bool trySkipId(const StringRef Id);
1712 bool trySkipId(const StringRef Pref, const StringRef Id);
1713 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1714 bool trySkipToken(const AsmToken::TokenKind Kind);
1715 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1716 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1717 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1718
1719 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1720 AsmToken::TokenKind getTokenKind() const;
1721 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1722 bool parseExpr(OperandVector &Operands);
1723 StringRef getTokenStr() const;
1724 AsmToken peekToken(bool ShouldSkipSpace = true);
1725 AsmToken getToken() const;
1726 SMLoc getLoc() const;
1727 void lex();
1728
1729public:
1730 void onBeginOfFile() override;
1731
1732 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1733
1734 ParseStatus parseExpTgt(OperandVector &Operands);
1735 ParseStatus parseSendMsg(OperandVector &Operands);
1736 ParseStatus parseInterpSlot(OperandVector &Operands);
1737 ParseStatus parseInterpAttr(OperandVector &Operands);
1738 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1739 ParseStatus parseBoolReg(OperandVector &Operands);
1740
1741 bool parseSwizzleOperand(int64_t &Op,
1742 const unsigned MinVal,
1743 const unsigned MaxVal,
1744 const StringRef ErrMsg,
1745 SMLoc &Loc);
1746 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1747 const unsigned MinVal,
1748 const unsigned MaxVal,
1749 const StringRef ErrMsg);
1750 ParseStatus parseSwizzle(OperandVector &Operands);
1751 bool parseSwizzleOffset(int64_t &Imm);
1752 bool parseSwizzleMacro(int64_t &Imm);
1753 bool parseSwizzleQuadPerm(int64_t &Imm);
1754 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1755 bool parseSwizzleBroadcast(int64_t &Imm);
1756 bool parseSwizzleSwap(int64_t &Imm);
1757 bool parseSwizzleReverse(int64_t &Imm);
1758
1759 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1760 int64_t parseGPRIdxMacro();
1761
1762 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1763 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1764
1765 ParseStatus parseOModSI(OperandVector &Operands);
1766
1767 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1768 OptionalImmIndexMap &OptionalIdx);
1769 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1770 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1771 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1772 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1773 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1774 OptionalImmIndexMap &OptionalIdx);
1775 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1776 OptionalImmIndexMap &OptionalIdx);
1777
1778 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1779 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1780
1781 bool parseDimId(unsigned &Encoding);
1783 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1785 ParseStatus parseDPPCtrl(OperandVector &Operands);
1786 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1787 int64_t parseDPPCtrlSel(StringRef Ctrl);
1788 int64_t parseDPPCtrlPerm();
1789 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1790 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1791 cvtDPP(Inst, Operands, true);
1792 }
1793 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1794 bool IsDPP8 = false);
1795 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1796 cvtVOP3DPP(Inst, Operands, true);
1797 }
1798
1799 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1800 AMDGPUOperand::ImmTy Type);
1801 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1802 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1803 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1804 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1805 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1806 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1807 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1808 uint64_t BasicInstType,
1809 bool SkipDstVcc = false,
1810 bool SkipSrcVcc = false);
1811
1812 ParseStatus parseEndpgm(OperandVector &Operands);
1813
1815};
1816
1817} // end anonymous namespace
1818
1819// May be called with integer type with equivalent bitwidth.
1820static const fltSemantics *getFltSemantics(unsigned Size) {
1821 switch (Size) {
1822 case 4:
1823 return &APFloat::IEEEsingle();
1824 case 8:
1825 return &APFloat::IEEEdouble();
1826 case 2:
1827 return &APFloat::IEEEhalf();
1828 default:
1829 llvm_unreachable("unsupported fp type");
1830 }
1831}
1832
1833 static const fltSemantics *getFltSemantics(MVT VT) {
1834 return getFltSemantics(VT.getSizeInBits() / 8);
1835}
1836
1837 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1838 switch (OperandType) {
 // Case labels for 32-bit FP operand types:
1851 return &APFloat::IEEEsingle();
 // Case labels for 64-bit FP operand types:
1857 return &APFloat::IEEEdouble();
 // Case labels for 16-bit FP operand types:
1872 return &APFloat::IEEEhalf();
1873 default:
1874 llvm_unreachable("unsupported fp type");
1875 }
1876}
1877
1878//===----------------------------------------------------------------------===//
1879// Operand
1880//===----------------------------------------------------------------------===//
1881
1882static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1883 bool Lost;
1884
1885 // Convert literal to single precision
1886 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1887 APFloat::rmNearestTiesToEven,
1888 &Lost);
1889 // We allow precision loss but not overflow or underflow
1890 if (Status != APFloat::opOK &&
1891 Lost &&
1892 ((Status & APFloat::opOverflow) != 0 ||
1893 (Status & APFloat::opUnderflow) != 0)) {
1894 return false;
1895 }
1896
1897 return true;
1898}
1899
1900static bool isSafeTruncation(int64_t Val, unsigned Size) {
1901 return isUIntN(Size, Val) || isIntN(Size, Val);
1902}
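// A worked example of the truncation check above: isSafeTruncation(0xFFFF, 16)
// and isSafeTruncation(-1, 16) both hold, because the value fits in 16 bits as
// either an unsigned or a signed integer, while isSafeTruncation(0x1FFFF, 16)
// does not.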
1903
1904static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1905 if (VT.getScalarType() == MVT::i16) {
1906 // FP immediate values are broken.
1907 return isInlinableIntLiteral(Val);
1908 }
1909
1910 // f16/v2f16 operands work correctly for all values.
1911 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1912}
1913
1914bool AMDGPUOperand::isInlinableImm(MVT type) const {
1915
1916 // This is a hack to enable named inline values like
1917 // shared_base with both 32-bit and 64-bit operands.
1918 // Note that these values are defined as
1919 // 32-bit operands only.
1920 if (isInlineValue()) {
1921 return true;
1922 }
1923
1924 if (!isImmTy(ImmTyNone)) {
1925 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1926 return false;
1927 }
1928 // TODO: We should avoid using host float here. It would be better to
1929 // check the float bit values which is what a few other places do.
1930 // We've had bot failures before due to weird NaN support on mips hosts.
1931
1932 APInt Literal(64, Imm.Val);
1933
1934 if (Imm.IsFPImm) { // We got fp literal token
1935 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1936 return AMDGPU::isInlinableLiteral64(Imm.Val,
1937 AsmParser->hasInv2PiInlineImm());
1938 }
1939
1940 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1941 if (!canLosslesslyConvertToFPType(FPLiteral, type))
1942 return false;
1943
1944 if (type.getScalarSizeInBits() == 16) {
1945 return isInlineableLiteralOp16(
1946 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1947 type, AsmParser->hasInv2PiInlineImm());
1948 }
1949
1950 // Check if single precision literal is inlinable
1951 return AMDGPU::isInlinableLiteral32(
1952 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1953 AsmParser->hasInv2PiInlineImm());
1954 }
1955
1956 // We got int literal token.
1957 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1958 return AMDGPU::isInlinableLiteral64(Imm.Val,
1959 AsmParser->hasInv2PiInlineImm());
1960 }
1961
1962 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1963 return false;
1964 }
1965
1966 if (type.getScalarSizeInBits() == 16) {
1967 return isInlineableLiteralOp16(
1968 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1969 type, AsmParser->hasInv2PiInlineImm());
1970 }
1971
1972 return AMDGPU::isInlinableLiteral32(
1973 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1974 AsmParser->hasInv2PiInlineImm());
1975}
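// For illustration (standard inline-constant rules assumed): for a 32-bit FP
// operand, 0.5 and -4.0 are inlinable, and so are integers in the range
// -16..64; values such as 0.3 or 65 are not inlinable and must be encoded as
// literals.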
1976
1977bool AMDGPUOperand::isLiteralImm(MVT type) const {
1978 // Check that this immediate can be added as literal
1979 if (!isImmTy(ImmTyNone)) {
1980 return false;
1981 }
1982
1983 if (!Imm.IsFPImm) {
1984 // We got int literal token.
1985
1986 if (type == MVT::f64 && hasFPModifiers()) {
1987 // Cannot apply fp modifiers to int literals while preserving the same
1988 // semantics for VOP1/2/C and VOP3, because of integer truncation.
1989 // To avoid ambiguity, these cases are disabled.
1990 return false;
1991 }
1992
1993 unsigned Size = type.getSizeInBits();
1994 if (Size == 64)
1995 Size = 32;
1996
1997 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1998 // types.
1999 return isSafeTruncation(Imm.Val, Size);
2000 }
2001
2002 // We got fp literal token
2003 if (type == MVT::f64) { // Expected 64-bit fp operand
2004 // The low 32 bits of the literal would be set to zeroes, but we accept such literals
2005 return true;
2006 }
2007
2008 if (type == MVT::i64) { // Expected 64-bit int operand
2009 // We don't allow fp literals in 64-bit integer instructions. It is
2010 // unclear how we should encode them.
2011 return false;
2012 }
2013
2014 // We allow fp literals with f16x2 operands assuming that the specified
2015 // literal goes into the lower half and the upper half is zero. We also
2016 // require that the literal may be losslessly converted to f16.
2017 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
2018 (type == MVT::v2i16)? MVT::i16 :
2019 (type == MVT::v2f32)? MVT::f32 : type;
2020
2021 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2022 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2023}
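// For example, a literal 1.0 used with a v2f16 operand is encoded as 0x3C00 in
// the lower half with the upper half zeroed, and it is accepted because 1.0
// converts to f16 without loss.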
2024
2025bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2026 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2027}
2028
2029bool AMDGPUOperand::isVRegWithInputMods() const {
2030 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2031 // GFX90A allows DPP on 64-bit operands.
2032 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2033 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2034}
2035
2036bool AMDGPUOperand::isT16VRegWithInputMods() const {
2037 return isRegClass(AMDGPU::VGPR_32_Lo128RegClassID);
2038}
2039
2040bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2041 if (AsmParser->isVI())
2042 return isVReg32();
2043 else if (AsmParser->isGFX9Plus())
2044 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2045 else
2046 return false;
2047}
2048
2049bool AMDGPUOperand::isSDWAFP16Operand() const {
2050 return isSDWAOperand(MVT::f16);
2051}
2052
2053bool AMDGPUOperand::isSDWAFP32Operand() const {
2054 return isSDWAOperand(MVT::f32);
2055}
2056
2057bool AMDGPUOperand::isSDWAInt16Operand() const {
2058 return isSDWAOperand(MVT::i16);
2059}
2060
2061bool AMDGPUOperand::isSDWAInt32Operand() const {
2062 return isSDWAOperand(MVT::i32);
2063}
2064
2065bool AMDGPUOperand::isBoolReg() const {
2066 auto FB = AsmParser->getFeatureBits();
2067 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2068 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2069}
2070
2071uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2072{
2073 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2074 assert(Size == 2 || Size == 4 || Size == 8);
2075
2076 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2077
2078 if (Imm.Mods.Abs) {
2079 Val &= ~FpSignMask;
2080 }
2081 if (Imm.Mods.Neg) {
2082 Val ^= FpSignMask;
2083 }
2084
2085 return Val;
2086}
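// Worked example for a 32-bit operand (Size == 4, FpSignMask == 0x80000000):
// applying 'neg' to 0x3F800000 (1.0f) yields 0xBF800000 (-1.0f), while 'abs'
// applied to 0xBF800000 clears the sign bit and yields 0x3F800000 again.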
2087
2088void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2089 if (isExpr()) {
2090 Inst.addOperand(MCOperand::createExpr(Expr));
2091 return;
2092 }
2093
2094 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2095 Inst.getNumOperands())) {
2096 addLiteralImmOperand(Inst, Imm.Val,
2097 ApplyModifiers &
2098 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2099 } else {
2100 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2101 Inst.addOperand(MCOperand::createImm(Imm.Val));
2102 setImmKindNone();
2103 }
2104}
2105
2106void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2107 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2108 auto OpNum = Inst.getNumOperands();
2109 // Check that this operand accepts literals
2110 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2111
2112 if (ApplyModifiers) {
2113 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2114 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2115 Val = applyInputFPModifiers(Val, Size);
2116 }
2117
2118 APInt Literal(64, Val);
2119 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2120
2121 if (Imm.IsFPImm) { // We got fp literal token
2122 switch (OpTy) {
 // Case labels for 64-bit operand types such as AMDGPU::OPERAND_REG_IMM_INT64
 // and AMDGPU::OPERAND_REG_IMM_FP64:
2128 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2129 AsmParser->hasInv2PiInlineImm())) {
2130 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2131 setImmKindConst();
2132 return;
2133 }
2134
2135 // Non-inlineable
2136 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2137 // For fp operands we check if low 32 bits are zeros
2138 if (Literal.getLoBits(32) != 0) {
2139 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2140 "Can't encode literal as exact 64-bit floating-point operand. "
2141 "Low 32-bits will be set to zero");
2142 Val &= 0xffffffff00000000u;
2143 }
2144
2145 Inst.addOperand(MCOperand::createImm(Val));
2146 setImmKindLiteral();
2147 return;
2148 }
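// For example, 1.0 (0x3FF0000000000000) has zero low 32 bits and is encoded
// exactly, while 1.1 (0x3FF199999999999A) triggers the warning above and is
// encoded as 0x3FF1999900000000.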
2149
2150 // We don't allow fp literals in 64-bit integer instructions. It is
2151 // unclear how we should encode them. This case should be checked earlier
2152 // in predicate methods (isLiteralImm())
2153 llvm_unreachable("fp literal in 64-bit integer instruction.");
2154
 // Case labels for the remaining 32-bit and 16-bit operand types follow; a
 // non-inlineable fp literal is converted to the operand's format.
 {
2181 bool lost;
2182 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2183 // Convert literal to single precision
2184 FPLiteral.convert(*getOpFltSemantics(OpTy),
2185 APFloat::rmNearestTiesToEven, &lost);
2186 // We allow precision loss but not overflow or underflow. This should be
2187 // checked earlier in isLiteralImm()
2188
2189 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2190 Inst.addOperand(MCOperand::createImm(ImmVal));
2191 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2192 setImmKindMandatoryLiteral();
2193 } else {
2194 setImmKindLiteral();
2195 }
2196 return;
2197 }
2198 default:
2199 llvm_unreachable("invalid operand size");
2200 }
2201
2202 return;
2203 }
2204
2205 // We got int literal token.
2206 // Only sign extend inline immediates.
2207 switch (OpTy) {
 // Case labels for 32-bit operand types such as AMDGPU::OPERAND_REG_IMM_INT32
 // and AMDGPU::OPERAND_REG_IMM_FP32:
2221 if (isSafeTruncation(Val, 32) &&
2222 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2223 AsmParser->hasInv2PiInlineImm())) {
2224 Inst.addOperand(MCOperand::createImm(Val));
2225 setImmKindConst();
2226 return;
2227 }
2228
2229 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2230 setImmKindLiteral();
2231 return;
2232
 // Case labels for 64-bit operand types:
2238 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2239 Inst.addOperand(MCOperand::createImm(Val));
2240 setImmKindConst();
2241 return;
2242 }
2243
2244 Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
2245 : Lo_32(Val);
2246
2247 Inst.addOperand(MCOperand::createImm(Val));
2248 setImmKindLiteral();
2249 return;
2250
 // Case labels for 16-bit operand types such as AMDGPU::OPERAND_REG_IMM_INT16
 // and AMDGPU::OPERAND_REG_IMM_FP16:
2258 if (isSafeTruncation(Val, 16) &&
2259 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2260 AsmParser->hasInv2PiInlineImm())) {
2261 Inst.addOperand(MCOperand::createImm(Val));
2262 setImmKindConst();
2263 return;
2264 }
2265
2266 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2267 setImmKindLiteral();
2268 return;
2269
 // Case labels for packed 16-bit inline-constant operand types such as
 // AMDGPU::OPERAND_REG_INLINE_C_V2INT16 and AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2274 assert(isSafeTruncation(Val, 16));
2275 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2276 AsmParser->hasInv2PiInlineImm()));
2277
2278 Inst.addOperand(MCOperand::createImm(Val));
2279 return;
2280 }
2281 case AMDGPU::OPERAND_KIMM32:
2282 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2283 setImmKindMandatoryLiteral();
2284 return;
2285 case AMDGPU::OPERAND_KIMM16:
2286 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2287 setImmKindMandatoryLiteral();
2288 return;
2289 default:
2290 llvm_unreachable("invalid operand size");
2291 }
2292}
2293
2294void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2295 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2296}
2297
2298bool AMDGPUOperand::isInlineValue() const {
2299 return isRegKind() && ::isInlineValue(getReg());
2300}
2301
2302//===----------------------------------------------------------------------===//
2303// AsmParser
2304//===----------------------------------------------------------------------===//
2305
2306static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2307 if (Is == IS_VGPR) {
2308 switch (RegWidth) {
2309 default: return -1;
2310 case 32:
2311 return AMDGPU::VGPR_32RegClassID;
2312 case 64:
2313 return AMDGPU::VReg_64RegClassID;
2314 case 96:
2315 return AMDGPU::VReg_96RegClassID;
2316 case 128:
2317 return AMDGPU::VReg_128RegClassID;
2318 case 160:
2319 return AMDGPU::VReg_160RegClassID;
2320 case 192:
2321 return AMDGPU::VReg_192RegClassID;
2322 case 224:
2323 return AMDGPU::VReg_224RegClassID;
2324 case 256:
2325 return AMDGPU::VReg_256RegClassID;
2326 case 288:
2327 return AMDGPU::VReg_288RegClassID;
2328 case 320:
2329 return AMDGPU::VReg_320RegClassID;
2330 case 352:
2331 return AMDGPU::VReg_352RegClassID;
2332 case 384:
2333 return AMDGPU::VReg_384RegClassID;
2334 case 512:
2335 return AMDGPU::VReg_512RegClassID;
2336 case 1024:
2337 return AMDGPU::VReg_1024RegClassID;
2338 }
2339 } else if (Is == IS_TTMP) {
2340 switch (RegWidth) {
2341 default: return -1;
2342 case 32:
2343 return AMDGPU::TTMP_32RegClassID;
2344 case 64:
2345 return AMDGPU::TTMP_64RegClassID;
2346 case 128:
2347 return AMDGPU::TTMP_128RegClassID;
2348 case 256:
2349 return AMDGPU::TTMP_256RegClassID;
2350 case 512:
2351 return AMDGPU::TTMP_512RegClassID;
2352 }
2353 } else if (Is == IS_SGPR) {
2354 switch (RegWidth) {
2355 default: return -1;
2356 case 32:
2357 return AMDGPU::SGPR_32RegClassID;
2358 case 64:
2359 return AMDGPU::SGPR_64RegClassID;
2360 case 96:
2361 return AMDGPU::SGPR_96RegClassID;
2362 case 128:
2363 return AMDGPU::SGPR_128RegClassID;
2364 case 160:
2365 return AMDGPU::SGPR_160RegClassID;
2366 case 192:
2367 return AMDGPU::SGPR_192RegClassID;
2368 case 224:
2369 return AMDGPU::SGPR_224RegClassID;
2370 case 256:
2371 return AMDGPU::SGPR_256RegClassID;
2372 case 288:
2373 return AMDGPU::SGPR_288RegClassID;
2374 case 320:
2375 return AMDGPU::SGPR_320RegClassID;
2376 case 352:
2377 return AMDGPU::SGPR_352RegClassID;
2378 case 384:
2379 return AMDGPU::SGPR_384RegClassID;
2380 case 512:
2381 return AMDGPU::SGPR_512RegClassID;
2382 }
2383 } else if (Is == IS_AGPR) {
2384 switch (RegWidth) {
2385 default: return -1;
2386 case 32:
2387 return AMDGPU::AGPR_32RegClassID;
2388 case 64:
2389 return AMDGPU::AReg_64RegClassID;
2390 case 96:
2391 return AMDGPU::AReg_96RegClassID;
2392 case 128:
2393 return AMDGPU::AReg_128RegClassID;
2394 case 160:
2395 return AMDGPU::AReg_160RegClassID;
2396 case 192:
2397 return AMDGPU::AReg_192RegClassID;
2398 case 224:
2399 return AMDGPU::AReg_224RegClassID;
2400 case 256:
2401 return AMDGPU::AReg_256RegClassID;
2402 case 288:
2403 return AMDGPU::AReg_288RegClassID;
2404 case 320:
2405 return AMDGPU::AReg_320RegClassID;
2406 case 352:
2407 return AMDGPU::AReg_352RegClassID;
2408 case 384:
2409 return AMDGPU::AReg_384RegClassID;
2410 case 512:
2411 return AMDGPU::AReg_512RegClassID;
2412 case 1024:
2413 return AMDGPU::AReg_1024RegClassID;
2414 }
2415 }
2416 return -1;
2417}
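// For example, getRegClass(IS_VGPR, 96) returns AMDGPU::VReg_96RegClassID,
// while an unsupported width such as getRegClass(IS_SGPR, 48) returns -1.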
2418
2419 static unsigned getSpecialRegForName(StringRef RegName) {
2420 return StringSwitch<unsigned>(RegName)
2421 .Case("exec", AMDGPU::EXEC)
2422 .Case("vcc", AMDGPU::VCC)
2423 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2424 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2425 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2426 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2427 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2428 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2429 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2430 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2431 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2432 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2433 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2434 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2435 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2436 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2437 .Case("m0", AMDGPU::M0)
2438 .Case("vccz", AMDGPU::SRC_VCCZ)
2439 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2440 .Case("execz", AMDGPU::SRC_EXECZ)
2441 .Case("src_execz", AMDGPU::SRC_EXECZ)
2442 .Case("scc", AMDGPU::SRC_SCC)
2443 .Case("src_scc", AMDGPU::SRC_SCC)
2444 .Case("tba", AMDGPU::TBA)
2445 .Case("tma", AMDGPU::TMA)
2446 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2447 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2448 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2449 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2450 .Case("vcc_lo", AMDGPU::VCC_LO)
2451 .Case("vcc_hi", AMDGPU::VCC_HI)
2452 .Case("exec_lo", AMDGPU::EXEC_LO)
2453 .Case("exec_hi", AMDGPU::EXEC_HI)
2454 .Case("tma_lo", AMDGPU::TMA_LO)
2455 .Case("tma_hi", AMDGPU::TMA_HI)
2456 .Case("tba_lo", AMDGPU::TBA_LO)
2457 .Case("tba_hi", AMDGPU::TBA_HI)
2458 .Case("pc", AMDGPU::PC_REG)
2459 .Case("null", AMDGPU::SGPR_NULL)
2460 .Default(AMDGPU::NoRegister);
2461}
2462
2463bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2464 SMLoc &EndLoc, bool RestoreOnFailure) {
2465 auto R = parseRegister();
2466 if (!R) return true;
2467 assert(R->isReg());
2468 RegNo = R->getReg();
2469 StartLoc = R->getStartLoc();
2470 EndLoc = R->getEndLoc();
2471 return false;
2472}
2473
2474bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2475 SMLoc &EndLoc) {
2476 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2477}
2478
2479ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2480 SMLoc &EndLoc) {
2481 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2482 bool PendingErrors = getParser().hasPendingError();
2483 getParser().clearPendingErrors();
2484 if (PendingErrors)
2485 return ParseStatus::Failure;
2486 if (Result)
2487 return ParseStatus::NoMatch;
2488 return ParseStatus::Success;
2489}
2490
2491bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2492 RegisterKind RegKind, unsigned Reg1,
2493 SMLoc Loc) {
2494 switch (RegKind) {
2495 case IS_SPECIAL:
2496 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2497 Reg = AMDGPU::EXEC;
2498 RegWidth = 64;
2499 return true;
2500 }
2501 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2502 Reg = AMDGPU::FLAT_SCR;
2503 RegWidth = 64;
2504 return true;
2505 }
2506 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2507 Reg = AMDGPU::XNACK_MASK;
2508 RegWidth = 64;
2509 return true;
2510 }
2511 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2512 Reg = AMDGPU::VCC;
2513 RegWidth = 64;
2514 return true;
2515 }
2516 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2517 Reg = AMDGPU::TBA;
2518 RegWidth = 64;
2519 return true;
2520 }
2521 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2522 Reg = AMDGPU::TMA;
2523 RegWidth = 64;
2524 return true;
2525 }
2526 Error(Loc, "register does not fit in the list");
2527 return false;
2528 case IS_VGPR:
2529 case IS_SGPR:
2530 case IS_AGPR:
2531 case IS_TTMP:
2532 if (Reg1 != Reg + RegWidth / 32) {
2533 Error(Loc, "registers in a list must have consecutive indices");
2534 return false;
2535 }
2536 RegWidth += 32;
2537 return true;
2538 default:
2539 llvm_unreachable("unexpected register kind");
2540 }
2541}
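// For example, the list [exec_lo, exec_hi] is folded into the single 64-bit
// register 'exec', while [s0, s2] is rejected because the indices are not
// consecutive.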
2542
2543struct RegInfo {
2544 StringLiteral Name;
2545 RegisterKind Kind;
2546};
2547
2548static constexpr RegInfo RegularRegisters[] = {
2549 {{"v"}, IS_VGPR},
2550 {{"s"}, IS_SGPR},
2551 {{"ttmp"}, IS_TTMP},
2552 {{"acc"}, IS_AGPR},
2553 {{"a"}, IS_AGPR},
2554};
2555
2556static bool isRegularReg(RegisterKind Kind) {
2557 return Kind == IS_VGPR ||
2558 Kind == IS_SGPR ||
2559 Kind == IS_TTMP ||
2560 Kind == IS_AGPR;
2561}
2562
2563 static const RegInfo* getRegularRegInfo(StringRef Str) {
2564 for (const RegInfo &Reg : RegularRegisters)
2565 if (Str.startswith(Reg.Name))
2566 return &Reg;
2567 return nullptr;
2568}
2569
2570static bool getRegNum(StringRef Str, unsigned& Num) {
2571 return !Str.getAsInteger(10, Num);
2572}
2573
2574bool
2575AMDGPUAsmParser::isRegister(const AsmToken &Token,
2576 const AsmToken &NextToken) const {
2577
2578 // A list of consecutive registers: [s0,s1,s2,s3]
2579 if (Token.is(AsmToken::LBrac))
2580 return true;
2581
2582 if (!Token.is(AsmToken::Identifier))
2583 return false;
2584
2585 // A single register like s0 or a range of registers like s[0:1]
2586
2587 StringRef Str = Token.getString();
2588 const RegInfo *Reg = getRegularRegInfo(Str);
2589 if (Reg) {
2590 StringRef RegName = Reg->Name;
2591 StringRef RegSuffix = Str.substr(RegName.size());
2592 if (!RegSuffix.empty()) {
2593 unsigned Num;
2594 // A single register with an index: rXX
2595 if (getRegNum(RegSuffix, Num))
2596 return true;
2597 } else {
2598 // A range of registers: r[XX:YY].
2599 if (NextToken.is(AsmToken::LBrac))
2600 return true;
2601 }
2602 }
2603
2604 return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2605}
2606
2607bool
2608AMDGPUAsmParser::isRegister()
2609{
2610 return isRegister(getToken(), peekToken());
2611}
2612
2613unsigned
2614AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2615 unsigned RegNum,
2616 unsigned RegWidth,
2617 SMLoc Loc) {
2618
2619 assert(isRegularReg(RegKind));
2620
2621 unsigned AlignSize = 1;
2622 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2623 // SGPR and TTMP registers must be aligned.
2624 // Max required alignment is 4 dwords.
2625 AlignSize = std::min(RegWidth / 32, 4u);
2626 }
2627
2628 if (RegNum % AlignSize != 0) {
2629 Error(Loc, "invalid register alignment");
2630 return AMDGPU::NoRegister;
2631 }
2632
2633 unsigned RegIdx = RegNum / AlignSize;
2634 int RCID = getRegClass(RegKind, RegWidth);
2635 if (RCID == -1) {
2636 Error(Loc, "invalid or unsupported register size");
2637 return AMDGPU::NoRegister;
2638 }
2639
2640 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2641 const MCRegisterClass RC = TRI->getRegClass(RCID);
2642 if (RegIdx >= RC.getNumRegs()) {
2643 Error(Loc, "register index is out of range");
2644 return AMDGPU::NoRegister;
2645 }
2646
2647 return RC.getRegister(RegIdx);
2648}
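// For example, s[2:3] is accepted as a 64-bit SGPR tuple (AlignSize == 2,
// RegIdx == 1), while s[1:2] is rejected with "invalid register alignment"
// because its first index is not even.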
2649
2650bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2651 int64_t RegLo, RegHi;
2652 if (!skipToken(AsmToken::LBrac, "missing register index"))
2653 return false;
2654
2655 SMLoc FirstIdxLoc = getLoc();
2656 SMLoc SecondIdxLoc;
2657
2658 if (!parseExpr(RegLo))
2659 return false;
2660
2661 if (trySkipToken(AsmToken::Colon)) {
2662 SecondIdxLoc = getLoc();
2663 if (!parseExpr(RegHi))
2664 return false;
2665 } else {
2666 RegHi = RegLo;
2667 }
2668
2669 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2670 return false;
2671
2672 if (!isUInt<32>(RegLo)) {
2673 Error(FirstIdxLoc, "invalid register index");
2674 return false;
2675 }
2676
2677 if (!isUInt<32>(RegHi)) {
2678 Error(SecondIdxLoc, "invalid register index");
2679 return false;
2680 }
2681
2682 if (RegLo > RegHi) {
2683 Error(FirstIdxLoc, "first register index should not exceed second index");
2684 return false;
2685 }
2686
2687 Num = static_cast<unsigned>(RegLo);
2688 RegWidth = 32 * ((RegHi - RegLo) + 1);
2689 return true;
2690}
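// For example, "[4:7]" parses to Num == 4 and RegWidth == 128, while a single
// index such as "[5]" parses to Num == 5 and RegWidth == 32.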
2691
2692unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2693 unsigned &RegNum, unsigned &RegWidth,
2694 SmallVectorImpl<AsmToken> &Tokens) {
2695 assert(isToken(AsmToken::Identifier));
2696 unsigned Reg = getSpecialRegForName(getTokenStr());
2697 if (Reg) {
2698 RegNum = 0;
2699 RegWidth = 32;
2700 RegKind = IS_SPECIAL;
2701 Tokens.push_back(getToken());
2702 lex(); // skip register name
2703 }
2704 return Reg;
2705}
2706
2707unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2708 unsigned &RegNum, unsigned &RegWidth,
2709 SmallVectorImpl<AsmToken> &Tokens) {
2710 assert(isToken(AsmToken::Identifier));
2711 StringRef RegName = getTokenStr();
2712 auto Loc = getLoc();
2713
2714 const RegInfo *RI = getRegularRegInfo(RegName);
2715 if (!RI) {
2716 Error(Loc, "invalid register name");
2717 return AMDGPU::NoRegister;
2718 }
2719
2720 Tokens.push_back(getToken());
2721 lex(); // skip register name
2722
2723 RegKind = RI->Kind;
2724 StringRef RegSuffix = RegName.substr(RI->Name.size());
2725 if (!RegSuffix.empty()) {
2726 // Single 32-bit register: vXX.
2727 if (!getRegNum(RegSuffix, RegNum)) {
2728 Error(Loc, "invalid register index");
2729 return AMDGPU::NoRegister;
2730 }
2731 RegWidth = 32;
2732 } else {
2733 // Range of registers: v[XX:YY]. ":YY" is optional.
2734 if (!ParseRegRange(RegNum, RegWidth))
2735 return AMDGPU::NoRegister;
2736 }
2737
2738 return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2739}
2740
2741unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2742 unsigned &RegWidth,
2743 SmallVectorImpl<AsmToken> &Tokens) {
2744 unsigned Reg = AMDGPU::NoRegister;
2745 auto ListLoc = getLoc();
2746
2747 if (!skipToken(AsmToken::LBrac,
2748 "expected a register or a list of registers")) {
2749 return AMDGPU::NoRegister;
2750 }
2751
2752 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2753
2754 auto Loc = getLoc();
2755 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2756 return AMDGPU::NoRegister;
2757 if (RegWidth != 32) {
2758 Error(Loc, "expected a single 32-bit register");
2759 return AMDGPU::NoRegister;
2760 }
2761
2762 for (; trySkipToken(AsmToken::Comma); ) {
2763 RegisterKind NextRegKind;
2764 unsigned NextReg, NextRegNum, NextRegWidth;
2765 Loc = getLoc();
2766
2767 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2768 NextRegNum, NextRegWidth,
2769 Tokens)) {
2770 return AMDGPU::NoRegister;
2771 }
2772 if (NextRegWidth != 32) {
2773 Error(Loc, "expected a single 32-bit register");
2774 return AMDGPU::NoRegister;
2775 }
2776 if (NextRegKind != RegKind) {
2777 Error(Loc, "registers in a list must be of the same kind");
2778 return AMDGPU::NoRegister;
2779 }
2780 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2781 return AMDGPU::NoRegister;
2782 }
2783
2784 if (!skipToken(AsmToken::RBrac,
2785 "expected a comma or a closing square bracket")) {
2786 return AMDGPU::NoRegister;
2787 }
2788
2789 if (isRegularReg(RegKind))
2790 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2791
2792 return Reg;
2793}
2794
2795bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2796 unsigned &RegNum, unsigned &RegWidth,
2797 SmallVectorImpl<AsmToken> &Tokens) {
2798 auto Loc = getLoc();
2799 Reg = AMDGPU::NoRegister;
2800
2801 if (isToken(AsmToken::Identifier)) {
2802 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2803 if (Reg == AMDGPU::NoRegister)
2804 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2805 } else {
2806 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2807 }
2808
2809 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2810 if (Reg == AMDGPU::NoRegister) {
2811 assert(Parser.hasPendingError());
2812 return false;
2813 }
2814
2815 if (!subtargetHasRegister(*TRI, Reg)) {
2816 if (Reg == AMDGPU::SGPR_NULL) {
2817 Error(Loc, "'null' operand is not supported on this GPU");
2818 } else {
2819 Error(Loc, "register not available on this GPU");
2820 }
2821 return false;
2822 }
2823
2824 return true;
2825}
2826
2827bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2828 unsigned &RegNum, unsigned &RegWidth,
2829 bool RestoreOnFailure /*=false*/) {
2830 Reg = AMDGPU::NoRegister;
2831
2832 SmallVector<AsmToken, 1> Tokens;
2833 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2834 if (RestoreOnFailure) {
2835 while (!Tokens.empty()) {
2836 getLexer().UnLex(Tokens.pop_back_val());
2837 }
2838 }
2839 return true;
2840 }
2841 return false;
2842}
2843
2844std::optional<StringRef>
2845AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2846 switch (RegKind) {
2847 case IS_VGPR:
2848 return StringRef(".amdgcn.next_free_vgpr");
2849 case IS_SGPR:
2850 return StringRef(".amdgcn.next_free_sgpr");
2851 default:
2852 return std::nullopt;
2853 }
2854}
2855
2856void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2857 auto SymbolName = getGprCountSymbolName(RegKind);
2858 assert(SymbolName && "initializing invalid register kind");
2859 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2860 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2861}
2862
2863bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2864 unsigned DwordRegIndex,
2865 unsigned RegWidth) {
2866 // Symbols are only defined for GCN targets
2867 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2868 return true;
2869
2870 auto SymbolName = getGprCountSymbolName(RegKind);
2871 if (!SymbolName)
2872 return true;
2873 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2874
2875 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2876 int64_t OldCount;
2877
2878 if (!Sym->isVariable())
2879 return !Error(getLoc(),
2880 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2881 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2882 return !Error(
2883 getLoc(),
2884 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2885
2886 if (OldCount <= NewMax)
2887 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2888
2889 return true;
2890}
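// For example, a use of v[8:11] (DwordRegIndex == 8, RegWidth == 128) raises
// .amdgcn.next_free_vgpr to at least 12, since NewMax == 8 + 4 - 1 == 11.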
2891
2892std::unique_ptr<AMDGPUOperand>
2893AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2894 const auto &Tok = getToken();
2895 SMLoc StartLoc = Tok.getLoc();
2896 SMLoc EndLoc = Tok.getEndLoc();
2897 RegisterKind RegKind;
2898 unsigned Reg, RegNum, RegWidth;
2899
2900 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2901 return nullptr;
2902 }
2903 if (isHsaAbi(getSTI())) {
2904 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2905 return nullptr;
2906 } else
2907 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2908 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2909}
2910
2911ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
2912 bool HasSP3AbsModifier, bool HasLit) {
2913 // TODO: add syntactic sugar for 1/(2*PI)
2914
2915 if (isRegister())
2916 return ParseStatus::NoMatch;
2917 assert(!isModifier());
2918
2919 if (!HasLit) {
2920 HasLit = trySkipId("lit");
2921 if (HasLit) {
2922 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
2923 return ParseStatus::Failure;
2924 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
2925 if (S.isSuccess() &&
2926 !skipToken(AsmToken::RParen, "expected closing parentheses"))
2927 return ParseStatus::Failure;
2928 return S;
2929 }
2930 }
2931
2932 const auto& Tok = getToken();
2933 const auto& NextTok = peekToken();
2934 bool IsReal = Tok.is(AsmToken::Real);
2935 SMLoc S = getLoc();
2936 bool Negate = false;
2937
2938 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2939 lex();
2940 IsReal = true;
2941 Negate = true;
2942 }
2943
2944 AMDGPUOperand::Modifiers Mods;
2945 Mods.Lit = HasLit;
2946
2947 if (IsReal) {
2948 // Floating-point expressions are not supported.
2949 // Only floating-point literals with an
2950 // optional sign are allowed here.
2951
2952 StringRef Num = getTokenStr();
2953 lex();
2954
2955 APFloat RealVal(APFloat::IEEEdouble());
2956 auto roundMode = APFloat::rmNearestTiesToEven;
2957 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
2958 return ParseStatus::Failure;
2959 if (Negate)
2960 RealVal.changeSign();
2961
2962 Operands.push_back(
2963 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2964 AMDGPUOperand::ImmTyNone, true));
2965 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2966 Op.setModifiers(Mods);
2967
2968 return ParseStatus::Success;
2969
2970 } else {
2971 int64_t IntVal;
2972 const MCExpr *Expr;
2973 SMLoc S = getLoc();
2974
2975 if (HasSP3AbsModifier) {
2976 // This is a workaround for handling expressions
2977 // as arguments of SP3 'abs' modifier, for example:
2978 // |1.0|
2979 // |-1|
2980 // |1+x|
2981 // This syntax is not compatible with syntax of standard
2982 // MC expressions (due to the trailing '|').
2983 SMLoc EndLoc;
2984 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2985 return ParseStatus::Failure;
2986 } else {
2987 if (Parser.parseExpression(Expr))
2988 return ParseStatus::Failure;
2989 }
2990
2991 if (Expr->evaluateAsAbsolute(IntVal)) {
2992 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2993 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2994 Op.setModifiers(Mods);
2995 } else {
2996 if (HasLit)
2997 return ParseStatus::NoMatch;
2998 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2999 }
3000
3001 return ParseStatus::Success;
3002 }
3003
3004 return ParseStatus::NoMatch;
3005}
3006
3007ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3008 if (!isRegister())
3009 return ParseStatus::NoMatch;
3010
3011 if (auto R = parseRegister()) {
3012 assert(R->isReg());
3013 Operands.push_back(std::move(R));
3014 return ParseStatus::Success;
3015 }
3016 return ParseStatus::Failure;
3017}
3018
3019ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3020 bool HasSP3AbsMod, bool HasLit) {
3021 ParseStatus Res = parseReg(Operands);
3022 if (!Res.isNoMatch())
3023 return Res;
3024 if (isModifier())
3025 return ParseStatus::NoMatch;
3026 return parseImm(Operands, HasSP3AbsMod, HasLit);
3027}
3028
3029bool
3030AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3031 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3032 const auto &str = Token.getString();
3033 return str == "abs" || str == "neg" || str == "sext";
3034 }
3035 return false;
3036}
3037
3038bool
3039AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3040 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3041}
3042
3043bool
3044AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3045 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3046}
3047
3048bool
3049AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3050 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3051}
3052
3053// Check if this is an operand modifier or an opcode modifier
3054 // which may look like an expression but is not. We should
3055// avoid parsing these modifiers as expressions. Currently
3056// recognized sequences are:
3057// |...|
3058// abs(...)
3059// neg(...)
3060// sext(...)
3061// -reg
3062// -|...|
3063// -abs(...)
3064// name:...
3065//
3066bool
3067AMDGPUAsmParser::isModifier() {
3068
3069 AsmToken Tok = getToken();
3070 AsmToken NextToken[2];
3071 peekTokens(NextToken);
3072
3073 return isOperandModifier(Tok, NextToken[0]) ||
3074 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3075 isOpcodeModifierWithVal(Tok, NextToken[0]);
3076}
3077
3078// Check if the current token is an SP3 'neg' modifier.
3079// Currently this modifier is allowed in the following context:
3080//
3081// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3082// 2. Before an 'abs' modifier: -abs(...)
3083// 3. Before an SP3 'abs' modifier: -|...|
3084//
3085// In all other cases "-" is handled as a part
3086// of an expression that follows the sign.
3087//
3088 // Note: When "-" is followed by an integer literal N,
3089 // this is interpreted as integer negation rather
3090 // than a floating-point NEG modifier applied to N.
3091 // Besides being counter-intuitive, such use of the floating-point
3092 // NEG modifier would result in different meanings
3093 // of integer literals used with VOP1/2/C and VOP3,
3094 // for example:
3095 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3096 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3097 // Negative fp literals with a preceding "-" are
3098 // handled likewise for uniformity.
3099//
3100bool
3101AMDGPUAsmParser::parseSP3NegModifier() {
3102
3103 AsmToken NextToken[2];
3104 peekTokens(NextToken);
3105
3106 if (isToken(AsmToken::Minus) &&
3107 (isRegister(NextToken[0], NextToken[1]) ||
3108 NextToken[0].is(AsmToken::Pipe) ||
3109 isId(NextToken[0], "abs"))) {
3110 lex();
3111 return true;
3112 }
3113
3114 return false;
3115}
3116
3117 ParseStatus
3118 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3119 bool AllowImm) {
3120 bool Neg, SP3Neg;
3121 bool Abs, SP3Abs;
3122 bool Lit;
3123 SMLoc Loc;
3124
3125 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3126 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3127 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3128
3129 SP3Neg = parseSP3NegModifier();
3130
3131 Loc = getLoc();
3132 Neg = trySkipId("neg");
3133 if (Neg && SP3Neg)
3134 return Error(Loc, "expected register or immediate");
3135 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3136 return ParseStatus::Failure;
3137
3138 Abs = trySkipId("abs");
3139 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3140 return ParseStatus::Failure;
3141
3142 Lit = trySkipId("lit");
3143 if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3144 return ParseStatus::Failure;
3145
3146 Loc = getLoc();
3147 SP3Abs = trySkipToken(AsmToken::Pipe);
3148 if (Abs && SP3Abs)
3149 return Error(Loc, "expected register or immediate");
3150
3151 ParseStatus Res;
3152 if (AllowImm) {
3153 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3154 } else {
3155 Res = parseReg(Operands);
3156 }
3157 if (!Res.isSuccess())
3158 return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
3159
3160 if (Lit && !Operands.back()->isImm())
3161 Error(Loc, "expected immediate with lit modifier");
3162
3163 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3164 return ParseStatus::Failure;
3165 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3166 return ParseStatus::Failure;
3167 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3168 return ParseStatus::Failure;
3169 if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3170 return ParseStatus::Failure;
3171
3172 AMDGPUOperand::Modifiers Mods;
3173 Mods.Abs = Abs || SP3Abs;
3174 Mods.Neg = Neg || SP3Neg;
3175 Mods.Lit = Lit;
3176
3177 if (Mods.hasFPModifiers() || Lit) {
3178 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3179 if (Op.isExpr())
3180 return Error(Op.getStartLoc(), "expected an absolute expression");
3181 Op.setModifiers(Mods);
3182 }
3183 return ParseStatus::Success;
3184}
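// Examples of operands accepted here: "v0", "-v1", "|v2|", "abs(v3)",
// "neg(2.0)", "-|v4|" and "lit(1.0)"; "--1" is rejected with a request to use
// the 'neg' modifier instead.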
3185
3186 ParseStatus
3187 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3188 bool AllowImm) {
3189 bool Sext = trySkipId("sext");
3190 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3191 return ParseStatus::Failure;
3192
3193 ParseStatus Res;
3194 if (AllowImm) {
3195 Res = parseRegOrImm(Operands);
3196 } else {
3197 Res = parseReg(Operands);
3198 }
3199 if (!Res.isSuccess())
3200 return Sext ? ParseStatus::Failure : Res;
3201
3202 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3203 return ParseStatus::Failure;
3204
3205 AMDGPUOperand::Modifiers Mods;
3206 Mods.Sext = Sext;
3207
3208 if (Mods.hasIntModifiers()) {
3209 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3210 if (Op.isExpr())
3211 return Error(Op.getStartLoc(), "expected an absolute expression");
3212 Op.setModifiers(Mods);
3213 }
3214
3215 return ParseStatus::Success;
3216}
3217
3218ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3219 return parseRegOrImmWithFPInputMods(Operands, false);
3220}
3221
3222ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3223 return parseRegOrImmWithIntInputMods(Operands, false);
3224}
3225
3226ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3227 auto Loc = getLoc();
3228 if (trySkipId("off")) {
3229 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3230 AMDGPUOperand::ImmTyOff, false));
3231 return ParseStatus::Success;
3232 }
3233
3234 if (!isRegister())
3235 return ParseStatus::NoMatch;
3236
3237 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3238 if (Reg) {
3239 Operands.push_back(std::move(Reg));
3240 return ParseStatus::Success;
3241 }
3242
3243 return ParseStatus::Failure;
3244}
3245
3246unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3247 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3248
3249 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3250 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3251 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3252 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3253 return Match_InvalidOperand;
3254
3255 if ((TSFlags & SIInstrFlags::VOP3) &&
3256 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3257 getForcedEncodingSize() != 64)
3258 return Match_PreferE32;
3259
3260 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3261 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3262 // v_mac_f32/16 allow only dst_sel == DWORD;
3263 auto OpNum =
3264 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3265 const auto &Op = Inst.getOperand(OpNum);
3266 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3267 return Match_InvalidOperand;
3268 }
3269 }
3270
3271 return Match_Success;
3272}
3273
3274 static ArrayRef<unsigned> getAllVariants() {
3275 static const unsigned Variants[] = {
 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3279 };
3280
3281 return ArrayRef(Variants);
3282}
3283
3284// What asm variants we should check
3285ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3286 if (isForcedDPP() && isForcedVOP3()) {
3287 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3288 return ArrayRef(Variants);
3289 }
3290 if (getForcedEncodingSize() == 32) {
3291 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3292 return ArrayRef(Variants);
3293 }
3294
3295 if (isForcedVOP3()) {
3296 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3297 return ArrayRef(Variants);
3298 }
3299
3300 if (isForcedSDWA()) {
3301 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3302 AMDGPUAsmVariants::SDWA9};
3303 return ArrayRef(Variants);
3304 }
3305
3306 if (isForcedDPP()) {
3307 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3308 return ArrayRef(Variants);
3309 }
3310
3311 return getAllVariants();
3312}
3313
3314StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3315 if (isForcedDPP() && isForcedVOP3())
3316 return "e64_dpp";
3317
3318 if (getForcedEncodingSize() == 32)
3319 return "e32";
3320
3321 if (isForcedVOP3())
3322 return "e64";
3323
3324 if (isForcedSDWA())
3325 return "sdwa";
3326
3327 if (isForcedDPP())
3328 return "dpp";
3329
3330 return "";
3331}
3332
3333unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3334 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3335 for (MCPhysReg Reg : Desc.implicit_uses()) {
3336 switch (Reg) {
3337 case AMDGPU::FLAT_SCR:
3338 case AMDGPU::VCC:
3339 case AMDGPU::VCC_LO:
3340 case AMDGPU::VCC_HI:
3341 case AMDGPU::M0:
3342 return Reg;
3343 default:
3344 break;
3345 }
3346 }
3347 return AMDGPU::NoRegister;
3348}
3349
3350 // NB: This code is correct only when used to check constant
3351 // bus limitations because GFX7 does not support f16 inline constants.
3352// Note that there are no cases when a GFX7 opcode violates
3353// constant bus limitations due to the use of an f16 constant.
3354bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3355 unsigned OpIdx) const {
3356 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3357
3358 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3359 AMDGPU::isKImmOperand(Desc, OpIdx)) {
3360 return false;
3361 }
3362
3363 const MCOperand &MO = Inst.getOperand(OpIdx);
3364
3365 int64_t Val = MO.getImm();
3366 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3367
3368 switch (OpSize) { // expected operand size
3369 case 8:
3370 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3371 case 4:
3372 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3373 case 2: {
3374 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
 // Packed 16-bit operand types (v2i16 / v2f16) are validated against the
 // packed inline-constant rules; all other 16-bit operand types fall through
 // to the scalar 16-bit check below.
3388 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3389
3390 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3391 }
3392 default:
3393 llvm_unreachable("invalid operand size");
3394 }
3395}
3396
3397unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3398 if (!isGFX10Plus())
3399 return 1;
3400
3401 switch (Opcode) {
3402 // 64-bit shift instructions can use only one scalar value input
3403 case AMDGPU::V_LSHLREV_B64_e64:
3404 case AMDGPU::V_LSHLREV_B64_gfx10:
3405 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3406 case AMDGPU::V_LSHRREV_B64_e64:
3407 case AMDGPU::V_LSHRREV_B64_gfx10:
3408 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3409 case AMDGPU::V_ASHRREV_I64_e64:
3410 case AMDGPU::V_ASHRREV_I64_gfx10:
3411 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3412 case AMDGPU::V_LSHL_B64_e64:
3413 case AMDGPU::V_LSHR_B64_e64:
3414 case AMDGPU::V_ASHR_I64_e64:
3415 return 1;
3416 default:
3417 return 2;
3418 }
3419}
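// For example, pre-GFX10 targets are always limited to one constant-bus use;
// on GFX10+ the 64-bit shift opcodes listed above still allow only one scalar
// or literal source, while other VALU opcodes may use two.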
3420
3421constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3422 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3423
3424// Get regular operand indices in the same order as specified
3425// in the instruction (but append mandatory literals to the end).
3426 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3427 bool AddMandatoryLiterals = false) {
3428
3429 int16_t ImmIdx =
3430 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3431
3432 if (isVOPD(Opcode)) {
3433 int16_t ImmDeferredIdx =
3434 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3435 : -1;
3436
3437 return {getNamedOperandIdx(Opcode, OpName::src0X),
3438 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3439 getNamedOperandIdx(Opcode, OpName::src0Y),
3440 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3441 ImmDeferredIdx,
3442 ImmIdx};
3443 }
3444
3445 return {getNamedOperandIdx(Opcode, OpName::src0),
3446 getNamedOperandIdx(Opcode, OpName::src1),
3447 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3448}
3449
3450bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3451 const MCOperand &MO = Inst.getOperand(OpIdx);
3452 if (MO.isImm()) {
3453 return !isInlineConstant(Inst, OpIdx);
3454 } else if (MO.isReg()) {
3455 auto Reg = MO.getReg();
3456 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3457 auto PReg = mc2PseudoReg(Reg);
3458 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3459 } else {
3460 return true;
3461 }
3462}
3463
3464bool AMDGPUAsmParser::validateConstantBusLimitations(
3465 const MCInst &Inst, const OperandVector &Operands) {
3466 const unsigned Opcode = Inst.getOpcode();
3467 const MCInstrDesc &Desc = MII.get(Opcode);
3468 unsigned LastSGPR = AMDGPU::NoRegister;
3469 unsigned ConstantBusUseCount = 0;
3470 unsigned NumLiterals = 0;
3471 unsigned LiteralSize;
3472
3473 if (!(Desc.TSFlags &
 (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
3476 !isVOPD(Opcode))
3477 return true;
3478
3479 // Check special imm operands (used by madmk, etc)
3480 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3481 ++NumLiterals;
3482 LiteralSize = 4;
3483 }
3484
3485 SmallDenseSet<unsigned> SGPRsUsed;
3486 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3487 if (SGPRUsed != AMDGPU::NoRegister) {
3488 SGPRsUsed.insert(SGPRUsed);
3489 ++ConstantBusUseCount;
3490 }
3491
3492 const OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3493
3494 for (int OpIdx : OpIndices) {
3495 if (OpIdx == -1)
3496 continue;
3497
3498 const MCOperand &MO = Inst.getOperand(OpIdx);
3499 if (usesConstantBus(Inst, OpIdx)) {
3500 if (MO.isReg()) {
3501 LastSGPR = mc2PseudoReg(MO.getReg());
3502 // Pairs of registers with a partial intersection like these
3503 // s0, s[0:1]
3504 // flat_scratch_lo, flat_scratch
3505 // flat_scratch_lo, flat_scratch_hi
3506 // are theoretically valid but they are disabled anyway.
3507 // Note that this code mimics SIInstrInfo::verifyInstruction
3508 if (SGPRsUsed.insert(LastSGPR).second) {
3509 ++ConstantBusUseCount;
3510 }
3511 } else { // Expression or a literal
3512
3513 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3514 continue; // special operand like VINTERP attr_chan
3515
3516 // An instruction may use only one literal.
3517 // This has been validated on the previous step.
3518 // See validateVOPLiteral.
3519 // This literal may be used as more than one operand.
3520 // If all these operands are of the same size,
3521 // this literal counts as one scalar value.
3522 // Otherwise it counts as 2 scalar values.
3523 // See "GFX10 Shader Programming", section 3.6.2.3.
3524
3525 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3526 if (Size < 4)
3527 Size = 4;
3528
3529 if (NumLiterals == 0) {
3530 NumLiterals = 1;
3531 LiteralSize = Size;
3532 } else if (LiteralSize != Size) {
3533 NumLiterals = 2;
3534 }
3535 }
3536 }
3537 }
3538 ConstantBusUseCount += NumLiterals;
3539
3540 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3541 return true;
3542
3543 SMLoc LitLoc = getLitLoc(Operands);
3544 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3545 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3546 Error(Loc, "invalid operand (violates constant bus restrictions)");
3547 return false;
3548}
3549
3550bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3551 const MCInst &Inst, const OperandVector &Operands) {
3552
3553 const unsigned Opcode = Inst.getOpcode();
3554 if (!isVOPD(Opcode))
3555 return true;
3556
3557 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3558
3559 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3560 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3561 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3562 ? Opr.getReg()
3563 : MCRegister();
3564 };
3565
3566 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3567 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(getVRegIdx);
3568 if (!InvalidCompOprIdx)
3569 return true;
3570
3571 auto CompOprIdx = *InvalidCompOprIdx;
3572 auto ParsedIdx =
3573 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3574 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3575 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3576
3577 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3578 if (CompOprIdx == VOPD::Component::DST) {
3579 Error(Loc, "one dst register must be even and the other odd");
3580 } else {
3581 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3582 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3583 " operands must use different VGPR banks");
3584 }
3585
3586 return false;
3587}
3588
3589bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3590
3591 const unsigned Opc = Inst.getOpcode();
3592 const MCInstrDesc &Desc = MII.get(Opc);
3593
3594 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3595 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3596 assert(ClampIdx != -1);
3597 return Inst.getOperand(ClampIdx).getImm() == 0;
3598 }
3599
3600 return true;
3601}
3602
3603bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3604 const SMLoc &IDLoc) {
3605
3606 const unsigned Opc = Inst.getOpcode();
3607 const MCInstrDesc &Desc = MII.get(Opc);
3608
3609 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3610 return true;
3611
3612 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3613 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3614 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3615
3616 assert(VDataIdx != -1);
3617
3618 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3619 return true;
3620
3621 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3622 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3623 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3624 if (DMask == 0)
3625 DMask = 1;
3626
3627 bool IsPackedD16 = false;
3628 unsigned DataSize =
3629 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3630 if (hasPackedD16()) {
3631 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3632 IsPackedD16 = D16Idx >= 0;
3633 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3634 DataSize = (DataSize + 1) / 2;
3635 }
3636
3637 if ((VDataSize / 4) == DataSize + TFESize)
3638 return true;
3639
3640 StringRef Modifiers;
3641 if (isGFX90A())
3642 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3643 else
3644 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3645
3646 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3647 return false;
3648}
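// For example, an image load with dmask == 0x7 and no tfe needs a 3-VGPR vdata
// (e.g. v[0:2]); enabling tfe requires one extra VGPR, and packed d16 halves
// the component count.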
3649
3650bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3651 const SMLoc &IDLoc) {
3652 const unsigned Opc = Inst.getOpcode();
3653 const MCInstrDesc &Desc = MII.get(Opc);
3654
3655 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3656 return true;
3657
3658 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3659
3660 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3661 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3662 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3663 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3664 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3665 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3666
3667 assert(VAddr0Idx != -1);
3668 assert(SrsrcIdx != -1);
3669 assert(SrsrcIdx > VAddr0Idx);
3670
3671 bool IsA16 = Inst.getOperand(A16Idx).getImm();
3672 if (BaseOpcode->BVH) {
3673 if (IsA16 == BaseOpcode->A16)
3674 return true;
3675 Error(IDLoc, "image address size does not match a16");
3676 return false;
3677 }
3678
3679 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3680 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3681 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3682 unsigned ActualAddrSize =
3683 IsNSA ? SrsrcIdx - VAddr0Idx
3684 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3685
3686 unsigned ExpectedAddrSize =
3687 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3688
3689 if (IsNSA) {
3690 if (hasPartialNSAEncoding() && ExpectedAddrSize > getNSAMaxSize()) {
3691 int VAddrLastIdx = SrsrcIdx - 1;
3692 unsigned VAddrLastSize =
3693 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3694
3695 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3696 }
3697 } else {
3698 if (ExpectedAddrSize > 12)
3699 ExpectedAddrSize = 16;
3700
3701 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3702 // This provides backward compatibility for assembly created
3703 // before 160b/192b/224b types were directly supported.
3704 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3705 return true;
3706 }
3707
3708 if (ActualAddrSize == ExpectedAddrSize)
3709 return true;
3710
3711 Error(IDLoc, "image address size does not match dim and a16");
3712 return false;
3713}
3714
3715bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3716
3717 const unsigned Opc = Inst.getOpcode();
3718 const MCInstrDesc &Desc = MII.get(Opc);
3719
3720 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3721 return true;
3722 if (!Desc.mayLoad() || !Desc.mayStore())
3723 return true; // Not atomic
3724
3725 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3726 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3727
3728 // This is an incomplete check because image_atomic_cmpswap
3729 // may only use 0x3 and 0xf while other atomic operations
3730 // may use 0x1 and 0x3. However these limitations are
3731 // verified when we check that dmask matches dst size.
3732 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3733}
3734
3735bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3736
3737 const unsigned Opc = Inst.getOpcode();
3738 const MCInstrDesc &Desc = MII.get(Opc);
3739
3740 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3741 return true;
3742
3743 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3744 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3745
3746 // GATHER4 instructions use dmask in a different fashion compared to
3747 // other MIMG instructions. The only useful DMASK values are
3748 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3749 // (red,red,red,red) etc.) The ISA document doesn't mention
3750 // this.
3751 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3752}
3753
3754bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3755 const unsigned Opc = Inst.getOpcode();
3756 const MCInstrDesc &Desc = MII.get(Opc);
3757
3758 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3759 return true;
3760
3761 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3762 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3763 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3764
3765 if (!BaseOpcode->MSAA)
3766 return true;
3767
3768 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3769 assert(DimIdx != -1);
3770
3771 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3772 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3773
3774 return DimInfo->MSAA;
3775}
3776
3777static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3778{
3779 switch (Opcode) {
3780 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3781 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3782 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3783 return true;
3784 default:
3785 return false;
3786 }
3787}
3788
3789// movrels* opcodes should only allow VGPRS as src0.
3790// This is specified in .td description for vop1/vop3,
3791// but sdwa is handled differently. See isSDWAOperand.
3792bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3793 const OperandVector &Operands) {
3794
3795 const unsigned Opc = Inst.getOpcode();
3796 const MCInstrDesc &Desc = MII.get(Opc);
3797
3798 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3799 return true;
3800
3801 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3802 assert(Src0Idx != -1);
3803
3804 SMLoc ErrLoc;
3805 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3806 if (Src0.isReg()) {
3807 auto Reg = mc2PseudoReg(Src0.getReg());
3808 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3809 if (!isSGPR(Reg, TRI))
3810 return true;
3811 ErrLoc = getRegLoc(Reg, Operands);
3812 } else {
3813 ErrLoc = getConstLoc(Operands);
3814 }
3815
3816 Error(ErrLoc, "source operand must be a VGPR");
3817 return false;
3818}
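// Editor's note (illustrative, not from the upstream source): a hypothetical
// "v_movrels_b32_sdwa v0, s2 dst_sel:DWORD src0_sel:DWORD" would be diagnosed
// here, since the SDWA movrels forms only accept a VGPR as src0.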
3819
3820bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3821 const OperandVector &Operands) {
3822
3823 const unsigned Opc = Inst.getOpcode();
3824
3825 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3826 return true;
3827
3828 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3829 assert(Src0Idx != -1);
3830
3831 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3832 if (!Src0.isReg())
3833 return true;
3834
3835 auto Reg = mc2PseudoReg(Src0.getReg());
3836 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3837 if (!isGFX90A() && isSGPR(Reg, TRI)) {
3838 Error(getRegLoc(Reg, Operands),
3839 "source operand must be either a VGPR or an inline constant");
3840 return false;
3841 }
3842
3843 return true;
3844}
3845
3846bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
3847 const OperandVector &Operands) {
3848 unsigned Opcode = Inst.getOpcode();
3849 const MCInstrDesc &Desc = MII.get(Opcode);
3850
3851 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
3852 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
3853 return true;
3854
3855 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
3856 if (Src2Idx == -1)
3857 return true;
3858
3859 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
3860 Error(getConstLoc(Operands),
3861 "inline constants are not allowed for this operand");
3862 return false;
3863 }
3864
3865 return true;
3866}
3867
3868bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3869 const OperandVector &Operands) {
3870 const unsigned Opc = Inst.getOpcode();
3871 const MCInstrDesc &Desc = MII.get(Opc);
3872
3873 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3874 return true;
3875
3876 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3877 if (Src2Idx == -1)
3878 return true;
3879
3880 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3881 if (!Src2.isReg())
3882 return true;
3883
3884 MCRegister Src2Reg = Src2.getReg();
3885 MCRegister DstReg = Inst.getOperand(0).getReg();
3886 if (Src2Reg == DstReg)
3887 return true;
3888
3889 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3890 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
3891 return true;
3892
3893 if (TRI->regsOverlap(Src2Reg, DstReg)) {
3894 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3895 "source 2 operand must not partially overlap with dst");
3896 return false;
3897 }
3898
3899 return true;
3900}
3901
3902bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3903 switch (Inst.getOpcode()) {
3904 default:
3905 return true;
3906 case V_DIV_SCALE_F32_gfx6_gfx7:
3907 case V_DIV_SCALE_F32_vi:
3908 case V_DIV_SCALE_F32_gfx10:
3909 case V_DIV_SCALE_F64_gfx6_gfx7:
3910 case V_DIV_SCALE_F64_vi:
3911 case V_DIV_SCALE_F64_gfx10:
3912 break;
3913 }
3914
3915 // TODO: Check that src0 = src1 or src2.
3916
3917 for (auto Name : {AMDGPU::OpName::src0_modifiers,
3918 AMDGPU::OpName::src2_modifiers,
3919 AMDGPU::OpName::src2_modifiers}) {
3920 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3921 .getImm() &
3922 SISrcMods::ABS) {
3923 return false;
3924 }
3925 }
3926
3927 return true;
3928}
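// Editor's note (illustrative, not from the upstream source): the divide-scale
// results feed the hardware division fixup sequence, so |abs| source modifiers
// are rejected; a hypothetical "v_div_scale_f32 v0, vcc, |v1|, v2, v3" triggers
// the "ABS not allowed in VOP3B instructions" error reported by the caller.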
3929
3930bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3931
3932 const unsigned Opc = Inst.getOpcode();
3933 const MCInstrDesc &Desc = MII.get(Opc);
3934
3935 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3936 return true;
3937
3938 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3939 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3940 if (isCI() || isSI())
3941 return false;
3942 }
3943
3944 return true;
3945}
3946
3947static bool IsRevOpcode(const unsigned Opcode)
3948{
3949 switch (Opcode) {
3950 case AMDGPU::V_SUBREV_F32_e32:
3951 case AMDGPU::V_SUBREV_F32_e64:
3952 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3953 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3954 case AMDGPU::V_SUBREV_F32_e32_vi:
3955 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3956 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3957 case AMDGPU::V_SUBREV_F32_e64_vi:
3958
3959 case AMDGPU::V_SUBREV_CO_U32_e32:
3960 case AMDGPU::V_SUBREV_CO_U32_e64:
3961 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3962 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3963
3964 case AMDGPU::V_SUBBREV_U32_e32:
3965 case AMDGPU::V_SUBBREV_U32_e64:
3966 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3967 case AMDGPU::V_SUBBREV_U32_e32_vi:
3968 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3969 case AMDGPU::V_SUBBREV_U32_e64_vi:
3970
3971 case AMDGPU::V_SUBREV_U32_e32:
3972 case AMDGPU::V_SUBREV_U32_e64:
3973 case AMDGPU::V_SUBREV_U32_e32_gfx9:
3974 case AMDGPU::V_SUBREV_U32_e32_vi:
3975 case AMDGPU::V_SUBREV_U32_e64_gfx9:
3976 case AMDGPU::V_SUBREV_U32_e64_vi:
3977
3978 case AMDGPU::V_SUBREV_F16_e32:
3979 case AMDGPU::V_SUBREV_F16_e64:
3980 case AMDGPU::V_SUBREV_F16_e32_gfx10:
3981 case AMDGPU::V_SUBREV_F16_e32_vi:
3982 case AMDGPU::V_SUBREV_F16_e64_gfx10:
3983 case AMDGPU::V_SUBREV_F16_e64_vi:
3984
3985 case AMDGPU::V_SUBREV_U16_e32:
3986 case AMDGPU::V_SUBREV_U16_e64:
3987 case AMDGPU::V_SUBREV_U16_e32_vi:
3988 case AMDGPU::V_SUBREV_U16_e64_vi:
3989
3990 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3991 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3992 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3993
3994 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3995 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3996
3997 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3998 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3999
4000 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4001 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4002
4003 case AMDGPU::V_LSHRREV_B32_e32:
4004 case AMDGPU::V_LSHRREV_B32_e64:
4005 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4006 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4007 case AMDGPU::V_LSHRREV_B32_e32_vi:
4008 case AMDGPU::V_LSHRREV_B32_e64_vi:
4009 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4010 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4011
4012 case AMDGPU::V_ASHRREV_I32_e32:
4013 case AMDGPU::V_ASHRREV_I32_e64:
4014 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4015 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4016 case AMDGPU::V_ASHRREV_I32_e32_vi:
4017 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4018 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4019 case AMDGPU::V_ASHRREV_I32_e64_vi:
4020
4021 case AMDGPU::V_LSHLREV_B32_e32:
4022 case AMDGPU::V_LSHLREV_B32_e64:
4023 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4024 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4025 case AMDGPU::V_LSHLREV_B32_e32_vi:
4026 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4027 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4028 case AMDGPU::V_LSHLREV_B32_e64_vi:
4029
4030 case AMDGPU::V_LSHLREV_B16_e32:
4031 case AMDGPU::V_LSHLREV_B16_e64:
4032 case AMDGPU::V_LSHLREV_B16_e32_vi:
4033 case AMDGPU::V_LSHLREV_B16_e64_vi:
4034 case AMDGPU::V_LSHLREV_B16_gfx10:
4035
4036 case AMDGPU::V_LSHRREV_B16_e32:
4037 case AMDGPU::V_LSHRREV_B16_e64:
4038 case AMDGPU::V_LSHRREV_B16_e32_vi:
4039 case AMDGPU::V_LSHRREV_B16_e64_vi:
4040 case AMDGPU::V_LSHRREV_B16_gfx10:
4041
4042 case AMDGPU::V_ASHRREV_I16_e32:
4043 case AMDGPU::V_ASHRREV_I16_e64:
4044 case AMDGPU::V_ASHRREV_I16_e32_vi:
4045 case AMDGPU::V_ASHRREV_I16_e64_vi:
4046 case AMDGPU::V_ASHRREV_I16_gfx10:
4047
4048 case AMDGPU::V_LSHLREV_B64_e64:
4049 case AMDGPU::V_LSHLREV_B64_gfx10:
4050 case AMDGPU::V_LSHLREV_B64_vi:
4051
4052 case AMDGPU::V_LSHRREV_B64_e64:
4053 case AMDGPU::V_LSHRREV_B64_gfx10:
4054 case AMDGPU::V_LSHRREV_B64_vi:
4055
4056 case AMDGPU::V_ASHRREV_I64_e64:
4057 case AMDGPU::V_ASHRREV_I64_gfx10:
4058 case AMDGPU::V_ASHRREV_I64_vi:
4059
4060 case AMDGPU::V_PK_LSHLREV_B16:
4061 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4062 case AMDGPU::V_PK_LSHLREV_B16_vi:
4063
4064 case AMDGPU::V_PK_LSHRREV_B16:
4065 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4066 case AMDGPU::V_PK_LSHRREV_B16_vi:
4067 case AMDGPU::V_PK_ASHRREV_I16:
4068 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4069 case AMDGPU::V_PK_ASHRREV_I16_vi:
4070 return true;
4071 default:
4072 return false;
4073 }
4074}
4075
4076std::optional<StringRef>
4077AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4078
4079 using namespace SIInstrFlags;
4080 const unsigned Opcode = Inst.getOpcode();
4081 const MCInstrDesc &Desc = MII.get(Opcode);
4082
4083 // lds_direct register is defined so that it can be used
4084 // with 9-bit operands only. Ignore encodings which do not accept these.
4085 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4086 if ((Desc.TSFlags & Enc) == 0)
4087 return std::nullopt;
4088
4089 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4090 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4091 if (SrcIdx == -1)
4092 break;
4093 const auto &Src = Inst.getOperand(SrcIdx);
4094 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4095
4096 if (isGFX90A() || isGFX11Plus())
4097 return StringRef("lds_direct is not supported on this GPU");
4098
4099 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4100 return StringRef("lds_direct cannot be used with this instruction");
4101
4102 if (SrcName != OpName::src0)
4103 return StringRef("lds_direct may be used as src0 only");
4104 }
4105 }
4106
4107 return std::nullopt;
4108}
4109
4110SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4111 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4112 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4113 if (Op.isFlatOffset())
4114 return Op.getStartLoc();
4115 }
4116 return getLoc();
4117}
4118
4119bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4120 const OperandVector &Operands) {
4121 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4122 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4123 return true;
4124
4125 auto Opcode = Inst.getOpcode();
4126 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4127 assert(OpNum != -1);
4128
4129 const auto &Op = Inst.getOperand(OpNum);
4130 if (!hasFlatOffsets() && Op.getImm() != 0) {
4131 Error(getFlatOffsetLoc(Operands),
4132 "flat offset modifier is not supported on this GPU");
4133 return false;
4134 }
4135
4136 // For FLAT segment the offset must be positive;
4137 // MSB is ignored and forced to zero.
4138 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4139 bool AllowNegative =
4140 TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
4141 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4142 Error(getFlatOffsetLoc(Operands),
4143 Twine("expected a ") +
4144 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4145 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4146 return false;
4147 }
4148
4149 return true;
4150}
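// Editor's note (illustrative, not from the upstream source): plain FLAT
// addressing only accepts a non-negative offset (the MSB is ignored), while the
// global/scratch forms take a signed offset; a hypothetical
// "flat_load_dword v0, v[2:3] offset:-16" would therefore be rejected with an
// "unsigned offset" diagnostic.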
4151
4152SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4153 // Start with second operand because SMEM Offset cannot be dst or src0.
4154 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4155 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4156 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4157 return Op.getStartLoc();
4158 }
4159 return getLoc();
4160}
4161
4162bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4163 const OperandVector &Operands) {
4164 if (isCI() || isSI())
4165 return true;
4166
4167 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4168 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4169 return true;
4170
4171 auto Opcode = Inst.getOpcode();
4172 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4173 if (OpNum == -1)
4174 return true;
4175
4176 const auto &Op = Inst.getOperand(OpNum);
4177 if (!Op.isImm())
4178 return true;
4179
4180 uint64_t Offset = Op.getImm();
4181 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4182 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4183 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4184 return true;
4185
4186 Error(getSMEMOffsetLoc(Operands),
4187 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4188 "expected a 21-bit signed offset");
4189
4190 return false;
4191}
4192
4193bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4194 unsigned Opcode = Inst.getOpcode();
4195 const MCInstrDesc &Desc = MII.get(Opcode);
4196 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4197 return true;
4198
4199 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4200 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4201
4202 const int OpIndices[] = { Src0Idx, Src1Idx };
4203
4204 unsigned NumExprs = 0;
4205 unsigned NumLiterals = 0;
4206 uint32_t LiteralValue;
4207
4208 for (int OpIdx : OpIndices) {
4209 if (OpIdx == -1) break;
4210
4211 const MCOperand &MO = Inst.getOperand(OpIdx);
4212 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4213 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4214 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4215 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4216 if (NumLiterals == 0 || LiteralValue != Value) {
4217 LiteralValue = Value;
4218 ++NumLiterals;
4219 }
4220 } else if (MO.isExpr()) {
4221 ++NumExprs;
4222 }
4223 }
4224 }
4225
4226 return NumLiterals + NumExprs <= 1;
4227}
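// Editor's note (illustrative, not from the upstream source): SOP2/SOPC
// encodings have room for a single 32-bit literal, so a hypothetical
// "s_add_u32 s0, 0x12345678, 0x9abcdef0" (two distinct literals) fails this
// check, while reusing the same literal in both sources is accepted.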
4228
4229bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4230 const unsigned Opc = Inst.getOpcode();
4231 if (isPermlane16(Opc)) {
4232 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4233 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4234
4235 if (OpSel & ~3)
4236 return false;
4237 }
4238
4239 uint64_t TSFlags = MII.get(Opc).TSFlags;
4240
4241 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4242 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4243 if (OpSelIdx != -1) {
4244 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4245 return false;
4246 }
4247 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4248 if (OpSelHiIdx != -1) {
4249 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4250 return false;
4251 }
4252 }
4253
4254 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4255 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4256 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4257 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4258 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4259 if (OpSel & 3)
4260 return false;
4261 }
4262
4263 return true;
4264}
4265
4266bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4267 const OperandVector &Operands) {
4268 const unsigned Opc = Inst.getOpcode();
4269 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4270 if (DppCtrlIdx >= 0) {
4271 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4272
4273 if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
4274 AMDGPU::isDPALU_DPP(MII.get(Opc))) {
4275 // DP ALU DPP is supported for row_newbcast only on GFX9*
4276 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4277 Error(S, "DP ALU dpp only supports row_newbcast");
4278 return false;
4279 }
4280 }
4281
4282 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
4283 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4284
4285 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
4286 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4287 if (Src1Idx >= 0) {
4288 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4289 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4290 if (Src1.isImm() ||
4291 (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI))) {
4292 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[Src1Idx]);
4293 Error(Op.getStartLoc(), "invalid operand for instruction");
4294 return false;
4295 }
4296 }
4297 }
4298
4299 return true;
4300}
4301
4302// Check if VCC register matches wavefront size
4303bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4304 auto FB = getFeatureBits();
4305 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4306 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4307}
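// Editor's note (illustrative, not from the upstream source): wave64 code names
// the full 64-bit vcc pair, whereas wave32 code must use vcc_lo, e.g. a
// hypothetical "v_add_co_u32 v0, vcc_lo, v1, v2" in a wave32 module.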
4308
4309// One unique literal can be used. VOP3 literal is only allowed in GFX10+
4310bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4311 const OperandVector &Operands) {
4312 unsigned Opcode = Inst.getOpcode();
4313 const MCInstrDesc &Desc = MII.get(Opcode);
4314 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4315 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4316 !HasMandatoryLiteral && !isVOPD(Opcode))
4317 return true;
4318
4319 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4320
4321 unsigned NumExprs = 0;
4322 unsigned NumLiterals = 0;
4323 uint64_t LiteralValue;
4324
4325 for (int OpIdx : OpIndices) {
4326 if (OpIdx == -1)
4327 continue;
4328
4329 const MCOperand &MO = Inst.getOperand(OpIdx);
4330 if (!MO.isImm() && !MO.isExpr())
4331 continue;
4332 if (!isSISrcOperand(Desc, OpIdx))
4333 continue;
4334
4335 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4336 uint64_t Value = static_cast<uint64_t>(MO.getImm());
4337 bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
4338 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
4339 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
4340
4341 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
4342 Error(getLitLoc(Operands), "invalid operand for instruction");
4343 return false;
4344 }
4345
4346 if (IsFP64 && IsValid32Op)
4347 Value = Hi_32(Value);
4348
4349 if (NumLiterals == 0 || LiteralValue != Value) {
4350 LiteralValue = Value;
4351 ++NumLiterals;
4352 }
4353 } else if (MO.isExpr()) {
4354 ++NumExprs;
4355 }
4356 }
4357 NumLiterals += NumExprs;
4358
4359 if (!NumLiterals)
4360 return true;
4361
4362 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4363 Error(getLitLoc(Operands), "literal operands are not supported");
4364 return false;
4365 }
4366
4367 if (NumLiterals > 1) {
4368 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4369 return false;
4370 }
4371
4372 return true;
4373}
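// Editor's note (illustrative, not from the upstream source): VOP3/VOP3P
// literals require the VOP3Literal feature (GFX10+), and at most one unique
// literal may appear; a hypothetical "v_fma_f32 v0, 0x3f800000, v1, 0x40000000"
// uses two distinct literals and is rejected above.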
4374
4375// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4376static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4377 const MCRegisterInfo *MRI) {
4378 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4379 if (OpIdx < 0)
4380 return -1;
4381
4382 const MCOperand &Op = Inst.getOperand(OpIdx);
4383 if (!Op.isReg())
4384 return -1;
4385
4386 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4387 auto Reg = Sub ? Sub : Op.getReg();
4388 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4389 return AGPR32.contains(Reg) ? 1 : 0;
4390}
4391
4392bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4393 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4394 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4395 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4396 SIInstrFlags::DS)) == 0)
4397 return true;
4398
4399 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4400 : AMDGPU::OpName::vdata;
4401
4402 const MCRegisterInfo *MRI = getMRI();
4403 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4404 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4405
4406 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4407 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4408 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4409 return false;
4410 }
4411
4412 auto FB = getFeatureBits();
4413 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4414 if (DataAreg < 0 || DstAreg < 0)
4415 return true;
4416 return DstAreg == DataAreg;
4417 }
4418
4419 return DstAreg < 1 && DataAreg < 1;
4420}
4421
4422bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4423 auto FB = getFeatureBits();
4424 if (!FB[AMDGPU::FeatureGFX90AInsts])
4425 return true;
4426
4427 const MCRegisterInfo *MRI = getMRI();
4428 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4429 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4430 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4431 const MCOperand &Op = Inst.getOperand(I);
4432 if (!Op.isReg())
4433 continue;
4434
4435 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4436 if (!Sub)
4437 continue;
4438
4439 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4440 return false;
4441 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4442 return false;
4443 }
4444
4445 return true;
4446}
4447
4448SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4449 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4450 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4451 if (Op.isBLGP())
4452 return Op.getStartLoc();
4453 }
4454 return SMLoc();
4455}
4456
4457bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4458 const OperandVector &Operands) {
4459 unsigned Opc = Inst.getOpcode();
4460 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4461 if (BlgpIdx == -1)
4462 return true;
4463 SMLoc BLGPLoc = getBLGPLoc(Operands);
4464 if (!BLGPLoc.isValid())
4465 return true;
4466 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4467 auto FB = getFeatureBits();
4468 bool UsesNeg = false;
4469 if (FB[AMDGPU::FeatureGFX940Insts]) {
4470 switch (Opc) {
4471 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4472 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4473 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4474 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4475 UsesNeg = true;
4476 }
4477 }
4478
4479 if (IsNeg == UsesNeg)
4480 return true;
4481
4482 Error(BLGPLoc,
4483 UsesNeg ? "invalid modifier: blgp is not supported"
4484 : "invalid modifier: neg is not supported");
4485
4486 return false;
4487}
4488
4489bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4490 const OperandVector &Operands) {
4491 if (!isGFX11Plus())
4492 return true;
4493
4494 unsigned Opc = Inst.getOpcode();
4495 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4496 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4497 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4498 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4499 return true;
4500
4501 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4502 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4503 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4504 if (Reg == AMDGPU::SGPR_NULL)
4505 return true;
4506
4507 SMLoc RegLoc = getRegLoc(Reg, Operands);
4508 Error(RegLoc, "src0 must be null");
4509 return false;
4510}
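// Editor's note (illustrative, not from the upstream source): on GFX11 the
// split waitcnt forms only accept the null register as their destination, e.g.
// "s_waitcnt_vscnt null, 0x0"; a hypothetical "s_waitcnt_vscnt s0, 0x0" is
// diagnosed with "src0 must be null".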
4511
4512bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
4513 const OperandVector &Operands) {
4514 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4515 if ((TSFlags & SIInstrFlags::DS) == 0)
4516 return true;
4517 if (TSFlags & SIInstrFlags::GWS)
4518 return validateGWS(Inst, Operands);
4519 // Only validate GDS for non-GWS instructions.
4520 if (hasGDS())
4521 return true;
4522 int GDSIdx =
4523 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
4524 if (GDSIdx < 0)
4525 return true;
4526 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
4527 if (GDS) {
4528 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
4529 Error(S, "gds modifier is not supported on this GPU");
4530 return false;
4531 }
4532 return true;
4533}
4534
4535// gfx90a has an undocumented limitation:
4536// DS_GWS opcodes must use even aligned registers.
4537bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4538 const OperandVector &Operands) {
4539 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4540 return true;
4541
4542 int Opc = Inst.getOpcode();
4543 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4544 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4545 return true;
4546
4547 const MCRegisterInfo *MRI = getMRI();
4548 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4549 int Data0Pos =
4550 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4551 assert(Data0Pos != -1);
4552 auto Reg = Inst.getOperand(Data0Pos).getReg();
4553 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4554 if (RegIdx & 1) {
4555 SMLoc RegLoc = getRegLoc(Reg, Operands);
4556 Error(RegLoc, "vgpr must be even aligned");
4557 return false;
4558 }
4559
4560 return true;
4561}
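// Editor's note (illustrative, not from the upstream source): on gfx90a the GWS
// data register must be even aligned, so a hypothetical "ds_gws_init v3 gds"
// would be rejected here, while v2 or v4 would be accepted.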
4562
4563bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4564 const OperandVector &Operands,
4565 const SMLoc &IDLoc) {
4566 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4567 AMDGPU::OpName::cpol);
4568 if (CPolPos == -1)
4569 return true;
4570
4571 unsigned CPol = Inst.getOperand(CPolPos).getImm();
4572
4573 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4574 if (TSFlags & SIInstrFlags::SMRD) {
4575 if (CPol && (isSI() || isCI())) {
4576 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4577 Error(S, "cache policy is not supported for SMRD instructions");
4578 return false;
4579 }
4580 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4581 Error(IDLoc, "invalid cache policy for SMEM instruction");
4582 return false;
4583 }
4584 }
4585
4586 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4587 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
4588 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4589 SIInstrFlags::FLAT;
4590 if (!(TSFlags & AllowSCCModifier)) {
4591 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4592 StringRef CStr(S.getPointer());
4593 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4594 Error(S,
4595 "scc modifier is not supported for this instruction on this GPU");
4596 return false;
4597 }
4598 }
4599
4601 return true;
4602
4603 if (TSFlags & SIInstrFlags::IsAtomicRet) {
4604 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4605 Error(IDLoc, isGFX940() ? "instruction must use sc0"
4606 : "instruction must use glc");
4607 return false;
4608 }
4609 } else {
4610 if (CPol & CPol::GLC) {
4611 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4612 StringRef CStr(S.getPointer());
4613 S = SMLoc::getFromPointer(
4614 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4615 Error(S, isGFX940() ? "instruction must not use sc0"
4616 : "instruction must not use glc");
4617 return false;
4618 }
4619 }
4620
4621 return true;
4622}
4623
4624bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
4625 if (!isGFX11Plus())
4626 return true;
4627 for (auto &Operand : Operands) {
4628 if (!Operand->isReg())
4629 continue;
4630 unsigned Reg = Operand->getReg();
4631 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
4632 Error(getRegLoc(Reg, Operands),
4633 "execz and vccz are not supported on this GPU");
4634 return false;
4635 }
4636 }
4637 return true;
4638}
4639
4640bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
4641 const OperandVector &Operands) {
4642 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4643 if (Desc.mayStore() &&
4644 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4645 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
4646 if (Loc != getInstLoc(Operands)) {
4647 Error(Loc, "TFE modifier has no meaning for store instructions");
4648 return false;
4649 }
4650 }
4651
4652 return true;
4653}
4654
4655bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4656 const SMLoc &IDLoc,
4657 const OperandVector &Operands) {
4658 if (auto ErrMsg = validateLdsDirect(Inst)) {
4659 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4660 return false;
4661 }
4662 if (!validateSOPLiteral(Inst)) {
4663 Error(getLitLoc(Operands),
4664 "only one unique literal operand is allowed");
4665 return false;
4666 }
4667 if (!validateVOPLiteral(Inst, Operands)) {
4668 return false;
4669 }
4670 if (!validateConstantBusLimitations(Inst, Operands)) {
4671 return false;
4672 }
4673 if (!validateVOPDRegBankConstraints(Inst, Operands)) {
4674 return false;
4675 }
4676 if (!validateIntClampSupported(Inst)) {
4677 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4678 "integer clamping is not supported on this GPU");
4679 return false;
4680 }
4681 if (!validateOpSel(Inst)) {
4682 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4683 "invalid op_sel operand");
4684 return false;
4685 }
4686 if (!validateDPP(Inst, Operands)) {
4687 return false;
4688 }
4689 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4690 if (!validateMIMGD16(Inst)) {
4691 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4692 "d16 modifier is not supported on this GPU");
4693 return false;
4694 }
4695 if (!validateMIMGMSAA(Inst)) {
4696 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4697 "invalid dim; must be MSAA type");
4698 return false;
4699 }
4700 if (!validateMIMGDataSize(Inst, IDLoc)) {
4701 return false;
4702 }
4703 if (!validateMIMGAddrSize(Inst, IDLoc))
4704 return false;
4705 if (!validateMIMGAtomicDMask(Inst)) {
4706 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4707 "invalid atomic image dmask");
4708 return false;
4709 }
4710 if (!validateMIMGGatherDMask(Inst)) {
4711 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4712 "invalid image_gather dmask: only one bit must be set");
4713 return false;
4714 }
4715 if (!validateMovrels(Inst, Operands)) {
4716 return false;
4717 }
4718 if (!validateFlatOffset(Inst, Operands)) {
4719 return false;
4720 }
4721 if (!validateSMEMOffset(Inst, Operands)) {
4722 return false;
4723 }
4724 if (!validateMAIAccWrite(Inst, Operands)) {
4725 return false;
4726 }
4727 if (!validateMAISrc2(Inst, Operands)) {
4728 return false;
4729 }
4730 if (!validateMFMA(Inst, Operands)) {
4731 return false;
4732 }
4733 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4734 return false;
4735 }
4736
4737 if (!validateAGPRLdSt(Inst)) {
4738 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4739 ? "invalid register class: data and dst should be all VGPR or AGPR"
4740 : "invalid register class: agpr loads and stores not supported on this GPU"
4741 );
4742 return false;
4743 }
4744 if (!validateVGPRAlign(Inst)) {
4745 Error(IDLoc,
4746 "invalid register class: vgpr tuples must be 64 bit aligned");
4747 return false;
4748 }
4749 if (!validateDS(Inst, Operands)) {
4750 return false;
4751 }
4752
4753 if (!validateBLGP(Inst, Operands)) {
4754 return false;
4755 }
4756
4757 if (!validateDivScale(Inst)) {
4758 Error(IDLoc, "ABS not allowed in VOP3B instructions");
4759 return false;
4760 }
4761 if (!validateWaitCnt(Inst, Operands)) {
4762 return false;
4763 }
4764 if (!validateExeczVcczOperands(Operands)) {
4765 return false;
4766 }
4767 if (!validateTFE(Inst, Operands)) {
4768 return false;
4769 }
4770
4771 return true;
4772}
4773
4774 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4775 const FeatureBitset &FBS,
4776 unsigned VariantID = 0);
4777
4778static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4779 const FeatureBitset &AvailableFeatures,
4780 unsigned VariantID);
4781
4782bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4783 const FeatureBitset &FBS) {
4784 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4785}
4786
4787bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4788 const FeatureBitset &FBS,
4789 ArrayRef<unsigned> Variants) {
4790 for (auto Variant : Variants) {
4791 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4792 return true;
4793 }
4794
4795 return false;
4796}
4797
4798bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4799 const SMLoc &IDLoc) {
4800 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
4801
4802 // Check if requested instruction variant is supported.
4803 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4804 return false;
4805
4806 // This instruction is not supported.
4807 // Clear any other pending errors because they are no longer relevant.
4808 getParser().clearPendingErrors();
4809
4810 // Requested instruction variant is not supported.
4811 // Check if any other variants are supported.
4812 StringRef VariantName = getMatchedVariantName();
4813 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4814 return Error(IDLoc,
4815 Twine(VariantName,
4816 " variant of this instruction is not supported"));
4817 }
4818
4819 // Check if this instruction may be used with a different wavesize.
4820 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
4821 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
4822
4823 FeatureBitset FeaturesWS32 = getFeatureBits();
4824 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
4825 .flip(AMDGPU::FeatureWavefrontSize32);
4826 FeatureBitset AvailableFeaturesWS32 =
4827 ComputeAvailableFeatures(FeaturesWS32);
4828
4829 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
4830 return Error(IDLoc, "instruction requires wavesize=32");
4831 }
4832
4833 // Finally check if this instruction is supported on any other GPU.
4834 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4835 return Error(IDLoc, "instruction not supported on this GPU");
4836 }
4837
4838 // Instruction not supported on any GPU. Probably a typo.
4839 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4840 return Error(IDLoc, "invalid instruction" + Suggestion);
4841}
4842
4843 static bool isInvalidVOPDY(const OperandVector &Operands,
4844 uint64_t InvalidOprIdx) {
4845 assert(InvalidOprIdx < Operands.size());
4846 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
4847 if (Op.isToken() && InvalidOprIdx > 1) {
4848 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
4849 return PrevOp.isToken() && PrevOp.getToken() == "::";
4850 }
4851 return false;
4852}
4853
4854bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4855 OperandVector &Operands,
4856 MCStreamer &Out,
4857 uint64_t &ErrorInfo,
4858 bool MatchingInlineAsm) {
4859 MCInst Inst;
4860 unsigned Result = Match_Success;
4861 for (auto Variant : getMatchedVariants()) {
4862 uint64_t EI;
4863 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4864 Variant);
4865 // We order match statuses from least to most specific and keep the most
4866 // specific status as the result:
4867 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4868 if ((R == Match_Success) ||
4869 (R == Match_PreferE32) ||
4870 (R == Match_MissingFeature && Result != Match_PreferE32) ||
4871 (R == Match_InvalidOperand && Result != Match_MissingFeature
4872 && Result != Match_PreferE32) ||
4873 (R == Match_MnemonicFail && Result != Match_InvalidOperand
4874 && Result != Match_MissingFeature
4875 && Result != Match_PreferE32)) {
4876 Result = R;
4877 ErrorInfo = EI;
4878 }
4879 if (R == Match_Success)
4880 break;
4881 }
4882
4883 if (Result == Match_Success) {
4884 if (!validateInstruction(Inst, IDLoc, Operands)) {
4885 return true;
4886 }
4887 Inst.setLoc(IDLoc);
4888 Out.emitInstruction(Inst, getSTI());
4889 return false;
4890 }
4891
4892 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4893 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4894 return true;
4895 }
4896
4897 switch (Result) {
4898 default: break;
4899 case Match_MissingFeature:
4900 // It has been verified that the specified instruction
4901 // mnemonic is valid. A match was found but it requires
4902 // features which are not supported on this GPU.
4903 return Error(IDLoc, "operands are not valid for this GPU or mode");
4904
4905 case Match_InvalidOperand: {
4906 SMLoc ErrorLoc = IDLoc;
4907 if (ErrorInfo != ~0ULL) {
4908 if (ErrorInfo >= Operands.size()) {
4909 return Error(IDLoc, "too few operands for instruction");
4910 }
4911 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4912 if (ErrorLoc == SMLoc())
4913 ErrorLoc = IDLoc;
4914
4915 if (isInvalidVOPDY(Operands, ErrorInfo)) {
4916 return Error(ErrorLoc, "invalid VOPDY instruction");
4917 }
4918 return Error(ErrorLoc, "invalid operand for instruction");
4919 }
4920
4921 case Match_PreferE32:
4922 return Error(IDLoc, "internal error: instruction without _e64 suffix "
4923 "should be encoded as e32");
4924 case Match_MnemonicFail:
4925 llvm_unreachable("Invalid instructions should have been handled already");
4926 }
4927 llvm_unreachable("Implement any new match types added!");
4928}
4929
4930bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4931 int64_t Tmp = -1;
4932 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4933 return true;
4934 }
4935 if (getParser().parseAbsoluteExpression(Tmp)) {
4936 return true;
4937 }
4938 Ret = static_cast<uint32_t>(Tmp);
4939 return false;
4940}
4941
4942bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4943 uint32_t &Minor) {
4944 if (ParseAsAbsoluteExpression(Major))
4945 return TokError("invalid major version");
4946
4947 if (!trySkipToken(AsmToken::Comma))
4948 return TokError("minor version number required, comma expected");
4949
4950 if (ParseAsAbsoluteExpression(Minor))
4951 return TokError("invalid minor version");
4952
4953 return false;
4954}
4955
4956bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4957 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4958 return TokError("directive only supported for amdgcn architecture");
4959
4960 std::string TargetIDDirective;
4961 SMLoc TargetStart = getTok().getLoc();
4962 if (getParser().parseEscapedString(TargetIDDirective))
4963 return true;
4964
4965 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4966 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4967 return getParser().Error(TargetRange.Start,
4968 (Twine(".amdgcn_target directive's target id ") +
4969 Twine(TargetIDDirective) +
4970 Twine(" does not match the specified target id ") +
4971 Twine(getTargetStreamer().getTargetID()->toString())).str());
4972
4973 return false;
4974}
4975
4976bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4977 return Error(Range.Start, "value out of range", Range);
4978}
4979
4980bool AMDGPUAsmParser::calculateGPRBlocks(
4981 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4982 bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
4983 unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
4984 SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4985 // TODO(scott.linder): These calculations are duplicated from
4986 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4987 IsaVersion Version = getIsaVersion(getSTI().getCPU());
4988
4989 unsigned NumVGPRs = NextFreeVGPR;
4990 unsigned NumSGPRs = NextFreeSGPR;
4991
4992 if (Version.Major >= 10)
4993 NumSGPRs = 0;
4994 else {
4995 unsigned MaxAddressableNumSGPRs =
4996 IsaInfo::getAddressableNumSGPRs(&getSTI());
4997
4998 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4999 NumSGPRs > MaxAddressableNumSGPRs)
5000 return OutOfRangeError(SGPRRange);
5001
5002 NumSGPRs +=
5003 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
5004
5005 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5006 NumSGPRs > MaxAddressableNumSGPRs)
5007 return OutOfRangeError(SGPRRange);
5008
5009 if (Features.test(FeatureSGPRInitBug))
5010 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
5011 }
5012
5013 VGPRBlocks =
5014 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
5015 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
5016
5017 return false;
5018}
5019
5020bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5021 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5022 return TokError("directive only supported for amdgcn architecture");
5023
5024 if (!isHsaAbi(getSTI()))
5025 return TokError("directive only supported for amdhsa OS");
5026
5027 StringRef KernelName;
5028 if (getParser().parseIdentifier(KernelName))
5029 return true;
5030
5031 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
5032
5033 StringSet<> Seen;
5034
5035 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5036
5037 SMRange VGPRRange;
5038 uint64_t NextFreeVGPR = 0;
5039 uint64_t AccumOffset = 0;
5040 uint64_t SharedVGPRCount = 0;
5041 uint64_t PreloadLength = 0;
5042 uint64_t PreloadOffset = 0;
5043 SMRange SGPRRange;
5044 uint64_t NextFreeSGPR = 0;
5045
5046 // Count the number of user SGPRs implied from the enabled feature bits.
5047 unsigned ImpliedUserSGPRCount = 0;
5048
5049 // Track if the asm explicitly contains the directive for the user SGPR
5050 // count.
5051 std::optional<unsigned> ExplicitUserSGPRCount;
5052 bool ReserveVCC = true;
5053 bool ReserveFlatScr = true;
5054 std::optional<bool> EnableWavefrontSize32;
5055
5056 while (true) {
5057 while (trySkipToken(AsmToken::EndOfStatement));
5058
5059 StringRef ID;
5060 SMRange IDRange = getTok().getLocRange();
5061 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5062 return true;
5063
5064 if (ID == ".end_amdhsa_kernel")
5065 break;
5066
5067 if (!Seen.insert(ID).second)
5068 return TokError(".amdhsa_ directives cannot be repeated");
5069
5070 SMLoc ValStart = getLoc();
5071 int64_t IVal;
5072 if (getParser().parseAbsoluteExpression(IVal))
5073 return true;
5074 SMLoc ValEnd = getLoc();
5075 SMRange ValRange = SMRange(ValStart, ValEnd);
5076
5077 if (IVal < 0)
5078 return OutOfRangeError(ValRange);
5079
5080 uint64_t Val = IVal;
5081
5082#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5083 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
5084 return OutOfRangeError(RANGE); \
5085 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
5086
5087 if (ID == ".amdhsa_group_segment_fixed_size") {
5088 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
5089 return OutOfRangeError(ValRange);
5090 KD.group_segment_fixed_size = Val;
5091 } else if (ID == ".amdhsa_private_segment_fixed_size") {
5092 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
5093 return OutOfRangeError(ValRange);
5094 KD.private_segment_fixed_size = Val;
5095 } else if (ID == ".amdhsa_kernarg_size") {
5096 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
5097 return OutOfRangeError(ValRange);
5098 KD.kernarg_size = Val;
5099 } else if (ID == ".amdhsa_user_sgpr_count") {
5100 ExplicitUserSGPRCount = Val;
5101 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5102 if (hasArchitectedFlatScratch())
5103 return Error(IDRange.Start,
5104 "directive is not supported with architected flat scratch",
5105 IDRange);
5106 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5107 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5108 Val, ValRange);
5109 if (Val)
5110 ImpliedUserSGPRCount += 4;
5111 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
5112 if (!hasKernargPreload())
5113 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5114
5115 if (Val > getMaxNumUserSGPRs())
5116 return OutOfRangeError(ValRange);
5117 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, Val,
5118 ValRange);
5119 if (Val) {
5120 ImpliedUserSGPRCount += Val;
5121 PreloadLength = Val;
5122 }
5123 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
5124 if (!hasKernargPreload())
5125 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5126
5127 if (Val >= 1024)
5128 return OutOfRangeError(ValRange);
5129 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, Val,
5130 ValRange);
5131 if (Val)
5132 PreloadOffset = Val;
5133 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5134 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5135 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
5136 ValRange);
5137 if (Val)
5138 ImpliedUserSGPRCount += 2;
5139 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5140 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5141 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
5142 ValRange);
5143 if (Val)
5144 ImpliedUserSGPRCount += 2;
5145 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5146 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5147 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5148 Val, ValRange);
5149 if (Val)
5150 ImpliedUserSGPRCount += 2;
5151 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5152 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5153 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
5154 ValRange);
5155 if (Val)
5156 ImpliedUserSGPRCount += 2;
5157 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5158 if (hasArchitectedFlatScratch())
5159 return Error(IDRange.Start,
5160 "directive is not supported with architected flat scratch",
5161 IDRange);
5162 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5163 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
5164 ValRange);
5165 if (Val)
5166 ImpliedUserSGPRCount += 2;
5167 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5168 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5169 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5170 Val, ValRange);
5171 if (Val)
5172 ImpliedUserSGPRCount += 1;
5173 } else if (ID == ".amdhsa_wavefront_size32") {
5174 if (IVersion.Major < 10)
5175 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5176 EnableWavefrontSize32 = Val;
5177 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5178 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
5179 Val, ValRange);
5180 } else if (ID == ".amdhsa_uses_dynamic_stack") {
5181 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5182 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
5183 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5184 if (hasArchitectedFlatScratch())
5185 return Error(IDRange.Start,
5186 "directive is not supported with architected flat scratch",
5187 IDRange);
5188 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5189 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5190 } else if (ID == ".amdhsa_enable_private_segment") {
5191 if (!hasArchitectedFlatScratch())
5192 return Error(
5193 IDRange.Start,
5194 "directive is not supported without architected flat scratch",
5195 IDRange);
5196 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5197 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5198 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5199 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5200 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
5201 ValRange);
5202 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5203 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5204 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
5205 ValRange);
5206 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5207 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5208 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
5209 ValRange);
5210 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5211 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5212 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
5213 ValRange);
5214 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5215 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5216 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
5217 ValRange);
5218 } else if (ID == ".amdhsa_next_free_vgpr") {
5219 VGPRRange = ValRange;
5220 NextFreeVGPR = Val;
5221 } else if (ID == ".amdhsa_next_free_sgpr") {
5222 SGPRRange = ValRange;
5223 NextFreeSGPR = Val;
5224 } else if (ID == ".amdhsa_accum_offset") {
5225 if (!isGFX90A())
5226 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5227 AccumOffset = Val;
5228 } else if (ID == ".amdhsa_reserve_vcc") {
5229 if (!isUInt<1>(Val))
5230 return OutOfRangeError(ValRange);
5231 ReserveVCC = Val;
5232 } else if (ID == ".amdhsa_reserve_flat_scratch") {
5233 if (IVersion.Major < 7)
5234 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5235 if (hasArchitectedFlatScratch())
5236 return Error(IDRange.Start,
5237 "directive is not supported with architected flat scratch",
5238 IDRange);
5239 if (!isUInt<1>(Val))
5240 return OutOfRangeError(ValRange);
5241 ReserveFlatScr = Val;
5242 } else if (ID == ".amdhsa_reserve_xnack_mask") {
5243 if (IVersion.Major < 8)
5244 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5245 if (!isUInt<1>(Val))
5246 return OutOfRangeError(ValRange);
5247 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())