1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
12#include "SIDefines.h"
13#include "SIInstrInfo.h"
14#include "SIRegisterInfo.h"
19#include "llvm/ADT/APFloat.h"
21#include "llvm/ADT/StringSet.h"
22#include "llvm/ADT/Twine.h"
24#include "llvm/MC/MCAsmInfo.h"
25#include "llvm/MC/MCContext.h"
26#include "llvm/MC/MCExpr.h"
27#include "llvm/MC/MCInst.h"
28#include "llvm/MC/MCInstrDesc.h"
33#include "llvm/MC/MCSymbol.h"
41#include <optional>
42
43using namespace llvm;
44using namespace llvm::AMDGPU;
45using namespace llvm::amdhsa;
46
47namespace {
48
49class AMDGPUAsmParser;
50
51enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
52
53//===----------------------------------------------------------------------===//
54// Operand
55//===----------------------------------------------------------------------===//
56
57class AMDGPUOperand : public MCParsedAsmOperand {
58 enum KindTy {
59 Token,
60 Immediate,
61 Register,
62 Expression,
63 } Kind;
64
65 SMLoc StartLoc, EndLoc;
66 const AMDGPUAsmParser *AsmParser;
67
68public:
69 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
70 : Kind(Kind_), AsmParser(AsmParser_) {}
71
72 using Ptr = std::unique_ptr<AMDGPUOperand>;
73
74 struct Modifiers {
75 bool Abs = false;
76 bool Neg = false;
77 bool Sext = false;
78
79 bool hasFPModifiers() const { return Abs || Neg; }
80 bool hasIntModifiers() const { return Sext; }
81 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
82
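 // The get*ModifiersOperand() helpers pack these flags into the SISrcMods bit
 // layout expected by the src*_modifiers operand of an MCInst (see
 // addRegOrImmWithInputModsOperands below).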
83 int64_t getFPModifiersOperand() const {
84 int64_t Operand = 0;
85 Operand |= Abs ? SISrcMods::ABS : 0u;
86 Operand |= Neg ? SISrcMods::NEG : 0u;
87 return Operand;
88 }
89
90 int64_t getIntModifiersOperand() const {
91 int64_t Operand = 0;
92 Operand |= Sext ? SISrcMods::SEXT : 0u;
93 return Operand;
94 }
95
96 int64_t getModifiersOperand() const {
97 assert(!(hasFPModifiers() && hasIntModifiers())
98 && "fp and int modifiers should not be used simultaneously");
99 if (hasFPModifiers()) {
100 return getFPModifiersOperand();
101 } else if (hasIntModifiers()) {
102 return getIntModifiersOperand();
103 } else {
104 return 0;
105 }
106 }
107
108 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
109 };
110
111 enum ImmTy {
112 ImmTyNone,
113 ImmTyGDS,
114 ImmTyLDS,
115 ImmTyOffen,
116 ImmTyIdxen,
117 ImmTyAddr64,
118 ImmTyOffset,
119 ImmTyInstOffset,
120 ImmTyOffset0,
121 ImmTyOffset1,
122 ImmTySMEMOffsetMod,
123 ImmTyCPol,
124 ImmTySWZ,
125 ImmTyTFE,
126 ImmTyD16,
127 ImmTyClampSI,
128 ImmTyOModSI,
129 ImmTySdwaDstSel,
130 ImmTySdwaSrc0Sel,
131 ImmTySdwaSrc1Sel,
132 ImmTySdwaDstUnused,
133 ImmTyDMask,
134 ImmTyDim,
135 ImmTyUNorm,
136 ImmTyDA,
137 ImmTyR128A16,
138 ImmTyA16,
139 ImmTyLWE,
140 ImmTyExpTgt,
141 ImmTyExpCompr,
142 ImmTyExpVM,
143 ImmTyFORMAT,
144 ImmTyHwreg,
145 ImmTyOff,
146 ImmTySendMsg,
147 ImmTyInterpSlot,
148 ImmTyInterpAttr,
149 ImmTyAttrChan,
150 ImmTyOpSel,
151 ImmTyOpSelHi,
152 ImmTyNegLo,
153 ImmTyNegHi,
154 ImmTyDPP8,
155 ImmTyDppCtrl,
156 ImmTyDppRowMask,
157 ImmTyDppBankMask,
158 ImmTyDppBoundCtrl,
159 ImmTyDppFi,
160 ImmTySwizzle,
161 ImmTyGprIdxMode,
162 ImmTyHigh,
163 ImmTyBLGP,
164 ImmTyCBSZ,
165 ImmTyABID,
166 ImmTyEndpgm,
167 ImmTyWaitVDST,
168 ImmTyWaitEXP,
169 };
170
171 // Immediate operand kind.
172 // It helps to identify the location of an offending operand after an error.
173 // Note that regular literals and mandatory literals (KImm) must be handled
174 // differently. When looking for an offending operand, we should usually
175 // ignore mandatory literals because they are part of the instruction and
176 // cannot be changed. The location of a mandatory literal is reported only for
177 // VOPD, when both OpX and OpY have a KImm and there are no other literals.
178 enum ImmKindTy {
179 ImmKindTyNone,
180 ImmKindTyLiteral,
181 ImmKindTyMandatoryLiteral,
182 ImmKindTyConst,
183 };
184
185private:
186 struct TokOp {
187 const char *Data;
188 unsigned Length;
189 };
190
191 struct ImmOp {
192 int64_t Val;
193 ImmTy Type;
194 bool IsFPImm;
195 mutable ImmKindTy Kind;
196 Modifiers Mods;
197 };
198
199 struct RegOp {
200 unsigned RegNo;
201 Modifiers Mods;
202 };
203
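 // Payload for the active Kind; exactly one of the union members below is
 // meaningful for a given operand.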
204 union {
205 TokOp Tok;
206 ImmOp Imm;
207 RegOp Reg;
208 const MCExpr *Expr;
209 };
210
211public:
212 bool isToken() const override { return Kind == Token; }
213
214 bool isSymbolRefExpr() const {
215 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
216 }
217
218 bool isImm() const override {
219 return Kind == Immediate;
220 }
221
222 void setImmKindNone() const {
223 assert(isImm());
224 Imm.Kind = ImmKindTyNone;
225 }
226
227 void setImmKindLiteral() const {
228 assert(isImm());
229 Imm.Kind = ImmKindTyLiteral;
230 }
231
232 void setImmKindMandatoryLiteral() const {
233 assert(isImm());
234 Imm.Kind = ImmKindTyMandatoryLiteral;
235 }
236
237 void setImmKindConst() const {
238 assert(isImm());
239 Imm.Kind = ImmKindTyConst;
240 }
241
242 bool IsImmKindLiteral() const {
243 return isImm() && Imm.Kind == ImmKindTyLiteral;
244 }
245
246 bool IsImmKindMandatoryLiteral() const {
247 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
248 }
249
250 bool isImmKindConst() const {
251 return isImm() && Imm.Kind == ImmKindTyConst;
252 }
253
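 // An inlinable immediate is one that fits the hardware's inline constant
 // encoding (small integers and selected FP values such as +/-0.5, 1.0, 2.0,
 // 4.0); anything else is a literal that needs an extra dword in the
 // instruction stream.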
254 bool isInlinableImm(MVT type) const;
255 bool isLiteralImm(MVT type) const;
256
257 bool isRegKind() const {
258 return Kind == Register;
259 }
260
261 bool isReg() const override {
262 return isRegKind() && !hasModifiers();
263 }
264
265 bool isRegOrInline(unsigned RCID, MVT type) const {
266 return isRegClass(RCID) || isInlinableImm(type);
267 }
268
269 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
270 return isRegOrInline(RCID, type) || isLiteralImm(type);
271 }
272
273 bool isRegOrImmWithInt16InputMods() const {
274 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
275 }
276
277 bool isRegOrImmWithInt32InputMods() const {
278 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
279 }
280
281 bool isRegOrInlineImmWithInt16InputMods() const {
282 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
283 }
284
285 bool isRegOrInlineImmWithInt32InputMods() const {
286 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
287 }
288
289 bool isRegOrImmWithInt64InputMods() const {
290 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
291 }
292
293 bool isRegOrImmWithFP16InputMods() const {
294 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
295 }
296
297 bool isRegOrImmWithFP32InputMods() const {
298 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
299 }
300
301 bool isRegOrImmWithFP64InputMods() const {
302 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
303 }
304
305 bool isRegOrInlineImmWithFP16InputMods() const {
306 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
307 }
308
309 bool isRegOrInlineImmWithFP32InputMods() const {
310 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
311 }
312
313
314 bool isVReg() const {
315 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
316 isRegClass(AMDGPU::VReg_64RegClassID) ||
317 isRegClass(AMDGPU::VReg_96RegClassID) ||
318 isRegClass(AMDGPU::VReg_128RegClassID) ||
319 isRegClass(AMDGPU::VReg_160RegClassID) ||
320 isRegClass(AMDGPU::VReg_192RegClassID) ||
321 isRegClass(AMDGPU::VReg_256RegClassID) ||
322 isRegClass(AMDGPU::VReg_512RegClassID) ||
323 isRegClass(AMDGPU::VReg_1024RegClassID);
324 }
325
326 bool isVReg32() const {
327 return isRegClass(AMDGPU::VGPR_32RegClassID);
328 }
329
330 bool isVReg32OrOff() const {
331 return isOff() || isVReg32();
332 }
333
334 bool isNull() const {
335 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
336 }
337
338 bool isVRegWithInputMods() const;
339 bool isT16VRegWithInputMods() const;
340
341 bool isSDWAOperand(MVT type) const;
342 bool isSDWAFP16Operand() const;
343 bool isSDWAFP32Operand() const;
344 bool isSDWAInt16Operand() const;
345 bool isSDWAInt32Operand() const;
346
347 bool isImmTy(ImmTy ImmT) const {
348 return isImm() && Imm.Type == ImmT;
349 }
350
351 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
352
353 bool isImmModifier() const {
354 return isImm() && Imm.Type != ImmTyNone;
355 }
356
357 bool isClampSI() const { return isImmTy(ImmTyClampSI); }
358 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
359 bool isDMask() const { return isImmTy(ImmTyDMask); }
360 bool isDim() const { return isImmTy(ImmTyDim); }
361 bool isUNorm() const { return isImmTy(ImmTyUNorm); }
362 bool isDA() const { return isImmTy(ImmTyDA); }
363 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
364 bool isA16() const { return isImmTy(ImmTyA16); }
365 bool isLWE() const { return isImmTy(ImmTyLWE); }
366 bool isOff() const { return isImmTy(ImmTyOff); }
367 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
368 bool isExpVM() const { return isImmTy(ImmTyExpVM); }
369 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
370 bool isOffen() const { return isImmTy(ImmTyOffen); }
371 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
372 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
373 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
374 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
375 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
376 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
377 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
378 bool isGDS() const { return isImmTy(ImmTyGDS); }
379 bool isLDS() const { return isImmTy(ImmTyLDS); }
380 bool isCPol() const { return isImmTy(ImmTyCPol); }
381 bool isSWZ() const { return isImmTy(ImmTySWZ); }
382 bool isTFE() const { return isImmTy(ImmTyTFE); }
383 bool isD16() const { return isImmTy(ImmTyD16); }
384 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
385 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
386 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
387 bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
388 bool isFI() const { return isImmTy(ImmTyDppFi); }
389 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
390 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
391 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
392 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
393 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
394 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
395 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
396 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
397 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
398 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
399 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
400 bool isHigh() const { return isImmTy(ImmTyHigh); }
401
402 bool isRegOrImm() const {
403 return isReg() || isImm();
404 }
405
406 bool isRegClass(unsigned RCID) const;
407
408 bool isInlineValue() const;
409
410 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
411 return isRegOrInline(RCID, type) && !hasModifiers();
412 }
413
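 // Naming convention for the operand predicates below: the "SCSrc"/"VCSrc"
 // forms accept a register of the given class or an inline constant only,
 // while the "SSrc"/"VSrc" forms additionally accept a literal; "VISrc" and
 // "AISrc" restrict the register to VGPRs and AGPRs respectively. The suffix
 // (B16/B32/B64, F16/F32/F64, V2...) gives the expected type and width.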
414 bool isSCSrcB16() const {
415 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
416 }
417
418 bool isSCSrcV2B16() const {
419 return isSCSrcB16();
420 }
421
422 bool isSCSrcB32() const {
423 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
424 }
425
426 bool isSCSrcB64() const {
427 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
428 }
429
430 bool isBoolReg() const;
431
432 bool isSCSrcF16() const {
433 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
434 }
435
436 bool isSCSrcV2F16() const {
437 return isSCSrcF16();
438 }
439
440 bool isSCSrcF32() const {
441 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
442 }
443
444 bool isSCSrcF64() const {
445 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
446 }
447
448 bool isSSrcB32() const {
449 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
450 }
451
452 bool isSSrcB16() const {
453 return isSCSrcB16() || isLiteralImm(MVT::i16);
454 }
455
456 bool isSSrcV2B16() const {
457 llvm_unreachable("cannot happen");
458 return isSSrcB16();
459 }
460
461 bool isSSrcB64() const {
462 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
463 // See isVSrc64().
464 return isSCSrcB64() || isLiteralImm(MVT::i64);
465 }
466
467 bool isSSrcF32() const {
468 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
469 }
470
471 bool isSSrcF64() const {
472 return isSCSrcB64() || isLiteralImm(MVT::f64);
473 }
474
475 bool isSSrcF16() const {
476 return isSCSrcB16() || isLiteralImm(MVT::f16);
477 }
478
479 bool isSSrcV2F16() const {
480 llvm_unreachable("cannot happen");
481 return isSSrcF16();
482 }
483
484 bool isSSrcV2FP32() const {
485 llvm_unreachable("cannot happen");
486 return isSSrcF32();
487 }
488
489 bool isSCSrcV2FP32() const {
490 llvm_unreachable("cannot happen");
491 return isSCSrcF32();
492 }
493
494 bool isSSrcV2INT32() const {
495 llvm_unreachable("cannot happen");
496 return isSSrcB32();
497 }
498
499 bool isSCSrcV2INT32() const {
500 llvm_unreachable("cannot happen");
501 return isSCSrcB32();
502 }
503
504 bool isSSrcOrLdsB32() const {
505 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
506 isLiteralImm(MVT::i32) || isExpr();
507 }
508
509 bool isVCSrcB32() const {
510 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
511 }
512
513 bool isVCSrcB64() const {
514 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
515 }
516
517 bool isVCSrcTB16_Lo128() const {
518 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
519 }
520
521 bool isVCSrcB16() const {
522 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
523 }
524
525 bool isVCSrcV2B16() const {
526 return isVCSrcB16();
527 }
528
529 bool isVCSrcF32() const {
530 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
531 }
532
533 bool isVCSrcF64() const {
534 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
535 }
536
537 bool isVCSrcTF16_Lo128() const {
538 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
539 }
540
541 bool isVCSrcF16() const {
542 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
543 }
544
545 bool isVCSrcV2F16() const {
546 return isVCSrcF16();
547 }
548
549 bool isVSrcB32() const {
550 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
551 }
552
553 bool isVSrcB64() const {
554 return isVCSrcF64() || isLiteralImm(MVT::i64);
555 }
556
557 bool isVSrcTB16_Lo128() const {
558 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
559 }
560
561 bool isVSrcB16() const {
562 return isVCSrcB16() || isLiteralImm(MVT::i16);
563 }
564
565 bool isVSrcV2B16() const {
566 return isVSrcB16() || isLiteralImm(MVT::v2i16);
567 }
568
569 bool isVCSrcV2FP32() const {
570 return isVCSrcF64();
571 }
572
573 bool isVSrcV2FP32() const {
574 return isVSrcF64() || isLiteralImm(MVT::v2f32);
575 }
576
577 bool isVCSrcV2INT32() const {
578 return isVCSrcB64();
579 }
580
581 bool isVSrcV2INT32() const {
582 return isVSrcB64() || isLiteralImm(MVT::v2i32);
583 }
584
585 bool isVSrcF32() const {
586 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
587 }
588
589 bool isVSrcF64() const {
590 return isVCSrcF64() || isLiteralImm(MVT::f64);
591 }
592
593 bool isVSrcTF16_Lo128() const {
594 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
595 }
596
597 bool isVSrcF16() const {
598 return isVCSrcF16() || isLiteralImm(MVT::f16);
599 }
600
601 bool isVSrcV2F16() const {
602 return isVSrcF16() || isLiteralImm(MVT::v2f16);
603 }
604
605 bool isVISrcB32() const {
606 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
607 }
608
609 bool isVISrcB16() const {
610 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
611 }
612
613 bool isVISrcV2B16() const {
614 return isVISrcB16();
615 }
616
617 bool isVISrcF32() const {
618 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
619 }
620
621 bool isVISrcF16() const {
622 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
623 }
624
625 bool isVISrcV2F16() const {
626 return isVISrcF16() || isVISrcB32();
627 }
628
629 bool isVISrc_64B64() const {
630 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
631 }
632
633 bool isVISrc_64F64() const {
634 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
635 }
636
637 bool isVISrc_64V2FP32() const {
638 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
639 }
640
641 bool isVISrc_64V2INT32() const {
642 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
643 }
644
645 bool isVISrc_256B64() const {
646 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
647 }
648
649 bool isVISrc_256F64() const {
650 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
651 }
652
653 bool isVISrc_128B16() const {
654 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
655 }
656
657 bool isVISrc_128V2B16() const {
658 return isVISrc_128B16();
659 }
660
661 bool isVISrc_128B32() const {
662 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
663 }
664
665 bool isVISrc_128F32() const {
666 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
667 }
668
669 bool isVISrc_256V2FP32() const {
670 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
671 }
672
673 bool isVISrc_256V2INT32() const {
674 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
675 }
676
677 bool isVISrc_512B32() const {
678 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
679 }
680
681 bool isVISrc_512B16() const {
682 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
683 }
684
685 bool isVISrc_512V2B16() const {
686 return isVISrc_512B16();
687 }
688
689 bool isVISrc_512F32() const {
690 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
691 }
692
693 bool isVISrc_512F16() const {
694 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
695 }
696
697 bool isVISrc_512V2F16() const {
698 return isVISrc_512F16() || isVISrc_512B32();
699 }
700
701 bool isVISrc_1024B32() const {
702 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
703 }
704
705 bool isVISrc_1024B16() const {
706 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
707 }
708
709 bool isVISrc_1024V2B16() const {
710 return isVISrc_1024B16();
711 }
712
713 bool isVISrc_1024F32() const {
714 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
715 }
716
717 bool isVISrc_1024F16() const {
718 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
719 }
720
721 bool isVISrc_1024V2F16() const {
722 return isVISrc_1024F16() || isVISrc_1024B32();
723 }
724
725 bool isAISrcB32() const {
726 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
727 }
728
729 bool isAISrcB16() const {
730 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
731 }
732
733 bool isAISrcV2B16() const {
734 return isAISrcB16();
735 }
736
737 bool isAISrcF32() const {
738 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
739 }
740
741 bool isAISrcF16() const {
742 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
743 }
744
745 bool isAISrcV2F16() const {
746 return isAISrcF16() || isAISrcB32();
747 }
748
749 bool isAISrc_64B64() const {
750 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
751 }
752
753 bool isAISrc_64F64() const {
754 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
755 }
756
757 bool isAISrc_128B32() const {
758 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
759 }
760
761 bool isAISrc_128B16() const {
762 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
763 }
764
765 bool isAISrc_128V2B16() const {
766 return isAISrc_128B16();
767 }
768
769 bool isAISrc_128F32() const {
770 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
771 }
772
773 bool isAISrc_128F16() const {
774 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
775 }
776
777 bool isAISrc_128V2F16() const {
778 return isAISrc_128F16() || isAISrc_128B32();
779 }
780
781 bool isVISrc_128F16() const {
782 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
783 }
784
785 bool isVISrc_128V2F16() const {
786 return isVISrc_128F16() || isVISrc_128B32();
787 }
788
789 bool isAISrc_256B64() const {
790 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
791 }
792
793 bool isAISrc_256F64() const {
794 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
795 }
796
797 bool isAISrc_512B32() const {
798 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
799 }
800
801 bool isAISrc_512B16() const {
802 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
803 }
804
805 bool isAISrc_512V2B16() const {
806 return isAISrc_512B16();
807 }
808
809 bool isAISrc_512F32() const {
810 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
811 }
812
813 bool isAISrc_512F16() const {
814 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
815 }
816
817 bool isAISrc_512V2F16() const {
818 return isAISrc_512F16() || isAISrc_512B32();
819 }
820
821 bool isAISrc_1024B32() const {
822 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
823 }
824
825 bool isAISrc_1024B16() const {
826 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
827 }
828
829 bool isAISrc_1024V2B16() const {
830 return isAISrc_1024B16();
831 }
832
833 bool isAISrc_1024F32() const {
834 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
835 }
836
837 bool isAISrc_1024F16() const {
838 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
839 }
840
841 bool isAISrc_1024V2F16() const {
842 return isAISrc_1024F16() || isAISrc_1024B32();
843 }
844
845 bool isKImmFP32() const {
846 return isLiteralImm(MVT::f32);
847 }
848
849 bool isKImmFP16() const {
850 return isLiteralImm(MVT::f16);
851 }
852
853 bool isMem() const override {
854 return false;
855 }
856
857 bool isExpr() const {
858 return Kind == Expression;
859 }
860
861 bool isSoppBrTarget() const {
862 return isExpr() || isImm();
863 }
864
865 bool isSWaitCnt() const;
866 bool isDepCtr() const;
867 bool isSDelayAlu() const;
868 bool isHwreg() const;
869 bool isSendMsg() const;
870 bool isSwizzle() const;
871 bool isSMRDOffset8() const;
872 bool isSMEMOffset() const;
873 bool isSMRDLiteralOffset() const;
874 bool isDPP8() const;
875 bool isDPPCtrl() const;
876 bool isBLGP() const;
877 bool isCBSZ() const;
878 bool isABID() const;
879 bool isGPRIdxMode() const;
880 bool isS16Imm() const;
881 bool isU16Imm() const;
882 bool isEndpgm() const;
883 bool isWaitVDST() const;
884 bool isWaitEXP() const;
885
886 StringRef getToken() const {
887 assert(isToken());
888 return StringRef(Tok.Data, Tok.Length);
889 }
890
891 int64_t getImm() const {
892 assert(isImm());
893 return Imm.Val;
894 }
895
896 void setImm(int64_t Val) {
897 assert(isImm());
898 Imm.Val = Val;
899 }
900
901 ImmTy getImmTy() const {
902 assert(isImm());
903 return Imm.Type;
904 }
905
906 unsigned getReg() const override {
907 assert(isRegKind());
908 return Reg.RegNo;
909 }
910
911 SMLoc getStartLoc() const override {
912 return StartLoc;
913 }
914
915 SMLoc getEndLoc() const override {
916 return EndLoc;
917 }
918
919 SMRange getLocRange() const {
920 return SMRange(StartLoc, EndLoc);
921 }
922
923 Modifiers getModifiers() const {
924 assert(isRegKind() || isImmTy(ImmTyNone));
925 return isRegKind() ? Reg.Mods : Imm.Mods;
926 }
927
928 void setModifiers(Modifiers Mods) {
929 assert(isRegKind() || isImmTy(ImmTyNone));
930 if (isRegKind())
931 Reg.Mods = Mods;
932 else
933 Imm.Mods = Mods;
934 }
935
936 bool hasModifiers() const {
937 return getModifiers().hasModifiers();
938 }
939
940 bool hasFPModifiers() const {
941 return getModifiers().hasFPModifiers();
942 }
943
944 bool hasIntModifiers() const {
945 return getModifiers().hasIntModifiers();
946 }
947
948 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
949
950 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
951
952 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
953
954 template <unsigned Bitwidth>
955 void addKImmFPOperands(MCInst &Inst, unsigned N) const;
956
957 void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
958 addKImmFPOperands<16>(Inst, N);
959 }
960
961 void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
962 addKImmFPOperands<32>(Inst, N);
963 }
964
965 void addRegOperands(MCInst &Inst, unsigned N) const;
966
967 void addBoolRegOperands(MCInst &Inst, unsigned N) const {
968 addRegOperands(Inst, N);
969 }
970
971 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
972 if (isRegKind())
973 addRegOperands(Inst, N);
974 else if (isExpr())
975 Inst.addOperand(MCOperand::createExpr(Expr));
976 else
977 addImmOperands(Inst, N);
978 }
979
980 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
981 Modifiers Mods = getModifiers();
982 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
983 if (isRegKind()) {
984 addRegOperands(Inst, N);
985 } else {
986 addImmOperands(Inst, N, false);
987 }
988 }
989
990 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
991 assert(!hasIntModifiers());
992 addRegOrImmWithInputModsOperands(Inst, N);
993 }
994
995 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
996 assert(!hasFPModifiers());
997 addRegOrImmWithInputModsOperands(Inst, N);
998 }
999
1000 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1001 Modifiers Mods = getModifiers();
1002 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1003 assert(isRegKind());
1004 addRegOperands(Inst, N);
1005 }
1006
1007 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1008 assert(!hasIntModifiers());
1009 addRegWithInputModsOperands(Inst, N);
1010 }
1011
1012 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1013 assert(!hasFPModifiers());
1014 addRegWithInputModsOperands(Inst, N);
1015 }
1016
1017 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
1018 if (isImm())
1019 addImmOperands(Inst, N);
1020 else {
1021 assert(isExpr());
1022 Inst.addOperand(MCOperand::createExpr(Expr));
1023 }
1024 }
1025
1026 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1027 switch (Type) {
1028 case ImmTyNone: OS << "None"; break;
1029 case ImmTyGDS: OS << "GDS"; break;
1030 case ImmTyLDS: OS << "LDS"; break;
1031 case ImmTyOffen: OS << "Offen"; break;
1032 case ImmTyIdxen: OS << "Idxen"; break;
1033 case ImmTyAddr64: OS << "Addr64"; break;
1034 case ImmTyOffset: OS << "Offset"; break;
1035 case ImmTyInstOffset: OS << "InstOffset"; break;
1036 case ImmTyOffset0: OS << "Offset0"; break;
1037 case ImmTyOffset1: OS << "Offset1"; break;
1038 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1039 case ImmTyCPol: OS << "CPol"; break;
1040 case ImmTySWZ: OS << "SWZ"; break;
1041 case ImmTyTFE: OS << "TFE"; break;
1042 case ImmTyD16: OS << "D16"; break;
1043 case ImmTyFORMAT: OS << "FORMAT"; break;
1044 case ImmTyClampSI: OS << "ClampSI"; break;
1045 case ImmTyOModSI: OS << "OModSI"; break;
1046 case ImmTyDPP8: OS << "DPP8"; break;
1047 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1048 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1049 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1050 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1051 case ImmTyDppFi: OS << "FI"; break;
1052 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1053 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1054 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1055 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1056 case ImmTyDMask: OS << "DMask"; break;
1057 case ImmTyDim: OS << "Dim"; break;
1058 case ImmTyUNorm: OS << "UNorm"; break;
1059 case ImmTyDA: OS << "DA"; break;
1060 case ImmTyR128A16: OS << "R128A16"; break;
1061 case ImmTyA16: OS << "A16"; break;
1062 case ImmTyLWE: OS << "LWE"; break;
1063 case ImmTyOff: OS << "Off"; break;
1064 case ImmTyExpTgt: OS << "ExpTgt"; break;
1065 case ImmTyExpCompr: OS << "ExpCompr"; break;
1066 case ImmTyExpVM: OS << "ExpVM"; break;
1067 case ImmTyHwreg: OS << "Hwreg"; break;
1068 case ImmTySendMsg: OS << "SendMsg"; break;
1069 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1070 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1071 case ImmTyAttrChan: OS << "AttrChan"; break;
1072 case ImmTyOpSel: OS << "OpSel"; break;
1073 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1074 case ImmTyNegLo: OS << "NegLo"; break;
1075 case ImmTyNegHi: OS << "NegHi"; break;
1076 case ImmTySwizzle: OS << "Swizzle"; break;
1077 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1078 case ImmTyHigh: OS << "High"; break;
1079 case ImmTyBLGP: OS << "BLGP"; break;
1080 case ImmTyCBSZ: OS << "CBSZ"; break;
1081 case ImmTyABID: OS << "ABID"; break;
1082 case ImmTyEndpgm: OS << "Endpgm"; break;
1083 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1084 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1085 }
1086 }
1087
1088 void print(raw_ostream &OS) const override {
1089 switch (Kind) {
1090 case Register:
1091 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1092 break;
1093 case Immediate:
1094 OS << '<' << getImm();
1095 if (getImmTy() != ImmTyNone) {
1096 OS << " type: "; printImmTy(OS, getImmTy());
1097 }
1098 OS << " mods: " << Imm.Mods << '>';
1099 break;
1100 case Token:
1101 OS << '\'' << getToken() << '\'';
1102 break;
1103 case Expression:
1104 OS << "<expr " << *Expr << '>';
1105 break;
1106 }
1107 }
1108
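 // Factory helpers used throughout the parser to construct operands; a parse
 // routine will typically do something like (illustrative only):
 //   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
 //                                               AMDGPUOperand::ImmTyOffset));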
1109 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1110 int64_t Val, SMLoc Loc,
1111 ImmTy Type = ImmTyNone,
1112 bool IsFPImm = false) {
1113 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1114 Op->Imm.Val = Val;
1115 Op->Imm.IsFPImm = IsFPImm;
1116 Op->Imm.Kind = ImmKindTyNone;
1117 Op->Imm.Type = Type;
1118 Op->Imm.Mods = Modifiers();
1119 Op->StartLoc = Loc;
1120 Op->EndLoc = Loc;
1121 return Op;
1122 }
1123
1124 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1125 StringRef Str, SMLoc Loc,
1126 bool HasExplicitEncodingSize = true) {
1127 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1128 Res->Tok.Data = Str.data();
1129 Res->Tok.Length = Str.size();
1130 Res->StartLoc = Loc;
1131 Res->EndLoc = Loc;
1132 return Res;
1133 }
1134
1135 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1136 unsigned RegNo, SMLoc S,
1137 SMLoc E) {
1138 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1139 Op->Reg.RegNo = RegNo;
1140 Op->Reg.Mods = Modifiers();
1141 Op->StartLoc = S;
1142 Op->EndLoc = E;
1143 return Op;
1144 }
1145
1146 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1147 const class MCExpr *Expr, SMLoc S) {
1148 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1149 Op->Expr = Expr;
1150 Op->StartLoc = S;
1151 Op->EndLoc = S;
1152 return Op;
1153 }
1154};
1155
1156raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1157 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1158 return OS;
1159}
1160
1161//===----------------------------------------------------------------------===//
1162// AsmParser
1163//===----------------------------------------------------------------------===//
1164
1165// Holds info related to the current kernel, e.g. count of SGPRs used.
1166// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1167// .amdgpu_hsa_kernel or at EOF.
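1167 // The scope tracks the highest SGPR/VGPR/AGPR index referenced so far and
1167 // publishes the counts through the .kernel.sgpr_count, .kernel.vgpr_count
1167 // and .kernel.agpr_count assembler symbols.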
1168class KernelScopeInfo {
1169 int SgprIndexUnusedMin = -1;
1170 int VgprIndexUnusedMin = -1;
1171 int AgprIndexUnusedMin = -1;
1172 MCContext *Ctx = nullptr;
1173 MCSubtargetInfo const *MSTI = nullptr;
1174
1175 void usesSgprAt(int i) {
1176 if (i >= SgprIndexUnusedMin) {
1177 SgprIndexUnusedMin = ++i;
1178 if (Ctx) {
1179 MCSymbol* const Sym =
1180 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1181 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1182 }
1183 }
1184 }
1185
1186 void usesVgprAt(int i) {
1187 if (i >= VgprIndexUnusedMin) {
1188 VgprIndexUnusedMin = ++i;
1189 if (Ctx) {
1190 MCSymbol* const Sym =
1191 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1192 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1193 VgprIndexUnusedMin);
1194 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1195 }
1196 }
1197 }
1198
1199 void usesAgprAt(int i) {
1200 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1201 if (!hasMAIInsts(*MSTI))
1202 return;
1203
1204 if (i >= AgprIndexUnusedMin) {
1205 AgprIndexUnusedMin = ++i;
1206 if (Ctx) {
1207 MCSymbol* const Sym =
1208 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1209 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1210
1211 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1212 MCSymbol* const vSym =
1213 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1214 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1215 VgprIndexUnusedMin);
1216 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1217 }
1218 }
1219 }
1220
1221public:
1222 KernelScopeInfo() = default;
1223
1224 void initialize(MCContext &Context) {
1225 Ctx = &Context;
1226 MSTI = Ctx->getSubtargetInfo();
1227
1228 usesSgprAt(SgprIndexUnusedMin = -1);
1229 usesVgprAt(VgprIndexUnusedMin = -1);
1230 if (hasMAIInsts(*MSTI)) {
1231 usesAgprAt(AgprIndexUnusedMin = -1);
1232 }
1233 }
1234
1235 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1236 unsigned RegWidth) {
1237 switch (RegKind) {
1238 case IS_SGPR:
1239 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1240 break;
1241 case IS_AGPR:
1242 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1243 break;
1244 case IS_VGPR:
1245 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1246 break;
1247 default:
1248 break;
1249 }
1250 }
1251};
1252
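// The target-specific assembly parser. It parses AMDGPU mnemonics and operands
// into MCInsts, implements the AMDGPU assembler directives (.amdhsa_kernel,
// .amd_kernel_code_t, HSA/PAL metadata, ...), and performs the target-specific
// validation that the generated matcher cannot express.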
1253class AMDGPUAsmParser : public MCTargetAsmParser {
1254 MCAsmParser &Parser;
1255
1256 unsigned ForcedEncodingSize = 0;
1257 bool ForcedDPP = false;
1258 bool ForcedSDWA = false;
1259 KernelScopeInfo KernelScope;
1260
1261 /// @name Auto-generated Match Functions
1262 /// {
1263
1264#define GET_ASSEMBLER_HEADER
1265#include "AMDGPUGenAsmMatcher.inc"
1266
1267 /// }
1268
1269private:
1270 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1271 bool OutOfRangeError(SMRange Range);
1272 /// Calculate VGPR/SGPR blocks required for given target, reserved
1273 /// registers, and user-specified NextFreeXGPR values.
1274 ///
1275 /// \param Features [in] Target features, used for bug corrections.
1276 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1277 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1278 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1279 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1280 /// descriptor field, if valid.
1281 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1282 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1283 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1284 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1285 /// \param VGPRBlocks [out] Result VGPR block count.
1286 /// \param SGPRBlocks [out] Result SGPR block count.
1287 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1288 bool FlatScrUsed, bool XNACKUsed,
1289 std::optional<bool> EnableWavefrontSize32,
1290 unsigned NextFreeVGPR, SMRange VGPRRange,
1291 unsigned NextFreeSGPR, SMRange SGPRRange,
1292 unsigned &VGPRBlocks, unsigned &SGPRBlocks);
1293 bool ParseDirectiveAMDGCNTarget();
1294 bool ParseDirectiveAMDHSAKernel();
1295 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1296 bool ParseDirectiveHSACodeObjectVersion();
1297 bool ParseDirectiveHSACodeObjectISA();
1298 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1299 bool ParseDirectiveAMDKernelCodeT();
1300 // TODO: Possibly make subtargetHasRegister const.
1301 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1302 bool ParseDirectiveAMDGPUHsaKernel();
1303
1304 bool ParseDirectiveISAVersion();
1305 bool ParseDirectiveHSAMetadata();
1306 bool ParseDirectivePALMetadataBegin();
1307 bool ParseDirectivePALMetadata();
1308 bool ParseDirectiveAMDGPULDS();
1309
1310 /// Common code to parse out a block of text (typically YAML) between start and
1311 /// end directives.
1312 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1313 const char *AssemblerDirectiveEnd,
1314 std::string &CollectString);
1315
1316 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1317 RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1318 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1319 unsigned &RegNum, unsigned &RegWidth,
1320 bool RestoreOnFailure = false);
1321 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1322 unsigned &RegNum, unsigned &RegWidth,
1323 SmallVectorImpl<AsmToken> &Tokens);
1324 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1325 unsigned &RegWidth,
1326 SmallVectorImpl<AsmToken> &Tokens);
1327 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1328 unsigned &RegWidth,
1329 SmallVectorImpl<AsmToken> &Tokens);
1330 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1331 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1332 bool ParseRegRange(unsigned& Num, unsigned& Width);
1333 unsigned getRegularReg(RegisterKind RegKind,
1334 unsigned RegNum,
1335 unsigned RegWidth,
1336 SMLoc Loc);
1337
1338 bool isRegister();
1339 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1340 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1341 void initializeGprCountSymbol(RegisterKind RegKind);
1342 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1343 unsigned RegWidth);
1344 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1345 bool IsAtomic);
1346 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1347 bool IsGdsHardcoded);
1348
1349public:
1350 enum AMDGPUMatchResultTy {
1351 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1352 };
1353 enum OperandMode {
1354 OperandMode_Default,
1355 OperandMode_NSA,
1356 };
1357
1358 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1359
1360 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1361 const MCInstrInfo &MII,
1362 const MCTargetOptions &Options)
1363 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1365
1366 if (getFeatureBits().none()) {
1367 // Set default features.
1368 copySTI().ToggleFeature("southern-islands");
1369 }
1370
1371 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1372
1373 {
1374 // TODO: make these pre-defined variables read-only.
1375 // Currently there is no suitable machinery in core llvm-mc for this.
1376 // MCSymbol::isRedefinable is intended for another purpose, and
1377 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1378 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1379 MCContext &Ctx = getContext();
1380 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1381 MCSymbol *Sym =
1382 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1384 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1386 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1387 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1388 } else {
1389 MCSymbol *Sym =
1390 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1392 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1394 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1395 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1396 }
1397 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1398 initializeGprCountSymbol(IS_VGPR);
1399 initializeGprCountSymbol(IS_SGPR);
1400 } else
1401 KernelScope.initialize(getContext());
1402 }
1403 }
1404
1405 bool hasMIMG_R128() const {
1406 return AMDGPU::hasMIMG_R128(getSTI());
1407 }
1408
1409 bool hasPackedD16() const {
1410 return AMDGPU::hasPackedD16(getSTI());
1411 }
1412
1413 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1414
1415 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1416
1417 bool isSI() const {
1418 return AMDGPU::isSI(getSTI());
1419 }
1420
1421 bool isCI() const {
1422 return AMDGPU::isCI(getSTI());
1423 }
1424
1425 bool isVI() const {
1426 return AMDGPU::isVI(getSTI());
1427 }
1428
1429 bool isGFX9() const {
1430 return AMDGPU::isGFX9(getSTI());
1431 }
1432
1433 // TODO: isGFX90A() is also true for GFX940; this needs to be cleaned up.
1434 bool isGFX90A() const {
1435 return AMDGPU::isGFX90A(getSTI());
1436 }
1437
1438 bool isGFX940() const {
1439 return AMDGPU::isGFX940(getSTI());
1440 }
1441
1442 bool isGFX9Plus() const {
1443 return AMDGPU::isGFX9Plus(getSTI());
1444 }
1445
1446 bool isGFX10() const {
1447 return AMDGPU::isGFX10(getSTI());
1448 }
1449
1450 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1451
1452 bool isGFX11() const {
1453 return AMDGPU::isGFX11(getSTI());
1454 }
1455
1456 bool isGFX11Plus() const {
1457 return AMDGPU::isGFX11Plus(getSTI());
1458 }
1459
1460 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1461
1462 bool isGFX10_BEncoding() const {
1463 return AMDGPU::isGFX10_BEncoding(getSTI());
1464 }
1465
1466 bool hasInv2PiInlineImm() const {
1467 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1468 }
1469
1470 bool hasFlatOffsets() const {
1471 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1472 }
1473
1474 bool hasArchitectedFlatScratch() const {
1475 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1476 }
1477
1478 bool hasSGPR102_SGPR103() const {
1479 return !isVI() && !isGFX9();
1480 }
1481
1482 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1483
1484 bool hasIntClamp() const {
1485 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1486 }
1487
1488 bool hasPartialNSAEncoding() const {
1489 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1490 }
1491
1492 unsigned getNSAMaxSize() const {
1493 return AMDGPU::getNSAMaxSize(getSTI());
1494 }
1495
1496 AMDGPUTargetStreamer &getTargetStreamer() {
1497 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1498 return static_cast<AMDGPUTargetStreamer &>(TS);
1499 }
1500
1501 const MCRegisterInfo *getMRI() const {
1502 // We need this const_cast because for some reason getContext() is not const
1503 // in MCAsmParser.
1504 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1505 }
1506
1507 const MCInstrInfo *getMII() const {
1508 return &MII;
1509 }
1510
1511 const FeatureBitset &getFeatureBits() const {
1512 return getSTI().getFeatureBits();
1513 }
1514
1515 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1516 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1517 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1518
1519 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1520 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1521 bool isForcedDPP() const { return ForcedDPP; }
1522 bool isForcedSDWA() const { return ForcedSDWA; }
1523 ArrayRef<unsigned> getMatchedVariants() const;
1524 StringRef getMatchedVariantName() const;
1525
1526 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1527 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1528 bool RestoreOnFailure);
1529 bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
1530 SMLoc &EndLoc) override;
1531 OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
1532 SMLoc &EndLoc) override;
1533 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1534 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1535 unsigned Kind) override;
1536 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1537 OperandVector &Operands, MCStreamer &Out,
1538 uint64_t &ErrorInfo,
1539 bool MatchingInlineAsm) override;
1540 bool ParseDirective(AsmToken DirectiveID) override;
1541 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1542 OperandMode Mode = OperandMode_Default);
1543 StringRef parseMnemonicSuffix(StringRef Name);
1544 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1545 SMLoc NameLoc, OperandVector &Operands) override;
1546 //bool ProcessInstruction(MCInst &Inst);
1547
1549
1550 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1551
1552 OperandMatchResultTy
1553 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1554 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1555 bool (*ConvertResult)(int64_t &) = nullptr);
1556
1557 OperandMatchResultTy
1558 parseOperandArrayWithPrefix(const char *Prefix,
1559 OperandVector &Operands,
1560 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1561 bool (*ConvertResult)(int64_t&) = nullptr);
1562
1563 OperandMatchResultTy
1564 parseNamedBit(StringRef Name, OperandVector &Operands,
1565 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1566 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1568 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1569 StringRef &Value,
1570 SMLoc &StringLoc);
1571
1572 bool isModifier();
1573 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1574 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1575 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1576 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1577 bool parseSP3NegModifier();
1578 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1580 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1581 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1582 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1583 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1584 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1585 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1586 OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1587 OperandMatchResultTy parseUfmt(int64_t &Format);
1588 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1589 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1591 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1592 OperandMatchResultTy parseNumericFormat(int64_t &Format);
1595 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1596 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1597
1598 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1599 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1600 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1601 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1602
1603 bool parseCnt(int64_t &IntVal);
1604 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1605
1606 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1607 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1609
1610 bool parseDelay(int64_t &Delay);
1611 OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);
1612
1614
1615private:
1616 struct OperandInfoTy {
1617 SMLoc Loc;
1618 int64_t Id;
1619 bool IsSymbolic = false;
1620 bool IsDefined = false;
1621
1622 OperandInfoTy(int64_t Id_) : Id(Id_) {}
1623 };
1624
1625 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1626 bool validateSendMsg(const OperandInfoTy &Msg,
1627 const OperandInfoTy &Op,
1628 const OperandInfoTy &Stream);
1629
1630 bool parseHwregBody(OperandInfoTy &HwReg,
1631 OperandInfoTy &Offset,
1632 OperandInfoTy &Width);
1633 bool validateHwreg(const OperandInfoTy &HwReg,
1634 const OperandInfoTy &Offset,
1635 const OperandInfoTy &Width);
1636
1637 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1638 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1639 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1640
1641 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1642 const OperandVector &Operands) const;
1643 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1644 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1645 SMLoc getLitLoc(const OperandVector &Operands,
1646 bool SearchMandatoryLiterals = false) const;
1647 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1648 SMLoc getConstLoc(const OperandVector &Operands) const;
1649 SMLoc getInstLoc(const OperandVector &Operands) const;
1650
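 // The validate* routines below are run after a successful match and reject
 // instructions that are syntactically valid but not supported or not encodable
 // on the selected subtarget.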
1651 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1652 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1653 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1654 bool validateSOPLiteral(const MCInst &Inst) const;
1655 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1656 bool validateVOPDRegBankConstraints(const MCInst &Inst,
1657 const OperandVector &Operands);
1658 bool validateIntClampSupported(const MCInst &Inst);
1659 bool validateMIMGAtomicDMask(const MCInst &Inst);
1660 bool validateMIMGGatherDMask(const MCInst &Inst);
1661 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1662 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1663 bool validateMIMGAddrSize(const MCInst &Inst);
1664 bool validateMIMGD16(const MCInst &Inst);
1665 bool validateMIMGMSAA(const MCInst &Inst);
1666 bool validateOpSel(const MCInst &Inst);
1667 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1668 bool validateVccOperand(unsigned Reg) const;
1669 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1670 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1671 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1672 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1673 bool validateAGPRLdSt(const MCInst &Inst) const;
1674 bool validateVGPRAlign(const MCInst &Inst) const;
1675 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1676 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1677 bool validateDivScale(const MCInst &Inst);
1678 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1679 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1680 const SMLoc &IDLoc);
1681 bool validateExeczVcczOperands(const OperandVector &Operands);
1682 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1683 std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1684 unsigned getConstantBusLimit(unsigned Opcode) const;
1685 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1686 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1687 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1688
1689 bool isSupportedMnemo(StringRef Mnemo,
1690 const FeatureBitset &FBS);
1691 bool isSupportedMnemo(StringRef Mnemo,
1692 const FeatureBitset &FBS,
1693 ArrayRef<unsigned> Variants);
1694 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1695
1696 bool isId(const StringRef Id) const;
1697 bool isId(const AsmToken &Token, const StringRef Id) const;
1698 bool isToken(const AsmToken::TokenKind Kind) const;
1699 StringRef getId() const;
1700 bool trySkipId(const StringRef Id);
1701 bool trySkipId(const StringRef Pref, const StringRef Id);
1702 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1703 bool trySkipToken(const AsmToken::TokenKind Kind);
1704 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1705 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1706 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1707
1708 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1709 AsmToken::TokenKind getTokenKind() const;
1710 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1711 bool parseExpr(OperandVector &Operands);
1712 StringRef getTokenStr() const;
1713 AsmToken peekToken(bool ShouldSkipSpace = true);
1714 AsmToken getToken() const;
1715 SMLoc getLoc() const;
1716 void lex();
1717
1718public:
1719 void onBeginOfFile() override;
1720
1721 OperandMatchResultTy parseCustomOperand(OperandVector &Operands,
1722 unsigned MCK);
1723
1728 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1730
1731 bool parseSwizzleOperand(int64_t &Op,
1732 const unsigned MinVal,
1733 const unsigned MaxVal,
1734 const StringRef ErrMsg,
1735 SMLoc &Loc);
1736 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1737 const unsigned MinVal,
1738 const unsigned MaxVal,
1739 const StringRef ErrMsg);
1741 bool parseSwizzleOffset(int64_t &Imm);
1742 bool parseSwizzleMacro(int64_t &Imm);
1743 bool parseSwizzleQuadPerm(int64_t &Imm);
1744 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1745 bool parseSwizzleBroadcast(int64_t &Imm);
1746 bool parseSwizzleSwap(int64_t &Imm);
1747 bool parseSwizzleReverse(int64_t &Imm);
1748
1750 int64_t parseGPRIdxMacro();
1751
1752 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1753 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1754 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1755
1756 AMDGPUOperand::Ptr defaultCPol() const;
1757
1758 AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1759 AMDGPUOperand::Ptr defaultSMEMOffset() const;
1760 AMDGPUOperand::Ptr defaultSMEMOffsetMod() const;
1761 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1762 AMDGPUOperand::Ptr defaultFlatOffset() const;
1763
1764 OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1765
1766 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1767 OptionalImmIndexMap &OptionalIdx);
1768 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1769 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1770 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1771 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1772 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1773 OptionalImmIndexMap &OptionalIdx);
1774 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1775 OptionalImmIndexMap &OptionalIdx);
1776
1777 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1778 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1779
1780 void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1781 bool IsAtomic = false);
1782 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1783 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1784
1785 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1786
1787 bool parseDimId(unsigned &Encoding);
1791 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1792 int64_t parseDPPCtrlSel(StringRef Ctrl);
1793 int64_t parseDPPCtrlPerm();
1794 AMDGPUOperand::Ptr defaultRowMask() const;
1795 AMDGPUOperand::Ptr defaultBankMask() const;
1796 AMDGPUOperand::Ptr defaultDppBoundCtrl() const;
1797 AMDGPUOperand::Ptr defaultFI() const;
1798 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1799 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1800 cvtDPP(Inst, Operands, true);
1801 }
1802 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1803 bool IsDPP8 = false);
1804 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1805 cvtVOP3DPP(Inst, Operands, true);
1806 }
1807
1808 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1809 AMDGPUOperand::ImmTy Type);
1810 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1811 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1812 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1813 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1814 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1815 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1816 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1817 uint64_t BasicInstType,
1818 bool SkipDstVcc = false,
1819 bool SkipSrcVcc = false);
1820
1821 AMDGPUOperand::Ptr defaultBLGP() const;
1822 AMDGPUOperand::Ptr defaultCBSZ() const;
1823 AMDGPUOperand::Ptr defaultABID() const;
1824
1826 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1827
1828 AMDGPUOperand::Ptr defaultWaitVDST() const;
1829 AMDGPUOperand::Ptr defaultWaitEXP() const;
1831};
1832
1833} // end anonymous namespace
1834
1835// May be called with integer type with equivalent bitwidth.
1836static const fltSemantics *getFltSemantics(unsigned Size) {
1837 switch (Size) {
1838 case 4:
1839 return &APFloat::IEEEsingle();
1840 case 8:
1841 return &APFloat::IEEEdouble();
1842 case 2:
1843 return &APFloat::IEEEhalf();
1844 default:
1845 llvm_unreachable("unsupported fp type");
1846 }
1847}
1848
1849static const fltSemantics *getFltSemantics(MVT VT) {
1850 return getFltSemantics(VT.getSizeInBits() / 8);
1851}
1852
1853static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1854 switch (OperandType) {
1855 case AMDGPU::OPERAND_REG_IMM_INT32:
1856 case AMDGPU::OPERAND_REG_IMM_FP32:
1857 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1858 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1859 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1860 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1861 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1862 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1863 case AMDGPU::OPERAND_REG_IMM_V2FP32:
1864 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1865 case AMDGPU::OPERAND_REG_IMM_V2INT32:
1866 case AMDGPU::OPERAND_KIMM32:
1867 return &APFloat::IEEEsingle();
1868 case AMDGPU::OPERAND_REG_IMM_INT64:
1869 case AMDGPU::OPERAND_REG_IMM_FP64:
1870 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1871 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1872 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1873 return &APFloat::IEEEdouble();
1874 case AMDGPU::OPERAND_REG_IMM_INT16:
1875 case AMDGPU::OPERAND_REG_IMM_FP16:
1876 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1877 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1878 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1879 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1880 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1881 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1882 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1883 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1884 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1885 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1886 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1887 case AMDGPU::OPERAND_KIMM16:
1888 return &APFloat::IEEEhalf();
1889 default:
1890 llvm_unreachable("unsupported fp type");
1891 }
1892}
1893
1894//===----------------------------------------------------------------------===//
1895// Operand
1896//===----------------------------------------------------------------------===//
1897
1898static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1899 bool Lost;
1900
1901 // Convert the literal to the fp semantics of VT.
1902 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1903 APFloat::rmNearestTiesToEven,
1904 &Lost);
1905 // We allow precision loss but not overflow or underflow.
1906 if (Status != APFloat::opOK &&
1907 Lost &&
1908 ((Status & APFloat::opOverflow) != 0 ||
1909 (Status & APFloat::opUnderflow) != 0)) {
1910 return false;
1911 }
1912
1913 return true;
1914}
1915
1916static bool isSafeTruncation(int64_t Val, unsigned Size) {
1917 return isUIntN(Size, Val) || isIntN(Size, Val);
1918}
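// Illustrative example (assumed sample values, not part of the original file):
//   isSafeTruncation(0xFFFF, 16)  -> true  (representable as unsigned 16-bit)
//   isSafeTruncation(-1, 16)      -> true  (representable as signed 16-bit)
//   isSafeTruncation(0x1FFFF, 16) -> false (fits neither encoding)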
1919
1920static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1921 if (VT.getScalarType() == MVT::i16) {
1922 // FP immediate values are broken.
1923 return isInlinableIntLiteral(Val);
1924 }
1925
1926 // f16/v2f16 operands work correctly for all values.
1927 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1928}
1929
1930bool AMDGPUOperand::isInlinableImm(MVT type) const {
1931
1932 // This is a hack to enable named inline values like
1933 // shared_base with both 32-bit and 64-bit operands.
1934 // Note that these values are defined as
1935 // 32-bit operands only.
1936 if (isInlineValue()) {
1937 return true;
1938 }
1939
1940 if (!isImmTy(ImmTyNone)) {
1941 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1942 return false;
1943 }
1944 // TODO: We should avoid using host float here. It would be better to
1945 // check the float bit values which is what a few other places do.
1946 // We've had bot failures before due to weird NaN support on mips hosts.
1947
1948 APInt Literal(64, Imm.Val);
1949
1950 if (Imm.IsFPImm) { // We got fp literal token
1951 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1952 return AMDGPU::isInlinableLiteral64(Imm.Val,
1953 AsmParser->hasInv2PiInlineImm());
1954 }
1955
1956 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1957 if (!canLosslesslyConvertToFPType(FPLiteral, type))
1958 return false;
1959
1960 if (type.getScalarSizeInBits() == 16) {
1961 return isInlineableLiteralOp16(
1962 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1963 type, AsmParser->hasInv2PiInlineImm());
1964 }
1965
1966 // Check if single precision literal is inlinable
1967 return AMDGPU::isInlinableLiteral32(
1968 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1969 AsmParser->hasInv2PiInlineImm());
1970 }
1971
1972 // We got int literal token.
1973 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1974 return AMDGPU::isInlinableLiteral64(Imm.Val,
1975 AsmParser->hasInv2PiInlineImm());
1976 }
1977
1978 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1979 return false;
1980 }
1981
1982 if (type.getScalarSizeInBits() == 16) {
1983 return isInlineableLiteralOp16(
1984 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1985 type, AsmParser->hasInv2PiInlineImm());
1986 }
1987
1988 return AMDGPU::isInlinableLiteral32(
1989 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1990 AsmParser->hasInv2PiInlineImm());
1991}
1992
1993bool AMDGPUOperand::isLiteralImm(MVT type) const {
1994 // Check that this immediate can be added as literal
1995 if (!isImmTy(ImmTyNone)) {
1996 return false;
1997 }
1998
1999 if (!Imm.IsFPImm) {
2000 // We got int literal token.
2001
2002 if (type == MVT::f64 && hasFPModifiers()) {
2003 // FP modifiers cannot be applied to integer literals while preserving the
2004 // same semantics for VOP1/2/C and VOP3, because of integer truncation.
2005 // To avoid ambiguity, disable these cases.
2006 return false;
2007 }
2008
2009 unsigned Size = type.getSizeInBits();
2010 if (Size == 64)
2011 Size = 32;
2012
2013 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2014 // types.
2015 return isSafeTruncation(Imm.Val, Size);
2016 }
2017
2018 // We got fp literal token
2019 if (type == MVT::f64) { // Expected 64-bit fp operand
2020 // The low 32 bits of such a literal are zeroed at encoding time, but we accept these literals.
2021 return true;
2022 }
2023
2024 if (type == MVT::i64) { // Expected 64-bit int operand
2025 // We don't allow fp literals in 64-bit integer instructions. It is
2026 // unclear how we should encode them.
2027 return false;
2028 }
2029
2030 // We allow fp literals with f16x2 operands assuming that the specified
2031 // literal goes into the lower half and the upper half is zero. We also
2032 // require that the literal may be losslessly converted to f16.
2033 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
2034 (type == MVT::v2i16)? MVT::i16 :
2035 (type == MVT::v2f32)? MVT::f32 : type;
2036
2037 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2038 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2039}
2040
2041bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2042 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2043}
2044
2045bool AMDGPUOperand::isVRegWithInputMods() const {
2046 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2047 // GFX90A allows DPP on 64-bit operands.
2048 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2049 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
2050}
2051
2052bool AMDGPUOperand::isT16VRegWithInputMods() const {
2053 return isRegClass(AMDGPU::VGPR_32_Lo128RegClassID);
2054}
2055
2056bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2057 if (AsmParser->isVI())
2058 return isVReg32();
2059 else if (AsmParser->isGFX9Plus())
2060 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2061 else
2062 return false;
2063}
2064
2065bool AMDGPUOperand::isSDWAFP16Operand() const {
2066 return isSDWAOperand(MVT::f16);
2067}
2068
2069bool AMDGPUOperand::isSDWAFP32Operand() const {
2070 return isSDWAOperand(MVT::f32);
2071}
2072
2073bool AMDGPUOperand::isSDWAInt16Operand() const {
2074 return isSDWAOperand(MVT::i16);
2075}
2076
2077bool AMDGPUOperand::isSDWAInt32Operand() const {
2078 return isSDWAOperand(MVT::i32);
2079}
2080
2081bool AMDGPUOperand::isBoolReg() const {
2082 auto FB = AsmParser->getFeatureBits();
2083 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2084 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2085}
2086
2087uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2088{
2089 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2090 assert(Size == 2 || Size == 4 || Size == 8);
2091
2092 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2093
2094 if (Imm.Mods.Abs) {
2095 Val &= ~FpSignMask;
2096 }
2097 if (Imm.Mods.Neg) {
2098 Val ^= FpSignMask;
2099 }
2100
2101 return Val;
2102}
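// Illustrative example (assumed bit patterns, not part of the original file):
// for a 32-bit operand, Size == 4 gives FpSignMask == 0x80000000, so
//   abs: 0xBF800000 (-1.0f) & ~FpSignMask -> 0x3F800000 (+1.0f)
//   neg: 0x3F800000 (+1.0f) ^  FpSignMask -> 0xBF800000 (-1.0f)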
2103
2104void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2105 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2106 Inst.getNumOperands())) {
2107 addLiteralImmOperand(Inst, Imm.Val,
2108 ApplyModifiers &
2109 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2110 } else {
2111 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2112 Inst.addOperand(MCOperand::createImm(Imm.Val));
2113 setImmKindNone();
2114 }
2115}
2116
2117void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2118 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2119 auto OpNum = Inst.getNumOperands();
2120 // Check that this operand accepts literals
2121 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2122
2123 if (ApplyModifiers) {
2124 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2125 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2126 Val = applyInputFPModifiers(Val, Size);
2127 }
2128
2129 APInt Literal(64, Val);
2130 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2131
2132 if (Imm.IsFPImm) { // We got fp literal token
2133 switch (OpTy) {
2134 case AMDGPU::OPERAND_REG_IMM_INT64:
2135 case AMDGPU::OPERAND_REG_IMM_FP64:
2136 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2137 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2138 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2139 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2140 AsmParser->hasInv2PiInlineImm())) {
2141 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2142 setImmKindConst();
2143 return;
2144 }
2145
2146 // Non-inlineable
2147 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2148 // For fp operands we check if low 32 bits are zeros
2149 if (Literal.getLoBits(32) != 0) {
2150 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2151 "Can't encode literal as exact 64-bit floating-point operand. "
2152 "Low 32-bits will be set to zero");
2153 }
2154
2155 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2156 setImmKindLiteral();
2157 return;
2158 }
2159
2160 // We don't allow fp literals in 64-bit integer instructions. It is
2161 // unclear how we should encode them. This case should be checked earlier
2162 // in predicate methods (isLiteralImm())
2163 llvm_unreachable("fp literal in 64-bit integer instruction.");
2164
2165 case AMDGPU::OPERAND_REG_IMM_INT32:
2166 case AMDGPU::OPERAND_REG_IMM_FP32:
2167 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2168 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2169 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2170 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2171 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2172 case AMDGPU::OPERAND_REG_IMM_INT16:
2173 case AMDGPU::OPERAND_REG_IMM_FP16:
2174 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2175 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2176 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2177 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2178 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2179 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2180 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2181 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2182 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2183 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2184 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2185 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2186 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2187 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2188 case AMDGPU::OPERAND_REG_IMM_V2INT32:
2189 case AMDGPU::OPERAND_KIMM32:
2190 case AMDGPU::OPERAND_KIMM16: {
2191 bool lost;
2192 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2193 // Convert literal to single precision
2194 FPLiteral.convert(*getOpFltSemantics(OpTy),
2195 APFloat::rmNearestTiesToEven, &lost);
2196 // We allow precision loss but not overflow or underflow. This should be
2197 // checked earlier in isLiteralImm()
2198
2199 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2200 Inst.addOperand(MCOperand::createImm(ImmVal));
2201 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2202 setImmKindMandatoryLiteral();
2203 } else {
2204 setImmKindLiteral();
2205 }
2206 return;
2207 }
2208 default:
2209 llvm_unreachable("invalid operand size");
2210 }
2211
2212 return;
2213 }
2214
2215 // We got int literal token.
2216 // Only sign extend inline immediates.
2217 switch (OpTy) {
2218 case AMDGPU::OPERAND_REG_IMM_INT32:
2219 case AMDGPU::OPERAND_REG_IMM_FP32:
2220 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2221 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2222 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2223 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2224 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2225 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2226 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2227 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2228 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2229 case AMDGPU::OPERAND_REG_IMM_V2INT32:
2230 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2231 if (isSafeTruncation(Val, 32) &&
2232 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2233 AsmParser->hasInv2PiInlineImm())) {
2234 Inst.addOperand(MCOperand::createImm(Val));
2235 setImmKindConst();
2236 return;
2237 }
2238
2239 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2240 setImmKindLiteral();
2241 return;
2242
2243 case AMDGPU::OPERAND_REG_IMM_INT64:
2244 case AMDGPU::OPERAND_REG_IMM_FP64:
2245 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2246 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2247 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2248 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2249 Inst.addOperand(MCOperand::createImm(Val));
2250 setImmKindConst();
2251 return;
2252 }
2253
2255 setImmKindLiteral();
2256 return;
2257
2258 case AMDGPU::OPERAND_REG_IMM_INT16:
2259 case AMDGPU::OPERAND_REG_IMM_FP16:
2260 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2261 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2262 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2263 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2264 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2265 if (isSafeTruncation(Val, 16) &&
2266 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2267 AsmParser->hasInv2PiInlineImm())) {
2268 Inst.addOperand(MCOperand::createImm(Val));
2269 setImmKindConst();
2270 return;
2271 }
2272
2273 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2274 setImmKindLiteral();
2275 return;
2276
2277 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2278 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2279 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2280 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2281 assert(isSafeTruncation(Val, 16));
2282 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2283 AsmParser->hasInv2PiInlineImm()));
2284
2286 return;
2287 }
2288 case AMDGPU::OPERAND_KIMM32:
2289 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2290 setImmKindMandatoryLiteral();
2291 return;
2292 case AMDGPU::OPERAND_KIMM16:
2293 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2294 setImmKindMandatoryLiteral();
2295 return;
2296 default:
2297 llvm_unreachable("invalid operand size");
2298 }
2299}
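// Illustrative example (assumed literal values, not part of the original
// file): for a 64-bit fp operand, a non-inlinable literal such as 3.0
// (0x4008000000000000) has zero low 32 bits and is encoded as its high word
// 0x40080000, while 1.1 (0x3FF199999999999A) triggers the "Low 32-bits will
// be set to zero" warning and is encoded as 0x3FF19999.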
2300
2301template <unsigned Bitwidth>
2302void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2303 APInt Literal(64, Imm.Val);
2304 setImmKindMandatoryLiteral();
2305
2306 if (!Imm.IsFPImm) {
2307 // We got int literal token.
2308 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2309 return;
2310 }
2311
2312 bool Lost;
2313 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2314 FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2315 APFloat::rmNearestTiesToEven, &Lost);
2316 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2317}
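// Illustrative example (assumed values, not part of the original file): with
// Bitwidth == 16, the fp literal 1.0 is converted to IEEE half and encoded as
// 0x3C00, whereas an integer literal is passed through as its low 16 bits.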
2318
2319void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2320 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2321}
2322
2323bool AMDGPUOperand::isInlineValue() const {
2324 return isRegKind() && ::isInlineValue(getReg());
2325}
2326
2327//===----------------------------------------------------------------------===//
2328// AsmParser
2329//===----------------------------------------------------------------------===//
2330
2331static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2332 if (Is == IS_VGPR) {
2333 switch (RegWidth) {
2334 default: return -1;
2335 case 32:
2336 return AMDGPU::VGPR_32RegClassID;
2337 case 64:
2338 return AMDGPU::VReg_64RegClassID;
2339 case 96:
2340 return AMDGPU::VReg_96RegClassID;
2341 case 128:
2342 return AMDGPU::VReg_128RegClassID;
2343 case 160:
2344 return AMDGPU::VReg_160RegClassID;
2345 case 192:
2346 return AMDGPU::VReg_192RegClassID;
2347 case 224:
2348 return AMDGPU::VReg_224RegClassID;
2349 case 256:
2350 return AMDGPU::VReg_256RegClassID;
2351 case 288:
2352 return AMDGPU::VReg_288RegClassID;
2353 case 320:
2354 return AMDGPU::VReg_320RegClassID;
2355 case 352:
2356 return AMDGPU::VReg_352RegClassID;
2357 case 384:
2358 return AMDGPU::VReg_384RegClassID;
2359 case 512:
2360 return AMDGPU::VReg_512RegClassID;
2361 case 1024:
2362 return AMDGPU::VReg_1024RegClassID;
2363 }
2364 } else if (Is == IS_TTMP) {
2365 switch (RegWidth) {
2366 default: return -1;
2367 case 32:
2368 return AMDGPU::TTMP_32RegClassID;
2369 case 64:
2370 return AMDGPU::TTMP_64RegClassID;
2371 case 128:
2372 return AMDGPU::TTMP_128RegClassID;
2373 case 256:
2374 return AMDGPU::TTMP_256RegClassID;
2375 case 512:
2376 return AMDGPU::TTMP_512RegClassID;
2377 }
2378 } else if (Is == IS_SGPR) {
2379 switch (RegWidth) {
2380 default: return -1;
2381 case 32:
2382 return AMDGPU::SGPR_32RegClassID;
2383 case 64:
2384 return AMDGPU::SGPR_64RegClassID;
2385 case 96:
2386 return AMDGPU::SGPR_96RegClassID;
2387 case 128:
2388 return AMDGPU::SGPR_128RegClassID;
2389 case 160:
2390 return AMDGPU::SGPR_160RegClassID;
2391 case 192:
2392 return AMDGPU::SGPR_192RegClassID;
2393 case 224:
2394 return AMDGPU::SGPR_224RegClassID;
2395 case 256:
2396 return AMDGPU::SGPR_256RegClassID;
2397 case 288:
2398 return AMDGPU::SGPR_288RegClassID;
2399 case 320:
2400 return AMDGPU::SGPR_320RegClassID;
2401 case 352:
2402 return AMDGPU::SGPR_352RegClassID;
2403 case 384:
2404 return AMDGPU::SGPR_384RegClassID;
2405 case 512:
2406 return AMDGPU::SGPR_512RegClassID;
2407 }
2408 } else if (Is == IS_AGPR) {
2409 switch (RegWidth) {
2410 default: return -1;
2411 case 32:
2412 return AMDGPU::AGPR_32RegClassID;
2413 case 64:
2414 return AMDGPU::AReg_64RegClassID;
2415 case 96:
2416 return AMDGPU::AReg_96RegClassID;
2417 case 128:
2418 return AMDGPU::AReg_128RegClassID;
2419 case 160:
2420 return AMDGPU::AReg_160RegClassID;
2421 case 192:
2422 return AMDGPU::AReg_192RegClassID;
2423 case 224:
2424 return AMDGPU::AReg_224RegClassID;
2425 case 256:
2426 return AMDGPU::AReg_256RegClassID;
2427 case 288:
2428 return AMDGPU::AReg_288RegClassID;
2429 case 320:
2430 return AMDGPU::AReg_320RegClassID;
2431 case 352:
2432 return AMDGPU::AReg_352RegClassID;
2433 case 384:
2434 return AMDGPU::AReg_384RegClassID;
2435 case 512:
2436 return AMDGPU::AReg_512RegClassID;
2437 case 1024:
2438 return AMDGPU::AReg_1024RegClassID;
2439 }
2440 }
2441 return -1;
2442}
2443
2446 .Case("exec", AMDGPU::EXEC)
2447 .Case("vcc", AMDGPU::VCC)
2448 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2449 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2450 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2451 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2452 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2453 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2454 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2455 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2456 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2457 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2458 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2459 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2460 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2461 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2462 .Case("m0", AMDGPU::M0)
2463 .Case("vccz", AMDGPU::SRC_VCCZ)
2464 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2465 .Case("execz", AMDGPU::SRC_EXECZ)
2466 .Case("src_execz", AMDGPU::SRC_EXECZ)
2467 .Case("scc", AMDGPU::SRC_SCC)
2468 .Case("src_scc", AMDGPU::SRC_SCC)
2469 .Case("tba", AMDGPU::TBA)
2470 .Case("tma", AMDGPU::TMA)
2471 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2472 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2473 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2474 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2475 .Case("vcc_lo", AMDGPU::VCC_LO)
2476 .Case("vcc_hi", AMDGPU::VCC_HI)
2477 .Case("exec_lo", AMDGPU::EXEC_LO)
2478 .Case("exec_hi", AMDGPU::EXEC_HI)
2479 .Case("tma_lo", AMDGPU::TMA_LO)
2480 .Case("tma_hi", AMDGPU::TMA_HI)
2481 .Case("tba_lo", AMDGPU::TBA_LO)
2482 .Case("tba_hi", AMDGPU::TBA_HI)
2483 .Case("pc", AMDGPU::PC_REG)
2484 .Case("null", AMDGPU::SGPR_NULL)
2485 .Default(AMDGPU::NoRegister);
2486}
2487
2488bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2489 SMLoc &EndLoc, bool RestoreOnFailure) {
2490 auto R = parseRegister();
2491 if (!R) return true;
2492 assert(R->isReg());
2493 RegNo = R->getReg();
2494 StartLoc = R->getStartLoc();
2495 EndLoc = R->getEndLoc();
2496 return false;
2497}
2498
2499bool AMDGPUAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2500 SMLoc &EndLoc) {
2501 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2502}
2503
2504OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(MCRegister &RegNo,
2505 SMLoc &StartLoc,
2506 SMLoc &EndLoc) {
2507 bool Result =
2508 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2509 bool PendingErrors = getParser().hasPendingError();
2510 getParser().clearPendingErrors();
2511 if (PendingErrors)
2512 return MatchOperand_ParseFail;
2513 if (Result)
2514 return MatchOperand_NoMatch;
2515 return MatchOperand_Success;
2516}
2517
2518bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2519 RegisterKind RegKind, unsigned Reg1,
2520 SMLoc Loc) {
2521 switch (RegKind) {
2522 case IS_SPECIAL:
2523 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2524 Reg = AMDGPU::EXEC;
2525 RegWidth = 64;
2526 return true;
2527 }
2528 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2529 Reg = AMDGPU::FLAT_SCR;
2530 RegWidth = 64;
2531 return true;
2532 }
2533 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2534 Reg = AMDGPU::XNACK_MASK;
2535 RegWidth = 64;
2536 return true;
2537 }
2538 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2539 Reg = AMDGPU::VCC;
2540 RegWidth = 64;
2541 return true;
2542 }
2543 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2544 Reg = AMDGPU::TBA;
2545 RegWidth = 64;
2546 return true;
2547 }
2548 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2549 Reg = AMDGPU::TMA;
2550 RegWidth = 64;
2551 return true;
2552 }
2553 Error(Loc, "register does not fit in the list");
2554 return false;
2555 case IS_VGPR:
2556 case IS_SGPR:
2557 case IS_AGPR:
2558 case IS_TTMP:
2559 if (Reg1 != Reg + RegWidth / 32) {
2560 Error(Loc, "registers in a list must have consecutive indices");
2561 return false;
2562 }
2563 RegWidth += 32;
2564 return true;
2565 default:
2566 llvm_unreachable("unexpected register kind");
2567 }
2568}
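// Illustrative example (assumed register lists, not part of the original
// file): for [v4,v5,v6] each successor must satisfy Reg1 == Reg + RegWidth/32,
// so RegWidth grows 32 -> 64 -> 96; for [exec_lo,exec_hi] the special-register
// case above folds the pair into EXEC with RegWidth == 64.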
2569
2570struct RegInfo {
2571 StringLiteral Name;
2572 RegisterKind Kind;
2573};
2574
2575static constexpr RegInfo RegularRegisters[] = {
2576 {{"v"}, IS_VGPR},
2577 {{"s"}, IS_SGPR},
2578 {{"ttmp"}, IS_TTMP},
2579 {{"acc"}, IS_AGPR},
2580 {{"a"}, IS_AGPR},
2581};
2582
2583static bool isRegularReg(RegisterKind Kind) {
2584 return Kind == IS_VGPR ||
2585 Kind == IS_SGPR ||
2586 Kind == IS_TTMP ||
2587 Kind == IS_AGPR;
2588}
2589
2590static const RegInfo* getRegularRegInfo(StringRef Str) {
2591 for (const RegInfo &Reg : RegularRegisters)
2592 if (Str.startswith(Reg.Name))
2593 return &Reg;
2594 return nullptr;
2595}
2596
2597static bool getRegNum(StringRef Str, unsigned& Num) {
2598 return !Str.getAsInteger(10, Num);
2599}
2600
2601bool
2602AMDGPUAsmParser::isRegister(const AsmToken &Token,
2603 const AsmToken &NextToken) const {
2604
2605 // A list of consecutive registers: [s0,s1,s2,s3]
2606 if (Token.is(AsmToken::LBrac))
2607 return true;
2608
2609 if (!Token.is(AsmToken::Identifier))
2610 return false;
2611
2612 // A single register like s0 or a range of registers like s[0:1]
2613
2614 StringRef Str = Token.getString();
2615 const RegInfo *Reg = getRegularRegInfo(Str);
2616 if (Reg) {
2617 StringRef RegName = Reg->Name;
2618 StringRef RegSuffix = Str.substr(RegName.size());
2619 if (!RegSuffix.empty()) {
2620 unsigned Num;
2621 // A single register with an index: rXX
2622 if (getRegNum(RegSuffix, Num))
2623 return true;
2624 } else {
2625 // A range of registers: r[XX:YY].
2626 if (NextToken.is(AsmToken::LBrac))
2627 return true;
2628 }
2629 }
2630
2631 return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2632}
2633
2634bool
2635AMDGPUAsmParser::isRegister()
2636{
2637 return isRegister(getToken(), peekToken());
2638}
2639
2640unsigned
2641AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2642 unsigned RegNum,
2643 unsigned RegWidth,
2644 SMLoc Loc) {
2645
2646 assert(isRegularReg(RegKind));
2647
2648 unsigned AlignSize = 1;
2649 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2650 // SGPR and TTMP registers must be aligned.
2651 // Max required alignment is 4 dwords.
2652 AlignSize = std::min(RegWidth / 32, 4u);
2653 }
2654
2655 if (RegNum % AlignSize != 0) {
2656 Error(Loc, "invalid register alignment");
2657 return AMDGPU::NoRegister;
2658 }
2659
2660 unsigned RegIdx = RegNum / AlignSize;
2661 int RCID = getRegClass(RegKind, RegWidth);
2662 if (RCID == -1) {
2663 Error(Loc, "invalid or unsupported register size");
2664 return AMDGPU::NoRegister;
2665 }
2666
2667 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2668 const MCRegisterClass RC = TRI->getRegClass(RCID);
2669 if (RegIdx >= RC.getNumRegs()) {
2670 Error(Loc, "register index is out of range");
2671 return AMDGPU::NoRegister;
2672 }
2673
2674 return RC.getRegister(RegIdx);
2675}
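// Illustrative example (assumed operands, not part of the original file):
// s[4:7] has RegWidth == 128 and AlignSize == 4, so RegNum == 4 is accepted
// (RegIdx == 1 in the SGPR_128 class), whereas s[2:5] fails the
// "invalid register alignment" check; VGPRs keep AlignSize == 1, so v[3:4]
// is accepted.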
2676
2677bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2678 int64_t RegLo, RegHi;
2679 if (!skipToken(AsmToken::LBrac, "missing register index"))
2680 return false;
2681
2682 SMLoc FirstIdxLoc = getLoc();
2683 SMLoc SecondIdxLoc;
2684
2685 if (!parseExpr(RegLo))
2686 return false;
2687
2688 if (trySkipToken(AsmToken::Colon)) {
2689 SecondIdxLoc = getLoc();
2690 if (!parseExpr(RegHi))
2691 return false;
2692 } else {
2693 RegHi = RegLo;
2694 }
2695
2696 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2697 return false;
2698
2699 if (!isUInt<32>(RegLo)) {
2700 Error(FirstIdxLoc, "invalid register index");
2701 return false;
2702 }
2703
2704 if (!isUInt<32>(RegHi)) {
2705 Error(SecondIdxLoc, "invalid register index");
2706 return false;
2707 }
2708
2709 if (RegLo > RegHi) {
2710 Error(FirstIdxLoc, "first register index should not exceed second index");
2711 return false;
2712 }
2713
2714 Num = static_cast<unsigned>(RegLo);
2715 RegWidth = 32 * ((RegHi - RegLo) + 1);
2716 return true;
2717}
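// Illustrative example (assumed syntax, not part of the original file):
// "[8:11]" parses to Num == 8 and RegWidth == 32 * ((11 - 8) + 1) == 128;
// a single index such as "[5]" omits the colon and yields RegWidth == 32.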
2718
2719unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2720 unsigned &RegNum, unsigned &RegWidth,
2721 SmallVectorImpl<AsmToken> &Tokens) {
2722 assert(isToken(AsmToken::Identifier));
2723 unsigned Reg = getSpecialRegForName(getTokenStr());
2724 if (Reg) {
2725 RegNum = 0;
2726 RegWidth = 32;
2727 RegKind = IS_SPECIAL;
2728 Tokens.push_back(getToken());
2729 lex(); // skip register name
2730 }
2731 return Reg;
2732}
2733
2734unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2735 unsigned &RegNum, unsigned &RegWidth,
2736 SmallVectorImpl<AsmToken> &Tokens) {
2737 assert(isToken(AsmToken::Identifier));
2738 StringRef RegName = getTokenStr();
2739 auto Loc = getLoc();
2740
2741 const RegInfo *RI = getRegularRegInfo(RegName);
2742 if (!RI) {
2743 Error(Loc, "invalid register name");
2744 return AMDGPU::NoRegister;
2745 }
2746
2747 Tokens.push_back(getToken());
2748 lex(); // skip register name
2749
2750 RegKind = RI->Kind;
2751 StringRef RegSuffix = RegName.substr(RI->Name.size());
2752 if (!RegSuffix.empty()) {
2753 // Single 32-bit register: vXX.
2754 if (!getRegNum(RegSuffix, RegNum)) {
2755 Error(Loc, "invalid register index");
2756 return AMDGPU::NoRegister;
2757 }
2758 RegWidth = 32;
2759 } else {
2760 // Range of registers: v[XX:YY]. ":YY" is optional.
2761 if (!ParseRegRange(RegNum, RegWidth))
2762 return AMDGPU::NoRegister;
2763 }
2764
2765 return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2766}
2767
2768unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2769 unsigned &RegWidth,
2770 SmallVectorImpl<AsmToken> &Tokens) {
2771 unsigned Reg = AMDGPU::NoRegister;
2772 auto ListLoc = getLoc();
2773
2774 if (!skipToken(AsmToken::LBrac,
2775 "expected a register or a list of registers")) {
2776 return AMDGPU::NoRegister;
2777 }
2778
2779 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2780
2781 auto Loc = getLoc();
2782 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2783 return AMDGPU::NoRegister;
2784 if (RegWidth != 32) {
2785 Error(Loc, "expected a single 32-bit register");
2786 return AMDGPU::NoRegister;
2787 }
2788
2789 for (; trySkipToken(AsmToken::Comma); ) {
2790 RegisterKind NextRegKind;
2791 unsigned NextReg, NextRegNum, NextRegWidth;
2792 Loc = getLoc();
2793
2794 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2795 NextRegNum, NextRegWidth,
2796 Tokens)) {
2797 return AMDGPU::NoRegister;
2798 }
2799 if (NextRegWidth != 32) {
2800 Error(Loc, "expected a single 32-bit register");
2801 return AMDGPU::NoRegister;
2802 }
2803 if (NextRegKind != RegKind) {
2804 Error(Loc, "registers in a list must be of the same kind");
2805 return AMDGPU::NoRegister;
2806 }
2807 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2808 return AMDGPU::NoRegister;
2809 }
2810
2811 if (!skipToken(AsmToken::RBrac,
2812 "expected a comma or a closing square bracket")) {
2813 return AMDGPU::NoRegister;
2814 }
2815
2816 if (isRegularReg(RegKind))
2817 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2818
2819 return Reg;
2820}
2821
2822bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2823 unsigned &RegNum, unsigned &RegWidth,
2824 SmallVectorImpl<AsmToken> &Tokens) {
2825 auto Loc = getLoc();
2826 Reg = AMDGPU::NoRegister;
2827
2828 if (isToken(AsmToken::Identifier)) {
2829 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2830 if (Reg == AMDGPU::NoRegister)
2831 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2832 } else {
2833 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2834 }
2835
2836 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2837 if (Reg == AMDGPU::NoRegister) {
2838 assert(Parser.hasPendingError());
2839 return false;
2840 }
2841
2842 if (!subtargetHasRegister(*TRI, Reg)) {
2843 if (Reg == AMDGPU::SGPR_NULL) {
2844 Error(Loc, "'null' operand is not supported on this GPU");
2845 } else {
2846 Error(Loc, "register not available on this GPU");
2847 }
2848 return false;
2849 }
2850
2851 return true;
2852}
2853
2854bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2855 unsigned &RegNum, unsigned &RegWidth,
2856 bool RestoreOnFailure /*=false*/) {
2857 Reg = AMDGPU::NoRegister;
2858
2859 SmallVector<AsmToken, 1> Tokens;
2860 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2861 if (RestoreOnFailure) {
2862 while (!Tokens.empty()) {
2863 getLexer().UnLex(Tokens.pop_back_val());
2864 }
2865 }
2866 return true;
2867 }
2868 return false;
2869}
2870
2871std::optional<StringRef>
2872AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2873 switch (RegKind) {
2874 case IS_VGPR:
2875 return StringRef(".amdgcn.next_free_vgpr");
2876 case IS_SGPR:
2877 return StringRef(".amdgcn.next_free_sgpr");
2878 default:
2879 return std::nullopt;
2880 }
2881}
2882
2883void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2884 auto SymbolName = getGprCountSymbolName(RegKind);
2885 assert(SymbolName && "initializing invalid register kind");
2886 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2887 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2888}
2889
2890bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2891 unsigned DwordRegIndex,
2892 unsigned RegWidth) {
2893 // Symbols are only defined for GCN targets
2894 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2895 return true;
2896
2897 auto SymbolName = getGprCountSymbolName(RegKind);
2898 if (!SymbolName)
2899 return true;
2900 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2901
2902 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2903 int64_t OldCount;
2904
2905 if (!Sym->isVariable())
2906 return !Error(getLoc(),
2907 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2908 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2909 return !Error(
2910 getLoc(),
2911 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2912
2913 if (OldCount <= NewMax)
2914 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2915
2916 return true;
2917}
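// Illustrative example (assumed input, not part of the original file): after
// parsing v[8:11], RegWidth == 128, so NewMax == 8 + ceil(128/32) - 1 == 11
// and .amdgcn.next_free_vgpr is raised to at least 12 if it was smaller.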
2918
2919std::unique_ptr<AMDGPUOperand>
2920AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2921 const auto &Tok = getToken();
2922 SMLoc StartLoc = Tok.getLoc();
2923 SMLoc EndLoc = Tok.getEndLoc();
2924 RegisterKind RegKind;
2925 unsigned Reg, RegNum, RegWidth;
2926
2927 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2928 return nullptr;
2929 }
2930 if (isHsaAbiVersion3AndAbove(&getSTI())) {
2931 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2932 return nullptr;
2933 } else
2934 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2935 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2936}
2937
2938OperandMatchResultTy
2939AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2940 // TODO: add syntactic sugar for 1/(2*PI)
2941
2942 if (isRegister())
2943 return MatchOperand_NoMatch;
2944 assert(!isModifier());
2945
2946 const auto& Tok = getToken();
2947 const auto& NextTok = peekToken();
2948 bool IsReal = Tok.is(AsmToken::Real);
2949 SMLoc S = getLoc();
2950 bool Negate = false;
2951
2952 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2953 lex();
2954 IsReal = true;
2955 Negate = true;
2956 }
2957
2958 if (IsReal) {
2959 // Floating-point expressions are not supported.
2960 // Only floating-point literals with an
2961 // optional sign are allowed here.
2962
2963 StringRef Num = getTokenStr();
2964 lex();
2965
2966 APFloat RealVal(APFloat::IEEEdouble());
2967 auto roundMode = APFloat::rmNearestTiesToEven;
2968 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2969 return MatchOperand_ParseFail;
2970 }
2971 if (Negate)
2972 RealVal.changeSign();
2973
2974 Operands.push_back(
2975 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2976 AMDGPUOperand::ImmTyNone, true));
2977
2978 return MatchOperand_Success;
2979
2980 } else {
2981 int64_t IntVal;
2982 const MCExpr *Expr;
2983 SMLoc S = getLoc();
2984
2985 if (HasSP3AbsModifier) {
2986 // This is a workaround for handling expressions
2987 // as arguments of SP3 'abs' modifier, for example:
2988 // |1.0|
2989 // |-1|
2990 // |1+x|
2991 // This syntax is not compatible with syntax of standard
2992 // MC expressions (due to the trailing '|').
2993 SMLoc EndLoc;
2994 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2995 return MatchOperand_ParseFail;
2996 } else {
2997 if (Parser.parseExpression(Expr))
2998 return MatchOperand_ParseFail;
2999 }
3000
3001 if (Expr->evaluateAsAbsolute(IntVal)) {
3002 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3003 } else {
3004 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3005 }
3006
3007 return MatchOperand_Success;
3008 }
3009
3010 return MatchOperand_NoMatch;
3011}
3012
3013OperandMatchResultTy
3014AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3015 if (!isRegister())
3016 return MatchOperand_NoMatch;
3017
3018 if (auto R = parseRegister()) {
3019 assert(R->isReg());
3020 Operands.push_back(std::move(R));
3021 return MatchOperand_Success;
3022 }
3023 return MatchOperand_ParseFail;
3024}
3025
3026OperandMatchResultTy
3027AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
3028 auto res = parseReg(Operands);
3029 if (res != MatchOperand_NoMatch) {
3030 return res;
3031 } else if (isModifier()) {
3032 return MatchOperand_NoMatch;
3033 } else {
3034 return parseImm(Operands, HasSP3AbsMod);
3035 }
3036}
3037
3038bool
3039AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3040 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3041 const auto &str = Token.getString();
3042 return str == "abs" || str == "neg" || str == "sext";
3043 }
3044 return false;
3045}
3046
3047bool
3048AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3049 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3050}
3051
3052bool
3053AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3054 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3055}
3056
3057bool
3058AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3059 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3060}
3061
3062// Check if this is an operand modifier or an opcode modifier
3063 // that may look like an expression but is not one. We should
3064// avoid parsing these modifiers as expressions. Currently
3065// recognized sequences are:
3066// |...|
3067// abs(...)
3068// neg(...)
3069// sext(...)
3070// -reg
3071// -|...|
3072// -abs(...)
3073// name:...
3074//
3075bool
3076AMDGPUAsmParser::isModifier() {
3077
3078 AsmToken Tok = getToken();
3079 AsmToken NextToken[2];
3080 peekTokens(NextToken);
3081
3082 return isOperandModifier(Tok, NextToken[0]) ||
3083 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3084 isOpcodeModifierWithVal(Tok, NextToken[0]);
3085}
3086
3087// Check if the current token is an SP3 'neg' modifier.
3088// Currently this modifier is allowed in the following context:
3089//
3090// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3091// 2. Before an 'abs' modifier: -abs(...)
3092// 3. Before an SP3 'abs' modifier: -|...|
3093//
3094// In all other cases "-" is handled as a part
3095// of an expression that follows the sign.
3096//
3097// Note: When "-" is followed by an integer literal,
3098// this is interpreted as integer negation rather
3099// than a floating-point NEG modifier applied to N.
3100 // Besides being counter-intuitive, such use of a floating-point
3101 // NEG modifier would have resulted in different meanings
3102 // of integer literals used with VOP1/2/C and VOP3,
3103// for example:
3104// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3105// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3106// Negative fp literals with preceding "-" are
3107// handled likewise for uniformity
3108//
3109bool
3110AMDGPUAsmParser::parseSP3NegModifier() {
3111
3112 AsmToken NextToken[2];
3113 peekTokens(NextToken);
3114
3115 if (isToken(AsmToken::Minus) &&
3116 (isRegister(NextToken[0], NextToken[1]) ||
3117 NextToken[0].is(AsmToken::Pipe) ||
3118 isId(NextToken[0], "abs"))) {
3119 lex();
3120 return true;
3121 }
3122
3123 return false;
3124}
3125
3126OperandMatchResultTy
3127AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3128 bool AllowImm) {
3129 bool Neg, SP3Neg;
3130 bool Abs, SP3Abs;
3131 SMLoc Loc;
3132
3133 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3134 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3135 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3136 return MatchOperand_ParseFail;
3137 }
3138
3139 SP3Neg = parseSP3NegModifier();
3140
3141 Loc = getLoc();
3142 Neg = trySkipId("neg");
3143 if (Neg && SP3Neg) {
3144 Error(Loc, "expected register or immediate");
3145 return MatchOperand_ParseFail;
3146 }
3147 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3148 return MatchOperand_ParseFail;
3149
3150 Abs = trySkipId("abs");
3151 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3152 return MatchOperand_ParseFail;
3153
3154 Loc = getLoc();
3155 SP3Abs = trySkipToken(AsmToken::Pipe);
3156 if (Abs && SP3Abs) {
3157 Error(Loc, "expected register or immediate");
3158 return MatchOperand_ParseFail;
3159 }
3160
3161 OperandMatchResultTy Res;
3162 if (AllowImm) {
3163 Res = parseRegOrImm(Operands, SP3Abs);
3164 } else {
3165 Res = parseReg(Operands);
3166 }
3167 if (Res != MatchOperand_Success) {
3168 return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3169 }
3170
3171 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3172 return MatchOperand_ParseFail;
3173 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3174 return MatchOperand_ParseFail;
3175 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3176 return MatchOperand_ParseFail;
3177
3178 AMDGPUOperand::Modifiers Mods;
3179 Mods.Abs = Abs || SP3Abs;
3180 Mods.Neg = Neg || SP3Neg;
3181
3182 if (Mods.hasFPModifiers()) {
3183 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3184 if (Op.isExpr()) {
3185 Error(Op.getStartLoc(), "expected an absolute expression");
3186 return MatchOperand_ParseFail;
3187 }
3188 Op.setModifiers(Mods);
3189 }
3190 return MatchOperand_Success;
3191}
3192
3193OperandMatchResultTy
3194AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3195 bool AllowImm) {
3196 bool Sext = trySkipId("sext");
3197 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3198 return MatchOperand_ParseFail;
3199
3200 OperandMatchResultTy Res;
3201 if (AllowImm) {
3202 Res = parseRegOrImm(Operands);
3203 } else {
3204 Res = parseReg(Operands);
3205 }
3206 if (Res != MatchOperand_Success) {
3207 return Sext? MatchOperand_ParseFail : Res;
3208 }
3209
3210 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3211 return MatchOperand_ParseFail;
3212
3213 AMDGPUOperand::Modifiers Mods;
3214 Mods.Sext = Sext;
3215
3216 if (Mods.hasIntModifiers()) {
3217 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3218 if (Op.isExpr()) {
3219 Error(Op.getStartLoc(), "expected an absolute expression");
3220 return MatchOperand_ParseFail;
3221 }
3222 Op.setModifiers(Mods);
3223 }
3224
3225 return MatchOperand_Success;
3226}
3227
3228OperandMatchResultTy
3229AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3230 return parseRegOrImmWithFPInputMods(Operands, false);
3231}
3232
3233OperandMatchResultTy
3234AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3235 return parseRegOrImmWithIntInputMods(Operands, false);
3236}
3237
3238OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3239 auto Loc = getLoc();
3240 if (trySkipId("off")) {
3241 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3242 AMDGPUOperand::ImmTyOff, false));
3243 return MatchOperand_Success;
3244 }
3245
3246 if (!isRegister())
3247 return MatchOperand_NoMatch;
3248
3249 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3250 if (Reg) {
3251 Operands.push_back(std::move(Reg));
3252 return MatchOperand_Success;
3253 }
3254
3255 return MatchOperand_ParseFail;
3256
3257}
3258
3259unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3260 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3261
3262 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3263 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3264 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3265 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3266 return Match_InvalidOperand;
3267
3268 if ((TSFlags & SIInstrFlags::VOP3) &&
3270 getForcedEncodingSize() != 64)
3271 return Match_PreferE32;
3272
3273 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3274 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3275 // v_mac_f32/16 allow only dst_sel == DWORD;
3276 auto OpNum =
3277 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3278 const auto &Op = Inst.getOperand(OpNum);
3279 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3280 return Match_InvalidOperand;
3281 }
3282 }
3283
3284 return Match_Success;
3285}
3286
3287static ArrayRef<unsigned> getAllVariants() {
3288 static const unsigned Variants[] = {
3289 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3290 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3291 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3292 };
3293
3294 return ArrayRef(Variants);
3295}
3296
3297// What asm variants we should check
3298ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3299 if (isForcedDPP() && isForcedVOP3()) {
3300 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3301 return ArrayRef(Variants);
3302 }
3303 if (getForcedEncodingSize() == 32) {
3304 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3305 return ArrayRef(Variants);
3306 }
3307
3308 if (isForcedVOP3()) {
3309 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3310 return ArrayRef(Variants);
3311 }
3312
3313 if (isForcedSDWA()) {
3314 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3315 AMDGPUAsmVariants::SDWA9};
3316 return ArrayRef(Variants);
3317 }
3318
3319 if (isForcedDPP()) {
3320 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3321 return ArrayRef(Variants);
3322 }
3323
3324 return getAllVariants();
3325}
3326
3327StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3328 if (isForcedDPP() && isForcedVOP3())
3329 return "e64_dpp";
3330
3331 if (getForcedEncodingSize() == 32)
3332 return "e32";
3333
3334 if (isForcedVOP3())
3335 return "e64";
3336
3337 if (isForcedSDWA())
3338 return "sdwa";
3339
3340 if (isForcedDPP())
3341 return "dpp";
3342
3343 return "";
3344}
3345
3346unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3347 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3348 for (MCPhysReg Reg : Desc.implicit_uses()) {
3349 switch (Reg) {
3350 case AMDGPU::FLAT_SCR:
3351 case AMDGPU::VCC:
3352 case AMDGPU::VCC_LO:
3353 case AMDGPU::VCC_HI:
3354 case AMDGPU::M0:
3355 return Reg;
3356 default:
3357 break;
3358 }
3359 }
3360 return AMDGPU::NoRegister;
3361}
3362
3363// NB: This code is correct only when used to check constant
3364 // bus limitations because GFX7 supports no f16 inline constants.
3365// Note that there are no cases when a GFX7 opcode violates
3366// constant bus limitations due to the use of an f16 constant.
3367bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3368 unsigned OpIdx) const {
3369 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3370
3371 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3372 AMDGPU::isKImmOperand(Desc, OpIdx)) {
3373 return false;
3374 }
3375
3376 const MCOperand &MO = Inst.getOperand(OpIdx);
3377
3378 int64_t Val = MO.getImm();
3379 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3380
3381 switch (OpSize) { // expected operand size
3382 case 8:
3383 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3384 case 4:
3385 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3386 case 2: {
3387 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3388 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3389 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3390 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3391 return AMDGPU::isInlinableIntLiteral(Val);
3392
3393 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3394 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3395 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3396 return AMDGPU::isInlinableIntLiteralV216(Val);
3397
3398 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3399 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3400 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3401 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3402
3403 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3404 }
3405 default:
3406 llvm_unreachable("invalid operand size");
3407 }
3408}
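// Illustrative note (assumed summary of the helpers used above, not part of
// the original file): inlinable literals are the small integers -16..64 plus
// a fixed set of fp constants (+/-0.5, +/-1.0, +/-2.0, +/-4.0, and 1/(2*pi)
// when hasInv2PiInlineImm() is set); anything else must be emitted as a
// literal and is accounted for separately in the constant bus checks below.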
3409
3410unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3411 if (!isGFX10Plus())
3412 return 1;
3413
3414 switch (Opcode) {
3415 // 64-bit shift instructions can use only one scalar value input
3416 case AMDGPU::V_LSHLREV_B64_e64:
3417 case AMDGPU::V_LSHLREV_B64_gfx10:
3418 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3419 case AMDGPU::V_LSHRREV_B64_e64:
3420 case AMDGPU::V_LSHRREV_B64_gfx10:
3421 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3422 case AMDGPU::V_ASHRREV_I64_e64:
3423 case AMDGPU::V_ASHRREV_I64_gfx10:
3424 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3425 case AMDGPU::V_LSHL_B64_e64:
3426 case AMDGPU::V_LSHR_B64_e64:
3427 case AMDGPU::V_ASHR_I64_e64:
3428 return 1;
3429 default:
3430 return 2;
3431 }
3432}
3433
3434constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3435using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3436
3437// Get regular operand indices in the same order as specified
3438// in the instruction (but append mandatory literals to the end).
3439static OperandIndices getSrcOperandIndices(unsigned Opcode,
3440 bool AddMandatoryLiterals = false) {
3441
3442 int16_t ImmIdx =
3443 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3444
3445 if (isVOPD(Opcode)) {
3446 int16_t ImmDeferredIdx =
3447 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3448 : -1;
3449
3450 return {getNamedOperandIdx(Opcode, OpName::src0X),
3451 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3452 getNamedOperandIdx(Opcode, OpName::src0Y),
3453 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3454 ImmDeferredIdx,
3455 ImmIdx};
3456 }
3457
3458 return {getNamedOperandIdx(Opcode, OpName::src0),
3459 getNamedOperandIdx(Opcode, OpName::src1),
3460 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3461}
3462
3463bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3464 const MCOperand &MO = Inst.getOperand(OpIdx);
3465 if (MO.isImm()) {
3466 return !isInlineConstant(Inst, OpIdx);
3467 } else if (MO.isReg()) {
3468 auto Reg = MO.getReg();
3469 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3470 auto PReg = mc2PseudoReg(Reg);
3471 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3472 } else {
3473 return true;
3474 }
3475}
3476
3477bool AMDGPUAsmParser::validateConstantBusLimitations(
3478 const MCInst &Inst, const OperandVector &Operands) {
3479 const unsigned Opcode = Inst.getOpcode();
3480 const MCInstrDesc &Desc = MII.get(Opcode);
3481 unsigned LastSGPR = AMDGPU::NoRegister;
3482 unsigned ConstantBusUseCount = 0;
3483 unsigned NumLiterals = 0;
3484 unsigned LiteralSize;
3485
3486 if (!(Desc.TSFlags &
3489 !isVOPD(Opcode))
3490 return true;
3491
3492 // Check special imm operands (used by madmk, etc)
3493 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3494 ++NumLiterals;
3495 LiteralSize = 4;
3496 }
3497
3498 SmallDenseSet<unsigned> SGPRsUsed;
3499 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3500 if (SGPRUsed != AMDGPU::NoRegister) {
3501 SGPRsUsed.insert(SGPRUsed);
3502 ++ConstantBusUseCount;
3503 }
3504
3505 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3506
3507 for (int OpIdx : OpIndices) {
3508 if (OpIdx == -1)
3509 continue;
3510
3511 const MCOperand &MO = Inst.getOperand(OpIdx);
3512 if (usesConstantBus(Inst, OpIdx)) {
3513 if (MO.isReg()) {
3514 LastSGPR = mc2PseudoReg(MO.getReg());
3515 // Pairs of registers with a partial intersection, like these:
3516 // s0, s[0:1]
3517 // flat_scratch_lo, flat_scratch
3518 // flat_scratch_lo, flat_scratch_hi
3519 // are theoretically valid but they are disabled anyway.
3520 // Note that this code mimics SIInstrInfo::verifyInstruction
3521 if (SGPRsUsed.insert(LastSGPR).second) {
3522 ++ConstantBusUseCount;
3523 }
3524 } else { // Expression or a literal
3525
3526 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3527 continue; // special operand like VINTERP attr_chan
3528
3529 // An instruction may use only one literal.
3530 // This has been validated on the previous step.
3531 // See validateVOPLiteral.
3532 // This literal may be used as more than one operand.
3533 // If all these operands are of the same size,
3534 // this literal counts as one scalar value.
3535 // Otherwise it counts as 2 scalar values.
3536 // See "GFX10 Shader Programming", section 3.6.2.3.
3537
3538 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3539 if (Size < 4)
3540 Size = 4;
3541
3542 if (NumLiterals == 0) {
3543 NumLiterals = 1;
3544 LiteralSize = Size;
3545 } else if (LiteralSize != Size) {
3546 NumLiterals = 2;
3547 }
3548 }
3549 }
3550 }
3551 ConstantBusUseCount += NumLiterals;
3552
3553 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3554 return true;
3555
3556 SMLoc LitLoc = getLitLoc(Operands);
3557 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3558 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3559 Error(Loc, "invalid operand (violates constant bus restrictions)");
3560 return false;
3561}
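// Illustrative example (assumed instructions, not part of the original file):
// on targets with a constant bus limit of 1, "v_add_f32_e64 v0, s0, s1" reads
// two distinct SGPRs and is rejected with "invalid operand (violates constant
// bus restrictions)", while "v_add_f32_e64 v0, s0, s0" counts s0 only once
// and is accepted; getConstantBusLimit() raises the limit to 2 on GFX10+.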
3562
3563bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3564 const MCInst &Inst, const OperandVector &Operands) {
3565
3566 const unsigned Opcode = Inst.getOpcode();
3567 if (!isVOPD(Opcode))
3568 return true;
3569
3570 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3571
3572 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3573 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3574 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3575 ? Opr.getReg()
3577 };
3578
3579 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3580 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(getVRegIdx);
3581 if (!InvalidCompOprIdx)
3582 return true;
3583
3584 auto CompOprIdx = *InvalidCompOprIdx;
3585 auto ParsedIdx =
3586 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3587 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3588 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3589
3590 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3591 if (CompOprIdx == VOPD::Component::DST) {
3592 Error(Loc, "one dst register must be even and the other odd");
3593 } else {
3594 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3595 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3596 " operands must use different VGPR banks");
3597 }
3598
3599 return false;
3600}
3601
3602bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3603
3604 const unsigned Opc = Inst.getOpcode();
3605 const MCInstrDesc &Desc = MII.get(Opc);
3606
3607 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3608 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3609 assert(ClampIdx != -1);
3610 return Inst.getOperand(ClampIdx).getImm() == 0;
3611 }
3612
3613 return true;
3614}
3615
3616bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3617 const SMLoc &IDLoc) {
3618
3619 const unsigned Opc = Inst.getOpcode();
3620 const MCInstrDesc &Desc = MII.get(Opc);
3621
3622 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3623 return true;
3624
3625 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3626 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3627 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3628
3629 assert(VDataIdx != -1);
3630
3631 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3632 return true;
3633
3634 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3635 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3636 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3637 if (DMask == 0)
3638 DMask = 1;
3639
3640 bool IsPackedD16 = false;
3641 unsigned DataSize =
3642 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3643 if (hasPackedD16()) {
3644 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3645 IsPackedD16 = D16Idx >= 0;
3646 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3647 DataSize = (DataSize + 1) / 2;
3648 }
3649
3650 if ((VDataSize / 4) == DataSize + TFESize)
3651 return true;
3652
3653 StringRef Modifiers;
3654 if (isGFX90A())
3655 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3656 else
3657 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3658
3659 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3660 return false;
3661}
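// Illustrative example: an "image_load ... dmask:0x7" returns three components
// and therefore needs a 3-register vdata tuple (one more with tfe); with
// packed d16 the component count is halved and rounded up.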
3662
3663bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3664 const unsigned Opc = Inst.getOpcode();
3665 const MCInstrDesc &Desc = MII.get(Opc);
3666
3667 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3668 return true;
3669
3670 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3671
3672 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3673 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3674 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3675 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3676 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3677 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3678
3679 assert(VAddr0Idx != -1);
3680 assert(SrsrcIdx != -1);
3681 assert(SrsrcIdx > VAddr0Idx);
3682
3683 if (DimIdx == -1)
3684 return true; // intersect_ray
3685
3686 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3687 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3688 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3689 unsigned ActualAddrSize =
3690 IsNSA ? SrsrcIdx - VAddr0Idx
3691 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3692 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3693
3694 unsigned ExpectedAddrSize =
3695 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3696
3697 if (IsNSA) {
3698 if (hasPartialNSAEncoding() && ExpectedAddrSize > getNSAMaxSize()) {
3699 int VAddrLastIdx = SrsrcIdx - 1;
3700 unsigned VAddrLastSize =
3701 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3702
3703 return VAddrLastIdx - VAddr0Idx + VAddrLastSize == ExpectedAddrSize;
3704 }
3705 } else {
3706 if (ExpectedAddrSize > 12)
3707 ExpectedAddrSize = 16;
3708
3709 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3710 // This provides backward compatibility for assembly created
3711 // before 160b/192b/224b types were directly supported.
3712 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3713 return true;
3714 }
3715
3716 return ActualAddrSize == ExpectedAddrSize;
3717}
3718
3719bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3720
3721 const unsigned Opc = Inst.getOpcode();
3722 const MCInstrDesc &Desc = MII.get(Opc);
3723
3724 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3725 return true;
3726 if (!Desc.mayLoad() || !Desc.mayStore())
3727 return true; // Not atomic
3728
3729 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3730 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3731
3732 // This is an incomplete check because image_atomic_cmpswap
3733 // may only use 0x3 and 0xf while other atomic operations
3734 // may use 0x1 and 0x3. However these limitations are
3735 // verified when we check that dmask matches dst size.
3736 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3737}
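// Illustrative example: a 32-bit image atomic is written with dmask:0x1, a
// 64-bit atomic (or the 32-bit cmpswap data/compare pair) with dmask:0x3, and
// the 64-bit cmpswap pair with dmask:0xf; other masks are diagnosed here.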
3738
3739bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3740
3741 const unsigned Opc = Inst.getOpcode();
3742 const MCInstrDesc &Desc = MII.get(Opc);
3743
3744 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3745 return true;
3746
3747 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3748 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3749
3750 // GATHER4 instructions use dmask in a different fashion compared to
3751 // other MIMG instructions. The only useful DMASK values are
3752 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3753 // (red,red,red,red) etc.) The ISA document doesn't mention
3754 // this.
3755 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3756}
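// Illustrative example: "image_gather4 v[0:3], ... dmask:0x1" gathers the red
// channel from four texels; dmask:0x2, 0x4 and 0x8 select green, blue and alpha.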
3757
3758bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3759 const unsigned Opc = Inst.getOpcode();
3760 const MCInstrDesc &Desc = MII.get(Opc);
3761
3762 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3763 return true;
3764
3765 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3766 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3767 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3768
3769 if (!BaseOpcode->MSAA)
3770 return true;
3771
3772 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3773 assert(DimIdx != -1);
3774
3775 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3776 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3777
3778 return DimInfo->MSAA;
3779}
3780
3781static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3782{
3783 switch (Opcode) {
3784 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3785 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3786 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3787 return true;
3788 default:
3789 return false;
3790 }
3791}
3792
3793// movrels* opcodes should only allow VGPRs as src0.
3794// This is specified in .td description for vop1/vop3,
3795// but sdwa is handled differently. See isSDWAOperand.
3796bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3797 const OperandVector &Operands) {
3798
3799 const unsigned Opc = Inst.getOpcode();
3800 const MCInstrDesc &Desc = MII.get(Opc);
3801
3802 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3803 return true;
3804
3805 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3806 assert(Src0Idx != -1);
3807
3808 SMLoc ErrLoc;
3809 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3810 if (Src0.isReg()) {
3811 auto Reg = mc2PseudoReg(Src0.getReg());
3812 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3813 if (!isSGPR(Reg, TRI))
3814 return true;
3815 ErrLoc = getRegLoc(Reg, Operands);
3816 } else {
3817 ErrLoc = getConstLoc(Operands);
3818 }
3819
3820 Error(ErrLoc, "source operand must be a VGPR");
3821 return false;
3822}
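// Illustrative example: "v_movrels_b32_sdwa v0, v1" is accepted because src0
// is a VGPR, while an SGPR or constant src0 triggers the diagnostic above.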
3823
3824bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3825 const OperandVector &Operands) {
3826
3827 const unsigned Opc = Inst.getOpcode();
3828
3829 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3830 return true;
3831
3832 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3833 assert(Src0Idx != -1);
3834
3835 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3836 if (!Src0.isReg())
3837 return true;
3838
3839 auto Reg = mc2PseudoReg(Src0.getReg());
3840 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3841 if (!isGFX90A() && isSGPR(Reg, TRI)) {
3842 Error(getRegLoc(Reg, Operands),
3843 "source operand must be either a VGPR or an inline constant");
3844 return false;
3845 }
3846
3847 return true;
3848}
3849
3850bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
3851 const OperandVector &Operands) {
3852 unsigned Opcode = Inst.getOpcode();
3853 const MCInstrDesc &Desc = MII.get(Opcode);
3854
3855 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
3856 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
3857 return true;
3858
3859 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
3860 if (Src2Idx == -1)
3861 return true;
3862
3863 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
3864 Error(getConstLoc(Operands),
3865 "inline constants are not allowed for this operand");
3866 return false;
3867 }
3868
3869 return true;
3870}
3871
3872bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3873 const OperandVector &Operands) {
3874 const unsigned Opc = Inst.getOpcode();
3875 const MCInstrDesc &Desc = MII.get(Opc);
3876
3877 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3878 return true;
3879
3880 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3881 if (Src2Idx == -1)
3882 return true;
3883
3884 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3885 if (!Src2.isReg())
3886 return true;
3887
3888 MCRegister Src2Reg = Src2.getReg();
3889 MCRegister DstReg = Inst.getOperand(0).getReg();
3890 if (Src2Reg == DstReg)
3891 return true;
3892
3893 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3894 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
3895 return true;
3896
3897 if (TRI->regsOverlap(Src2Reg, DstReg)) {
3898 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3899 "source 2 operand must not partially overlap with dst");
3900 return false;
3901 }
3902
3903 return true;
3904}
3905
3906bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3907 switch (Inst.getOpcode()) {
3908 default:
3909 return true;
3910 case V_DIV_SCALE_F32_gfx6_gfx7:
3911 case V_DIV_SCALE_F32_vi:
3912 case V_DIV_SCALE_F32_gfx10:
3913 case V_DIV_SCALE_F64_gfx6_gfx7:
3914 case V_DIV_SCALE_F64_vi:
3915 case V_DIV_SCALE_F64_gfx10:
3916 break;
3917 }
3918
3919 // TODO: Check that src0 = src1 or src2.
3920
3921 for (auto Name : {AMDGPU::OpName::src0_modifiers,
3922 AMDGPU::OpName::src1_modifiers,
3923 AMDGPU::OpName::src2_modifiers}) {
3924 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3925 .getImm() &
3926 SISrcMods::ABS) {
3927 return false;
3928 }
3929 }
3930
3931 return true;
3932}
3933
3934bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3935
3936 const unsigned Opc = Inst.getOpcode();
3937 const MCInstrDesc &Desc = MII.get(Opc);
3938
3939 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3940 return true;
3941
3942 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3943 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3944 if (isCI() || isSI())
3945 return false;
3946 }
3947
3948 return true;
3949}
3950
3951static bool IsRevOpcode(const unsigned Opcode)
3952{
3953 switch (Opcode) {
3954 case AMDGPU::V_SUBREV_F32_e32:
3955 case AMDGPU::V_SUBREV_F32_e64:
3956 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3957 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3958 case AMDGPU::V_SUBREV_F32_e32_vi:
3959 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3960 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3961 case AMDGPU::V_SUBREV_F32_e64_vi:
3962
3963 case AMDGPU::V_SUBREV_CO_U32_e32:
3964 case AMDGPU::V_SUBREV_CO_U32_e64:
3965 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3966 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3967
3968 case AMDGPU::V_SUBBREV_U32_e32:
3969 case AMDGPU::V_SUBBREV_U32_e64:
3970 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3971 case AMDGPU::V_SUBBREV_U32_e32_vi:
3972 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3973 case AMDGPU::V_SUBBREV_U32_e64_vi:
3974
3975 case AMDGPU::V_SUBREV_U32_e32:
3976 case AMDGPU::V_SUBREV_U32_e64:
3977 case AMDGPU::V_SUBREV_U32_e32_gfx9:
3978 case AMDGPU::V_SUBREV_U32_e32_vi:
3979 case AMDGPU::V_SUBREV_U32_e64_gfx9:
3980 case AMDGPU::V_SUBREV_U32_e64_vi:
3981
3982 case AMDGPU::V_SUBREV_F16_e32:
3983 case AMDGPU::V_SUBREV_F16_e64:
3984 case AMDGPU::V_SUBREV_F16_e32_gfx10:
3985 case AMDGPU::V_SUBREV_F16_e32_vi:
3986 case AMDGPU::V_SUBREV_F16_e64_gfx10:
3987 case AMDGPU::V_SUBREV_F16_e64_vi:
3988
3989 case AMDGPU::V_SUBREV_U16_e32:
3990 case AMDGPU::V_SUBREV_U16_e64:
3991 case AMDGPU::V_SUBREV_U16_e32_vi:
3992 case AMDGPU::V_SUBREV_U16_e64_vi:
3993
3994 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3995 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3996 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3997
3998 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3999 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4000
4001 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4002 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4003
4004 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4005 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4006
4007 case AMDGPU::V_LSHRREV_B32_e32:
4008 case AMDGPU::V_LSHRREV_B32_e64:
4009 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4010 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4011 case AMDGPU::V_LSHRREV_B32_e32_vi:
4012 case AMDGPU::V_LSHRREV_B32_e64_vi:
4013 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4014 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4015
4016 case AMDGPU::V_ASHRREV_I32_e32:
4017 case AMDGPU::V_ASHRREV_I32_e64:
4018 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4019 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4020 case AMDGPU::V_ASHRREV_I32_e32_vi:
4021 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4022 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4023 case AMDGPU::V_ASHRREV_I32_e64_vi:
4024
4025 case AMDGPU::V_LSHLREV_B32_e32:
4026 case AMDGPU::V_LSHLREV_B32_e64:
4027 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4028 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4029 case AMDGPU::V_LSHLREV_B32_e32_vi:
4030 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4031 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4032 case AMDGPU::V_LSHLREV_B32_e64_vi:
4033
4034 case AMDGPU::V_LSHLREV_B16_e32:
4035 case AMDGPU::V_LSHLREV_B16_e64:
4036 case AMDGPU::V_LSHLREV_B16_e32_vi:
4037 case AMDGPU::V_LSHLREV_B16_e64_vi:
4038 case AMDGPU::V_LSHLREV_B16_gfx10:
4039
4040 case AMDGPU::V_LSHRREV_B16_e32:
4041 case AMDGPU::V_LSHRREV_B16_e64:
4042 case AMDGPU::V_LSHRREV_B16_e32_vi:
4043 case AMDGPU::V_LSHRREV_B16_e64_vi:
4044 case AMDGPU::V_LSHRREV_B16_gfx10:
4045
4046 case AMDGPU::V_ASHRREV_I16_e32:
4047 case AMDGPU::V_ASHRREV_I16_e64:
4048 case AMDGPU::V_ASHRREV_I16_e32_vi:
4049 case AMDGPU::V_ASHRREV_I16_e64_vi:
4050 case AMDGPU::V_ASHRREV_I16_gfx10:
4051
4052 case AMDGPU::V_LSHLREV_B64_e64:
4053 case AMDGPU::V_LSHLREV_B64_gfx10:
4054 case AMDGPU::V_LSHLREV_B64_vi:
4055
4056 case AMDGPU::V_LSHRREV_B64_e64:
4057 case AMDGPU::V_LSHRREV_B64_gfx10:
4058 case AMDGPU::V_LSHRREV_B64_vi:
4059
4060 case AMDGPU::V_ASHRREV_I64_e64:
4061 case AMDGPU::V_ASHRREV_I64_gfx10:
4062 case AMDGPU::V_ASHRREV_I64_vi:
4063
4064 case AMDGPU::V_PK_LSHLREV_B16:
4065 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4066 case AMDGPU::V_PK_LSHLREV_B16_vi:
4067
4068 case AMDGPU::V_PK_LSHRREV_B16:
4069 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4070 case AMDGPU::V_PK_LSHRREV_B16_vi:
4071 case AMDGPU::V_PK_ASHRREV_I16:
4072 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4073 case AMDGPU::V_PK_ASHRREV_I16_vi:
4074 return true;
4075 default:
4076 return false;
4077 }
4078}
4079
4080std::optional<StringRef>
4081AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4082
4083 using namespace SIInstrFlags;
4084 const unsigned Opcode = Inst.getOpcode();
4085 const MCInstrDesc &Desc = MII.get(Opcode);
4086
4087 // lds_direct register is defined so that it can be used
4088 // with 9-bit operands only. Ignore encodings which do not accept these.
4089 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4090 if ((Desc.TSFlags & Enc) == 0)
4091 return std::nullopt;
4092
4093 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4094 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4095 if (SrcIdx == -1)
4096 break;
4097 const auto &Src = Inst.getOperand(SrcIdx);
4098 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4099
4100 if (isGFX90A() || isGFX11Plus())
4101 return StringRef("lds_direct is not supported on this GPU");
4102
4103 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4104 return StringRef("lds_direct cannot be used with this instruction");
4105
4106 if (SrcName != OpName::src0)
4107 return StringRef("lds_direct may be used as src0 only");
4108 }
4109 }
4110
4111 return std::nullopt;
4112}
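// Illustrative example: "v_mov_b32 v0, lds_direct" is accepted on targets that
// still support LDS direct reads; using lds_direct as src1/src2, with SDWA or
// a *rev* opcode, or on gfx90a/gfx11+ is diagnosed above.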
4113
4114SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4115 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4116 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4117 if (Op.isFlatOffset())
4118 return Op.getStartLoc();
4119 }
4120 return getLoc();
4121}
4122
4123bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4124 const OperandVector &Operands) {
4125 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4126 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4127 return true;
4128
4129 auto Opcode = Inst.getOpcode();
4130 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4131 assert(OpNum != -1);
4132
4133 const auto &Op = Inst.getOperand(OpNum);
4134 if (!hasFlatOffsets() && Op.getImm() != 0) {
4135 Error(getFlatOffsetLoc(Operands),
4136 "flat offset modifier is not supported on this GPU");
4137 return false;
4138 }
4139
4140 // For FLAT segment the offset must be positive;
4141 // MSB is ignored and forced to zero.
4142 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4143 bool AllowNegative =
4144 TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
4145 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4146 Error(getFlatOffsetLoc(Operands),
4147 Twine("expected a ") +
4148 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4149 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4150 return false;
4151 }
4152
4153 return true;
4154}
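// Illustrative example: "global_load_dword v0, v[2:3], off offset:-16" is
// accepted because global/scratch offsets are signed, while a negative offset
// on a plain FLAT access is rejected since FLAT segment offsets must be
// non-negative.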
4155
4156SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4157 // Start with second operand because SMEM Offset cannot be dst or src0.
4158 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4159 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4160 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4161 return Op.getStartLoc();
4162 }
4163 return getLoc();
4164}
4165
4166bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4167 const OperandVector &Operands) {
4168 if (isCI() || isSI())
4169 return true;
4170
4171 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4172 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4173 return true;
4174
4175 auto Opcode = Inst.getOpcode();
4176 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4177 if (OpNum == -1)
4178 return true;
4179
4180 const auto &Op = Inst.getOperand(OpNum);
4181 if (!Op.isImm())
4182 return true;
4183
4184 uint64_t Offset = Op.getImm();
4185 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4186 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4187 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4188 return true;
4189
4190 Error(getSMEMOffsetLoc(Operands),
4191 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4192 "expected a 21-bit signed offset");
4193
4194 return false;
4195}
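// Illustrative example: "s_load_dword s4, s[0:1], 0xfffff" fits the 20-bit
// unsigned range accepted on VI and for buffer forms; later targets accept a
// 21-bit signed immediate instead, and values outside that range are
// diagnosed above.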
4196
4197bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4198 unsigned Opcode = Inst.getOpcode();
4199 const MCInstrDesc &Desc = MII.get(Opcode);
4200 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4201 return true;
4202
4203 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4204 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4205
4206 const int OpIndices[] = { Src0Idx, Src1Idx };
4207
4208 unsigned NumExprs = 0;
4209 unsigned NumLiterals = 0;
4210 uint32_t LiteralValue;
4211
4212 for (int OpIdx : OpIndices) {
4213 if (OpIdx == -1) break;
4214
4215 const MCOperand &MO = Inst.getOperand(OpIdx);
4216 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4217 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4218 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4219 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4220 if (NumLiterals == 0 || LiteralValue != Value) {
4221 LiteralValue = Value;
4222 ++NumLiterals;
4223 }
4224 } else if (MO.isExpr()) {
4225 ++NumExprs;
4226 }
4227 }
4228 }
4229
4230 return NumLiterals + NumExprs <= 1;
4231}
4232
4233bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4234 const unsigned Opc = Inst.getOpcode();
4235 if (isPermlane16(Opc)) {
4236 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4237 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4238
4239 if (OpSel & ~3)
4240 return false;
4241 }
4242
4243 uint64_t TSFlags = MII.get(Opc).TSFlags;
4244
4245 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4246 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4247 if (OpSelIdx != -1) {
4248 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4249 return false;
4250 }
4251 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4252 if (OpSelHiIdx != -1) {
4253 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4254 return false;
4255 }
4256 }
4257
4258 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4259 if ((TSFlags & SIInstrFlags::IsDOT) && (TSFlags & SIInstrFlags::VOP3) &&
4260 !(TSFlags & SIInstrFlags::VOP3P)) {
4261 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4262 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4263 if (OpSel & 3)
4264 return false;
4265 }
4266
4267 return true;
4268}
4269
4270bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4271 const OperandVector &Operands) {
4272 const unsigned Opc = Inst.getOpcode();
4273 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4274 if (DppCtrlIdx < 0)
4275 return true;
4276 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4277
4278 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4279 // DPP64 is supported for row_newbcast only.
4280 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4281 if (Src0Idx >= 0 &&
4282 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4283 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4284 Error(S, "64 bit dpp only supports row_newbcast");
4285 return false;
4286 }
4287 }
4288
4289 return true;
4290}
4291
4292// Check if VCC register matches wavefront size
4293bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4294 auto FB = getFeatureBits();
4295 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4296 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4297}
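// Illustrative example: "v_cndmask_b32_e32 v0, v1, v2, vcc" matches a wave64
// configuration, whereas wave32 code is expected to name vcc_lo.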
4298
4299// One unique literal can be used. VOP3 literal is only allowed in GFX10+
4300bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4301 const OperandVector &Operands) {
4302 unsigned Opcode = Inst.getOpcode();
4303 const MCInstrDesc &Desc = MII.get(Opcode);
4304 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4305 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4306 !HasMandatoryLiteral && !isVOPD(Opcode))
4307 return true;
4308
4309 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4310
4311 unsigned NumExprs = 0;
4312 unsigned NumLiterals = 0;
4313 uint32_t LiteralValue;
4314
4315 for (int OpIdx : OpIndices) {
4316 if (OpIdx == -1)
4317 continue;
4318
4319 const MCOperand &MO = Inst.getOperand(OpIdx);
4320 if (!MO.isImm() && !MO.isExpr())
4321 continue;
4322 if (!isSISrcOperand(Desc, OpIdx))
4323 continue;
4324
4325 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4326 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4327 if (NumLiterals == 0 || LiteralValue != Value) {
4328 LiteralValue = Value;
4329 ++NumLiterals;
4330 }
4331 } else if (MO.isExpr()) {
4332 ++NumExprs;
4333 }
4334 }
4335 NumLiterals += NumExprs;
4336
4337 if (!NumLiterals)
4338 return true;
4339
4340 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4341 Error(getLitLoc(Operands), "literal operands are not supported");
4342 return false;
4343 }
4344
4345 if (NumLiterals > 1) {
4346 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4347 return false;
4348 }
4349
4350 return true;
4351}
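// Illustrative example: "v_add_f32_e64 v0, 0x3e000000, v1" is accepted only on
// targets with the VOP3 literal feature (gfx10+), and no instruction may use
// two different 32-bit literal values.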
4352
4353// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4354static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4355 const MCRegisterInfo *MRI) {
4356 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4357 if (OpIdx < 0)
4358 return -1;
4359
4360 const MCOperand &Op = Inst.getOperand(OpIdx);
4361 if (!Op.isReg())
4362 return -1;
4363
4364 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4365 auto Reg = Sub ? Sub : Op.getReg();
4366 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4367 return AGPR32.contains(Reg) ? 1 : 0;
4368}
4369
4370bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4371 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4372 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4373 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4374 SIInstrFlags::DS)) == 0)
4375 return true;
4376
4377 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4378 : AMDGPU::OpName::vdata;
4379
4380 const MCRegisterInfo *MRI = getMRI();
4381 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4382 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4383
4384 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4385 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4386 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4387 return false;
4388 }
4389
4390 auto FB = getFeatureBits();
4391 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4392 if (DataAreg < 0 || DstAreg < 0)
4393 return true;
4394 return DstAreg == DataAreg;
4395 }
4396
4397 return DstAreg < 1 && DataAreg < 1;
4398}
4399
4400bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4401 auto FB = getFeatureBits();
4402 if (!FB[AMDGPU::FeatureGFX90AInsts])
4403 return true;
4404
4405 const MCRegisterInfo *MRI = getMRI();
4406 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4407 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4408 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4409 const MCOperand &Op = Inst.getOperand(I);
4410 if (!Op.isReg())
4411 continue;
4412
4413 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4414 if (!Sub)
4415 continue;
4416
4417 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4418 return false;
4419 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4420 return false;
4421 }
4422
4423 return true;
4424}
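// Illustrative example: on gfx90a a 64-bit tuple such as v[2:3] or a[4:5] is
// accepted, while an odd-aligned tuple like v[1:2] is diagnosed above.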
4425
4426SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4427 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4428 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4429 if (Op.isBLGP())
4430 return Op.getStartLoc();
4431 }
4432 return SMLoc();
4433}
4434
4435bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4436 const OperandVector &Operands) {
4437 unsigned Opc = Inst.getOpcode();
4438 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4439 if (BlgpIdx == -1)
4440 return true;
4441 SMLoc BLGPLoc = getBLGPLoc(Operands);
4442 if (!BLGPLoc.isValid())
4443 return true;
4444 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4445 auto FB = getFeatureBits();
4446 bool UsesNeg = false;
4447 if (FB[AMDGPU::FeatureGFX940Insts]) {
4448 switch (Opc) {
4449 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4450 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4451 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4452 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4453 UsesNeg = true;
4454 }
4455 }
4456
4457 if (IsNeg == UsesNeg)
4458 return true;
4459
4460 Error(BLGPLoc,
4461 UsesNeg ? "invalid modifier: blgp is not supported"
4462 : "invalid modifier: neg is not supported");
4463
4464 return false;
4465}
4466
4467bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4468 const OperandVector &Operands) {
4469 if (!isGFX11Plus())
4470 return true;
4471
4472 unsigned Opc = Inst.getOpcode();
4473 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4474 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4475 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4476 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4477 return true;
4478
4479 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4480 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4481 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4482 if (Reg == AMDGPU::SGPR_NULL)
4483 return true;
4484
4485 SMLoc RegLoc = getRegLoc(Reg, Operands);
4486 Error(RegLoc, "src0 must be null");
4487 return false;
4488}
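// Illustrative example: "s_waitcnt_vscnt null, 0x0" is the accepted gfx11
// form; naming a real SGPR destination instead of null is diagnosed above.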
4489
4490// gfx90a has an undocumented limitation:
4491// DS_GWS opcodes must use even aligned registers.
4492bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4493 const OperandVector &Operands) {
4494 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4495 return true;
4496
4497 int Opc = Inst.getOpcode();
4498 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4499 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4500 return true;
4501
4502 const MCRegisterInfo *MRI = getMRI();
4503 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4504 int Data0Pos =
4505 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4506 assert(Data0Pos != -1);
4507 auto Reg = Inst.getOperand(Data0Pos).getReg();
4508 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4509 if (RegIdx & 1) {
4510 SMLoc RegLoc = getRegLoc(Reg, Operands);
4511 Error(RegLoc, "vgpr must be even aligned");
4512 return false;
4513 }
4514
4515 return true;
4516}
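// Illustrative example: "ds_gws_init v2 gds" satisfies the gfx90a alignment
// rule, while an odd data register such as v3 is diagnosed above.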
4517
4518bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4519 const OperandVector &Operands,
4520 const SMLoc &IDLoc) {
4521 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4522 AMDGPU::OpName::cpol);
4523 if (CPolPos == -1)
4524 return true;
4525
4526 unsigned CPol = Inst.getOperand(CPolPos).getImm();
4527
4528 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4529 if (TSFlags & SIInstrFlags::SMRD) {
4530 if (CPol && (isSI() || isCI())) {
4531 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4532 Error(S, "cache policy is not supported for SMRD instructions");
4533 return false;
4534 }
4535 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4536 Error(IDLoc, "invalid cache policy for SMEM instruction");
4537 return false;
4538 }
4539 }
4540
4541 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4542 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4543 StringRef CStr(S.getPointer());
4544 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4545 Error(S, "scc is not supported on this GPU");
4546 return false;
4547 }
4548
4549 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4550 return true;
4551
4552 if (TSFlags & SIInstrFlags::IsAtomicRet) {
4553 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4554 Error(IDLoc, isGFX940() ? "instruction must use sc0"
4555 : "instruction must use glc");
4556 return false;
4557 }
4558 } else {
4559 if (CPol & CPol::GLC) {
4560 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4561 StringRef CStr(S.getPointer());
4562 S = SMLoc::getFromPointer(
4563 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4564 Error(S, isGFX940() ? "instruction must not use sc0"
4565 : "instruction must not use glc");
4566 return false;
4567 }
4568 }
4569
4570 return true;
4571}
4572
4573bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
4574 if (!isGFX11Plus())
4575 return true;
4576 for (auto &Operand : Operands) {
4577 if (!Operand->isReg())
4578 continue;
4579 unsigned Reg = Operand->getReg();
4580 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
4581 Error(getRegLoc(Reg, Operands),
4582 "execz and vccz are not supported on this GPU");
4583 return false;
4584 }
4585 }
4586 return true;
4587}
4588
4589bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
4590 const OperandVector &Operands) {
4591 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4592 if (Desc.mayStore() &&
4593 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4594 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
4595 if (Loc != getInstLoc(Operands)) {
4596 Error(Loc, "TFE modifier has no meaning for store instructions");
4597 return false;
4598 }
4599 }
4600
4601 return true;
4602}
4603
4604bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4605 const SMLoc &IDLoc,
4606 const OperandVector &Operands) {
4607 if (auto ErrMsg = validateLdsDirect(Inst)) {
4608 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4609 return false;
4610 }
4611 if (!validateSOPLiteral(Inst)) {
4612 Error(getLitLoc(Operands),
4613 "only one unique literal operand is allowed");
4614 return false;
4615 }
4616 if (!validateVOPLiteral(Inst, Operands)) {
4617 return false;
4618 }
4619 if (!validateConstantBusLimitations(Inst, Operands)) {
4620 return false;
4621 }
4622 if (!validateVOPDRegBankConstraints(Inst, Operands)) {
4623 return false;
4624 }
4625 if (!validateIntClampSupported(Inst)) {
4626 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4627 "integer clamping is not supported on this GPU");
4628 return false;
4629 }
4630 if (!validateOpSel(Inst)) {
4631 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4632 "invalid op_sel operand");
4633 return false;
4634 }
4635 if (!validateDPP(Inst, Operands)) {
4636 return false;
4637 }
4638 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4639 if (!validateMIMGD16(Inst)) {
4640 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4641 "d16 modifier is not supported on this GPU");
4642 return false;
4643 }
4644 if (!validateMIMGMSAA(Inst)) {
4645 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4646 "invalid dim; must be MSAA type");
4647 return false;
4648 }
4649 if (!validateMIMGDataSize(Inst, IDLoc)) {
4650 return false;
4651 }
4652 if (!validateMIMGAddrSize(Inst)) {
4653 Error(IDLoc,
4654 "image address size does not match dim and a16");
4655 return false;
4656 }
4657 if (!validateMIMGAtomicDMask(Inst)) {
4658 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4659 "invalid atomic image dmask");
4660 return false;
4661 }
4662 if (!validateMIMGGatherDMask(Inst)) {
4663 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4664 "invalid image_gather dmask: only one bit must be set");
4665 return false;
4666 }
4667 if (!validateMovrels(Inst, Operands)) {
4668 return false;
4669 }
4670 if (!validateFlatOffset(Inst, Operands)) {
4671 return false;
4672 }
4673 if (!validateSMEMOffset(Inst, Operands)) {
4674 return false;
4675 }
4676 if (!validateMAIAccWrite(Inst, Operands)) {
4677 return false;
4678 }
4679 if (!validateMAISrc2(Inst, Operands)) {
4680 return false;
4681 }
4682 if (!validateMFMA(Inst, Operands)) {
4683 return false;
4684 }
4685 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4686 return false;
4687 }
4688
4689 if (!validateAGPRLdSt(Inst)) {
4690 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4691 ? "invalid register class: data and dst should be all VGPR or AGPR"
4692 : "invalid register class: agpr loads and stores not supported on this GPU"
4693 );
4694 return false;
4695 }
4696 if (!validateVGPRAlign(Inst)) {
4697 Error(IDLoc,
4698 "invalid register class: vgpr tuples must be 64 bit aligned");
4699 return false;
4700 }
4701 if (!validateGWS(Inst, Operands)) {
4702 return false;
4703 }
4704
4705 if (!validateBLGP(Inst, Operands)) {
4706 return false;
4707 }
4708
4709 if (!validateDivScale(Inst)) {
4710 Error(IDLoc, "ABS not allowed in VOP3B instructions");
4711 return false;
4712 }
4713 if (!validateWaitCnt(Inst, Operands)) {
4714 return false;
4715 }
4716 if (!validateExeczVcczOperands(Operands)) {
4717 return false;
4718 }
4719 if (!validateTFE(Inst, Operands)) {
4720 return false;
4721 }
4722
4723 return true;
4724}
4725
4726 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4727 const FeatureBitset &FBS,
4728 unsigned VariantID = 0);
4729
4730static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4731 const FeatureBitset &AvailableFeatures,
4732 unsigned VariantID);
4733
4734bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4735 const FeatureBitset &FBS) {
4736 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4737}
4738
4739bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4740 const FeatureBitset &FBS,
4741 ArrayRef<unsigned> Variants) {
4742 for (auto Variant : Variants) {
4743 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4744 return true;
4745 }
4746
4747 return false;
4748}
4749
4750bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4751 const SMLoc &IDLoc) {
4752 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
4753
4754 // Check if requested instruction variant is supported.
4755 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4756 return false;
4757
4758 // This instruction is not supported.
4759 // Clear any other pending errors because they are no longer relevant.
4760 getParser().clearPendingErrors();
4761
4762 // Requested instruction variant is not supported.
4763 // Check if any other variants are supported.
4764 StringRef VariantName = getMatchedVariantName();
4765 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4766 return Error(IDLoc,
4767 Twine(VariantName,
4768 " variant of this instruction is not supported"));
4769 }
4770
4771 // Check if this instruction may be used with a different wavesize.
4772 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
4773 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
4774
4775 FeatureBitset FeaturesWS32 = getFeatureBits();
4776 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
4777 .flip(AMDGPU::FeatureWavefrontSize32);
4778 FeatureBitset AvailableFeaturesWS32 =
4779 ComputeAvailableFeatures(FeaturesWS32);
4780
4781 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
4782 return Error(IDLoc, "instruction requires wavesize=32");
4783 }
4784
4785 // Finally check if this instruction is supported on any other GPU.
4786 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4787 return Error(IDLoc, "instruction not supported on this GPU");
4788 }
4789
4790 // Instruction not supported on any GPU. Probably a typo.
4791 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4792 return Error(IDLoc, "invalid instruction" + Suggestion);
4793}
4794
4795 static bool isInvalidVOPDY(const OperandVector &Operands,
4796 uint64_t InvalidOprIdx) {
4797 assert(InvalidOprIdx < Operands.size());
4798 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
4799 if (Op.isToken() && InvalidOprIdx > 1) {
4800 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
4801 return PrevOp.isToken() && PrevOp.getToken() == "::";
4802 }
4803 return false;
4804}
4805
4806bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4807 OperandVector &Operands,
4808 MCStreamer &Out,
4809 uint64_t &ErrorInfo,
4810 bool MatchingInlineAsm) {
4811 MCInst Inst;
4812 unsigned Result = Match_Success;
4813 for (auto Variant : getMatchedVariants()) {
4814 uint64_t EI;
4815 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4816 Variant);
4817 // We order match statuses from least to most specific, and use the most
4818 // specific status as the result:
4819 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4820 if ((R == Match_Success) ||
4821 (R == Match_PreferE32) ||
4822 (R == Match_MissingFeature && Result != Match_PreferE32) ||
4823 (R == Match_InvalidOperand && Result != Match_MissingFeature
4824 && Result != Match_PreferE32) ||
4825 (R == Match_MnemonicFail && Result != Match_InvalidOperand
4826 && Result != Match_MissingFeature
4827 && Result != Match_PreferE32)) {
4828 Result = R;
4829 ErrorInfo = EI;
4830 }
4831 if (R == Match_Success)
4832 break;
4833 }
4834
4835 if (Result == Match_Success) {
4836 if (!validateInstruction(Inst, IDLoc, Operands)) {
4837 return true;
4838 }
4839 Inst.setLoc(IDLoc);
4840 Out.emitInstruction(Inst, getSTI());
4841 return false;
4842 }
4843
4844 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4845 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4846 return true;
4847 }
4848
4849 switch (Result) {
4850 default: break;
4851 case Match_MissingFeature:
4852 // It has been verified that the specified instruction
4853 // mnemonic is valid. A match was found but it requires
4854 // features which are not supported on this GPU.
4855 return Error(IDLoc, "operands are not valid for this GPU or mode");
4856
4857 case Match_InvalidOperand: {
4858 SMLoc ErrorLoc = IDLoc;
4859 if (ErrorInfo != ~0ULL) {
4860 if (ErrorInfo >= Operands.size()) {
4861 return Error(IDLoc, "too few operands for instruction");
4862 }
4863 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4864 if (ErrorLoc == SMLoc())
4865 ErrorLoc = IDLoc;
4866
4867 if (isInvalidVOPDY(Operands, ErrorInfo)) {
4868 return Error(ErrorLoc, "invalid VOPDY instruction");
4869 }
4870 return Error(ErrorLoc, "invalid operand for instruction");
4871 }
4872
4873 case Match_PreferE32:
4874 return Error(IDLoc, "internal error: instruction without _e64 suffix "
4875 "should be encoded as e32");
4876 case Match_MnemonicFail:
4877 llvm_unreachable("Invalid instructions should have been handled already");
4878 }
4879 llvm_unreachable("Implement any new match types added!");
4880}
4881
4882bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4883 int64_t Tmp = -1;
4884 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4885 return true;
4886 }
4887 if (getParser().parseAbsoluteExpression(Tmp)) {
4888 return true;
4889 }
4890 Ret = static_cast<uint32_t>(Tmp);
4891 return false;
4892}
4893
4894bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4895 uint32_t &Minor) {
4896 if (ParseAsAbsoluteExpression(Major))
4897 return TokError("invalid major version");
4898
4899 if (!trySkipToken(AsmToken::Comma))
4900 return TokError("minor version number required, comma expected");
4901
4902 if (ParseAsAbsoluteExpression(Minor))
4903 return TokError("invalid minor version");
4904
4905 return false;
4906}
4907
4908bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4909 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4910 return TokError("directive only supported for amdgcn architecture");
4911
4912 std::string TargetIDDirective;
4913 SMLoc TargetStart = getTok().getLoc();
4914 if (getParser().parseEscapedString(TargetIDDirective))
4915 return true;
4916
4917 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4918 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4919 return getParser().Error(TargetRange.Start,
4920 (Twine(".amdgcn_target directive's target id ") +
4921 Twine(TargetIDDirective) +
4922 Twine(" does not match the specified target id ") +
4923 Twine(getTargetStreamer().getTargetID()->toString())).str());
4924
4925 return false;
4926}
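// Illustrative example: a module built for gfx90a with XNACK enabled would use
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+"
// and the string must match the target id the assembler was configured with.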
4927
4928bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4929 return Error(Range.Start, "value out of range", Range);
4930}
4931
4932bool AMDGPUAsmParser::calculateGPRBlocks(
4933 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4934 bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
4935 unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
4936 SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4937 // TODO(scott.linder): These calculations are duplicated from
4938 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4939 IsaVersion Version = getIsaVersion(getSTI().getCPU());
4940
4941 unsigned NumVGPRs = NextFreeVGPR;
4942 unsigned NumSGPRs = NextFreeSGPR;
4943
4944 if (Version.Major >= 10)
4945 NumSGPRs = 0;
4946 else {
4947 unsigned MaxAddressableNumSGPRs =
4948 IsaInfo::getAddressableNumSGPRs(&getSTI());
4949
4950 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4951 NumSGPRs > MaxAddressableNumSGPRs)
4952 return OutOfRangeError(SGPRRange);
4953
4954 NumSGPRs +=
4955 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4956
4957 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4958 NumSGPRs > MaxAddressableNumSGPRs)
4959 return OutOfRangeError(SGPRRange);
4960
4961 if (Features.test(FeatureSGPRInitBug))
4962 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4963 }
4964
4965 VGPRBlocks =
4966 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4967 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4968
4969 return false;
4970}
4971
4972bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4973 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4974 return TokError("directive only supported for amdgcn architecture");
4975
4976 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4977 return TokError("directive only supported for amdhsa OS");
4978
4979 StringRef KernelName;
4980 if (getParser().parseIdentifier(KernelName))
4981 return true;
4982
4983 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4984
4985 StringSet<> Seen;
4986
4987 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4988
4989 SMRange VGPRRange;
4990 uint64_t NextFreeVGPR = 0;
4991 uint64_t AccumOffset = 0;
4992 uint64_t SharedVGPRCount = 0;
4993 SMRange SGPRRange;
4994 uint64_t NextFreeSGPR = 0;
4995
4996 // Count the number of user SGPRs implied from the enabled feature bits.
4997 unsigned ImpliedUserSGPRCount = 0;
4998
4999 // Track if the asm explicitly contains the directive for the user SGPR
5000 // count.
5001 std::optional<unsigned> ExplicitUserSGPRCount;
5002 bool ReserveVCC = true;
5003 bool ReserveFlatScr = true;
5004 std::optional<bool> EnableWavefrontSize32;
5005
5006 while (true) {
5007 while (trySkipToken(AsmToken::EndOfStatement));
5008
5009 StringRef ID;
5010 SMRange IDRange = getTok().getLocRange();
5011 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5012 return true;
5013
5014 if (ID == ".end_amdhsa_kernel")
5015 break;
5016
5017 if (!Seen.insert(ID).second)
5018 return TokError(".amdhsa_ directives cannot be repeated");
5019
5020 SMLoc ValStart = getLoc();
5021 int64_t IVal;
5022 if (getParser().parseAbsoluteExpression(IVal))
5023 return true;
5024 SMLoc ValEnd = getLoc();
5025 SMRange ValRange = SMRange(ValStart, ValEnd);
5026
5027 if (IVal < 0)
5028 return OutOfRangeError(ValRange);
5029
5030 uint64_t Val = IVal;
5031
5032#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5033 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
5034 return OutOfRangeError(RANGE); \
5035 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
5036
5037 if (ID == ".amdhsa_group_segment_fixed_size") {
5038 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
5039 return OutOfRangeError(ValRange);
5040 KD.group_segment_fixed_size = Val;
5041 } else if (ID == ".amdhsa_private_segment_fixed_size") {
5042 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
5043 return OutOfRangeError(ValRange);
5044 KD.private_segment_fixed_size = Val;
5045 } else if (ID == ".amdhsa_kernarg_size") {
5046 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
5047 return OutOfRangeError(ValRange);
5048 KD.kernarg_size = Val;
5049 } else if (ID == ".amdhsa_user_sgpr_count") {
5050 ExplicitUserSGPRCount = Val;
5051 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5052 if (hasArchitectedFlatScratch())
5053 return Error(IDRange.Start,
5054 "directive is not supported with architected flat scratch",
5055 IDRange);
5056 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5057 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5058 Val, ValRange);
5059 if (Val)
5060 ImpliedUserSGPRCount += 4;
5061 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5062 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5063 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
5064 ValRange);
5065 if (Val)
5066 ImpliedUserSGPRCount += 2;
5067 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5068 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5069 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
5070 ValRange);
5071 if (Val)
5072 ImpliedUserSGPRCount += 2;
5073 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5074 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5075 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5076 Val, ValRange);
5077 if (Val)
5078 ImpliedUserSGPRCount += 2;
5079 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5080 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5081 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
5082 ValRange);
5083 if (Val)
5084 ImpliedUserSGPRCount += 2;
5085 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5086 if (hasArchitectedFlatScratch())
5087 return Error(IDRange.Start,
5088 "directive is not supported with architected flat scratch",
5089 IDRange);
5090 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5091 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
5092 ValRange);
5093 if (Val)
5094 ImpliedUserSGPRCount += 2;
5095 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5096 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5097 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5098 Val, ValRange);
5099 if (Val)
5100 ImpliedUserSGPRCount += 1;
5101 } else if (ID == ".amdhsa_wavefront_size32") {
5102 if (IVersion.Major < 10)
5103 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5104 EnableWavefrontSize32 = Val;
5105 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5106 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
5107 Val, ValRange);
5108 } else if (ID == ".amdhsa_uses_dynamic_stack") {
5109 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5110 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
5111 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5112 if (hasArchitectedFlatScratch())
5113 return Error(IDRange.Start,
5114 "directive is not supported with architected flat scratch",
5115 IDRange);
5116 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5117 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5118 } else if (ID == ".amdhsa_enable_private_segment") {
5119 if (!hasArchitectedFlatScratch())
5120 return Error(
5121 IDRange.Start,
5122 "directive is not supported without architected flat scratch",
5123 IDRange);
5124 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5125 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5126 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5127 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5128 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
5129 ValRange);
5130 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5131 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5132 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
5133 ValRange);
5134 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5135 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5136 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
5137 ValRange);
5138 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5139 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5140 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
5141 ValRange);
5142 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5143 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5144 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
5145 ValRange);
5146 } else if (ID == ".amdhsa_next_free_vgpr") {
5147 VGPRRange = ValRange;
5148 NextFreeVGPR = Val;
5149 } else if (ID == ".amdhsa_next_free_sgpr") {
5150 SGPRRange = ValRange;
5151 NextFreeSGPR = Val;
5152 } else if (ID == ".amdhsa_accum_offset") {
5153 if (!isGFX90A())
5154 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5155 AccumOffset = Val;
5156 } else if (ID == ".amdhsa_reserve_vcc") {
5157 if (!isUInt<1>(Val))
5158 return OutOfRangeError(ValRange);
5159 ReserveVCC = Val;
5160 } else if (ID == ".amdhsa_reserve_flat_scratch") {
5161 if (IVersion.Major < 7)
5162 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5163 if (hasArchitectedFlatScratch())
5164 return Error(IDRange.Start,
5165 "directive is not supported with architected flat scratch",
5166 IDRange);
5167 if (!isUInt<1>(Val))
5168 return OutOfRangeError(ValRange);
5169 ReserveFlatScr = Val;
5170 } else if (ID == ".amdhsa_reserve_xnack_mask") {
5171 if (IVersion.Major < 8)
5172 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5173 if (!isUInt<1>(Val))
5174 return OutOfRangeError(ValRange);
5175 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5176 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5177 IDRange);
5178 } else if (ID == ".amdhsa_float_round_mode_32") {
5179 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5180 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5181 } else if (ID == ".amdhsa_float_round_mode_16_64") {
5182 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5183 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5184 } else if (ID == ".amdhsa_float_denorm_mode_32") {
5185 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5186 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5187 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5188 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5189 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5190 ValRange);
5191 } else if (ID == ".amdhsa_dx10_clamp") {
5192 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5193 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
5194 } else if (ID == ".amdhsa_ieee_mode") {
5195 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
5196 Val, ValRange);
5197 } else if (ID == ".amdhsa_fp16_overflow") {
5198 if (IVersion.Major < 9)
5199 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5200 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
5201 ValRange);
5202 } else if (ID == ".amdhsa_tg_split") {
5203 if (!isGFX90A())
5204 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5205 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5206 ValRange);
5207 } else if (ID == ".amdhsa_workgroup_processor_mode") {
5208 if (IVersion.Major < 10)
5209 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5210 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
5211 ValRange);
5212 } else if (ID == ".amdhsa_memory_ordered") {
5213 if (IVersion.Major < 10)
5214 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5215 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
5216 ValRange);
5217 } else if (ID == ".amdhsa_forward_progress") {
5218 if (IVersion.Major < 10)
5219 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5220 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5221 ValRange);
5222 } else if (ID == ".amdhsa_shared_vgpr_count") {
5223 if (IVersion.Major < 10)
5224 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5225 SharedVGPRCount = Val;
5226 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5227 COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val,
5228 ValRange);
5229 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5230 PARSE_BITS_ENTRY(
5231 KD.compute_pgm_rsrc2,
5232 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5233 ValRange);
5234 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5235 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5236 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5237 Val, ValRange);
5238 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5239 PARSE_BITS_ENTRY(
5240 KD.compute_pgm_rsrc2,
5241 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5242 ValRange);
5243 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5244 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5245 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5246 Val, ValRange);
5247 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5248 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5249 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5250 Val, ValRange);
5251 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5252 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5253 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5254 Val, ValRange);
5255 } else if (ID == ".amdhsa_exception_int_div_zero") {
5256 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5257 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5258 Val, ValRange);
5259 } else {
5260 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5261 }
5262
5263#undef PARSE_BITS_ENTRY
5264 }
5265
5266 if (!Seen.contains(".amdhsa_next_free_vgpr"))
5267 return TokError(".amdhsa_next_free_vgpr directive is required");
5268
5269 if (!Seen.contains(".amdhsa_next_free_sgpr"))
5270 return TokError(".amdhsa_next_free_sgpr directive is required");
5271
5272 unsigned VGPRBlocks;
5273 unsigned SGPRBlocks;
5274 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, Reserv