1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
14#include "SIDefines.h"
15#include "SIInstrInfo.h"
16#include "SIRegisterInfo.h"
21#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/StringSet.h"
24#include "llvm/ADT/Twine.h"
27#include "llvm/MC/MCAsmInfo.h"
28#include "llvm/MC/MCContext.h"
29#include "llvm/MC/MCExpr.h"
30#include "llvm/MC/MCInst.h"
31#include "llvm/MC/MCInstrDesc.h"
36#include "llvm/MC/MCSymbol.h"
43#include <optional>
44
45using namespace llvm;
46using namespace llvm::AMDGPU;
47using namespace llvm::amdhsa;
48
49namespace {
50
51class AMDGPUAsmParser;
52
53enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
54
55//===----------------------------------------------------------------------===//
56// Operand
57//===----------------------------------------------------------------------===//
58
59class AMDGPUOperand : public MCParsedAsmOperand {
60 enum KindTy {
61 Token,
 62 Immediate,
 63 Register,
 64 Expression
 65 } Kind;
66
67 SMLoc StartLoc, EndLoc;
68 const AMDGPUAsmParser *AsmParser;
69
70public:
71 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
72 : Kind(Kind_), AsmParser(AsmParser_) {}
73
74 using Ptr = std::unique_ptr<AMDGPUOperand>;
75
76 struct Modifiers {
77 bool Abs = false;
78 bool Neg = false;
79 bool Sext = false;
80 bool Lit = false;
81
82 bool hasFPModifiers() const { return Abs || Neg; }
83 bool hasIntModifiers() const { return Sext; }
84 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
85
86 int64_t getFPModifiersOperand() const {
87 int64_t Operand = 0;
88 Operand |= Abs ? SISrcMods::ABS : 0u;
89 Operand |= Neg ? SISrcMods::NEG : 0u;
90 return Operand;
91 }
92
93 int64_t getIntModifiersOperand() const {
94 int64_t Operand = 0;
95 Operand |= Sext ? SISrcMods::SEXT : 0u;
96 return Operand;
97 }
98
99 int64_t getModifiersOperand() const {
100 assert(!(hasFPModifiers() && hasIntModifiers())
101 && "fp and int modifiers should not be used simultaneously");
102 if (hasFPModifiers()) {
103 return getFPModifiersOperand();
104 } else if (hasIntModifiers()) {
105 return getIntModifiersOperand();
106 } else {
107 return 0;
108 }
109 }
110
111 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
112 };
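 // For illustration: the assert in getModifiersOperand() means an operand
 // never carries FP and integer modifiers at the same time, and the flags map
 // onto the SISrcMods masks from SIDefines.h. Assuming the usual SP3
 // spellings:
 //   -v0      -> Neg       -> SISrcMods::NEG
 //   |v0|     -> Abs       -> SISrcMods::ABS
 //   -|v0|    -> Neg + Abs -> SISrcMods::NEG | SISrcMods::ABS
 //   sext(v0) -> Sext      -> SISrcMods::SEXT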
113
114 enum ImmTy {
115 ImmTyNone,
116 ImmTyGDS,
117 ImmTyLDS,
118 ImmTyOffen,
119 ImmTyIdxen,
120 ImmTyAddr64,
121 ImmTyOffset,
122 ImmTyInstOffset,
123 ImmTyOffset0,
124 ImmTyOffset1,
125 ImmTySMEMOffsetMod,
126 ImmTyCPol,
127 ImmTyTFE,
128 ImmTyD16,
129 ImmTyClamp,
130 ImmTyOModSI,
131 ImmTySDWADstSel,
132 ImmTySDWASrc0Sel,
133 ImmTySDWASrc1Sel,
134 ImmTySDWADstUnused,
135 ImmTyDMask,
136 ImmTyDim,
137 ImmTyUNorm,
138 ImmTyDA,
139 ImmTyR128A16,
140 ImmTyA16,
141 ImmTyLWE,
142 ImmTyExpTgt,
143 ImmTyExpCompr,
144 ImmTyExpVM,
145 ImmTyFORMAT,
146 ImmTyHwreg,
147 ImmTyOff,
148 ImmTySendMsg,
149 ImmTyInterpSlot,
150 ImmTyInterpAttr,
151 ImmTyInterpAttrChan,
152 ImmTyOpSel,
153 ImmTyOpSelHi,
154 ImmTyNegLo,
155 ImmTyNegHi,
156 ImmTyIndexKey8bit,
157 ImmTyIndexKey16bit,
158 ImmTyDPP8,
159 ImmTyDppCtrl,
160 ImmTyDppRowMask,
161 ImmTyDppBankMask,
162 ImmTyDppBoundCtrl,
163 ImmTyDppFI,
164 ImmTySwizzle,
165 ImmTyGprIdxMode,
166 ImmTyHigh,
167 ImmTyBLGP,
168 ImmTyCBSZ,
169 ImmTyABID,
170 ImmTyEndpgm,
171 ImmTyWaitVDST,
172 ImmTyWaitEXP,
173 ImmTyWaitVAVDst,
174 ImmTyWaitVMVSrc,
175 ImmTyByteSel,
176 };
177
178 // Immediate operand kind.
179 // It helps to identify the location of an offending operand after an error.
180 // Note that regular literals and mandatory literals (KImm) must be handled
181 // differently. When looking for an offending operand, we should usually
182 // ignore mandatory literals because they are part of the instruction and
183 // cannot be changed. Report location of mandatory operands only for VOPD,
184 // when both OpX and OpY have a KImm and there are no other literals.
185 enum ImmKindTy {
186 ImmKindTyNone,
187 ImmKindTyLiteral,
188 ImmKindTyMandatoryLiteral,
189 ImmKindTyConst,
190 };
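 // For example, the 0x1234 in "v_add_f32 v0, 0x1234, v1" would be recorded as
 // ImmKindTyLiteral, whereas a KImm operand that is an inherent part of the
 // encoding (e.g. the constant in v_fmaak/v_fmamk-style opcodes) would be
 // ImmKindTyMandatoryLiteral and, per the comment above, is normally skipped
 // when looking for an offending operand.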
191
192private:
193 struct TokOp {
194 const char *Data;
195 unsigned Length;
196 };
197
198 struct ImmOp {
199 int64_t Val;
200 ImmTy Type;
201 bool IsFPImm;
202 mutable ImmKindTy Kind;
203 Modifiers Mods;
204 };
205
206 struct RegOp {
207 unsigned RegNo;
208 Modifiers Mods;
209 };
210
211 union {
212 TokOp Tok;
213 ImmOp Imm;
214 RegOp Reg;
215 const MCExpr *Expr;
216 };
217
218public:
219 bool isToken() const override { return Kind == Token; }
220
221 bool isSymbolRefExpr() const {
222 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
223 }
224
225 bool isImm() const override {
226 return Kind == Immediate;
227 }
228
229 void setImmKindNone() const {
230 assert(isImm());
231 Imm.Kind = ImmKindTyNone;
232 }
233
234 void setImmKindLiteral() const {
235 assert(isImm());
236 Imm.Kind = ImmKindTyLiteral;
237 }
238
239 void setImmKindMandatoryLiteral() const {
240 assert(isImm());
241 Imm.Kind = ImmKindTyMandatoryLiteral;
242 }
243
244 void setImmKindConst() const {
245 assert(isImm());
246 Imm.Kind = ImmKindTyConst;
247 }
248
249 bool IsImmKindLiteral() const {
250 return isImm() && Imm.Kind == ImmKindTyLiteral;
251 }
252
253 bool IsImmKindMandatoryLiteral() const {
254 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
255 }
256
257 bool isImmKindConst() const {
258 return isImm() && Imm.Kind == ImmKindTyConst;
259 }
260
261 bool isInlinableImm(MVT type) const;
262 bool isLiteralImm(MVT type) const;
263
264 bool isRegKind() const {
265 return Kind == Register;
266 }
267
268 bool isReg() const override {
269 return isRegKind() && !hasModifiers();
270 }
271
272 bool isRegOrInline(unsigned RCID, MVT type) const {
273 return isRegClass(RCID) || isInlinableImm(type);
274 }
275
276 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
277 return isRegOrInline(RCID, type) || isLiteralImm(type);
278 }
279
280 bool isRegOrImmWithInt16InputMods() const {
281 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
282 }
283
284 bool isRegOrImmWithIntT16InputMods() const {
285 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
286 }
287
288 bool isRegOrImmWithInt32InputMods() const {
289 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
290 }
291
292 bool isRegOrInlineImmWithInt16InputMods() const {
293 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
294 }
295
296 bool isRegOrInlineImmWithInt32InputMods() const {
297 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
298 }
299
300 bool isRegOrImmWithInt64InputMods() const {
301 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
302 }
303
304 bool isRegOrImmWithFP16InputMods() const {
305 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
306 }
307
308 bool isRegOrImmWithFPT16InputMods() const {
309 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
310 }
311
312 bool isRegOrImmWithFP32InputMods() const {
313 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
314 }
315
316 bool isRegOrImmWithFP64InputMods() const {
317 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
318 }
319
320 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
321 return isRegOrInline(
322 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
323 }
324
325 bool isRegOrInlineImmWithFP32InputMods() const {
326 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
327 }
328
329 bool isPackedFP16InputMods() const {
330 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
331 }
332
333 bool isVReg() const {
334 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
335 isRegClass(AMDGPU::VReg_64RegClassID) ||
336 isRegClass(AMDGPU::VReg_96RegClassID) ||
337 isRegClass(AMDGPU::VReg_128RegClassID) ||
338 isRegClass(AMDGPU::VReg_160RegClassID) ||
339 isRegClass(AMDGPU::VReg_192RegClassID) ||
340 isRegClass(AMDGPU::VReg_256RegClassID) ||
341 isRegClass(AMDGPU::VReg_512RegClassID) ||
342 isRegClass(AMDGPU::VReg_1024RegClassID);
343 }
344
345 bool isVReg32() const {
346 return isRegClass(AMDGPU::VGPR_32RegClassID);
347 }
348
349 bool isVReg32OrOff() const {
350 return isOff() || isVReg32();
351 }
352
353 bool isNull() const {
354 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
355 }
356
357 bool isVRegWithInputMods() const;
358 template <bool IsFake16> bool isT16VRegWithInputMods() const;
359
360 bool isSDWAOperand(MVT type) const;
361 bool isSDWAFP16Operand() const;
362 bool isSDWAFP32Operand() const;
363 bool isSDWAInt16Operand() const;
364 bool isSDWAInt32Operand() const;
365
366 bool isImmTy(ImmTy ImmT) const {
367 return isImm() && Imm.Type == ImmT;
368 }
369
370 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
371
372 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
373
374 bool isImmModifier() const {
375 return isImm() && Imm.Type != ImmTyNone;
376 }
377
378 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
379 bool isDim() const { return isImmTy(ImmTyDim); }
380 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
381 bool isOff() const { return isImmTy(ImmTyOff); }
382 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
383 bool isOffen() const { return isImmTy(ImmTyOffen); }
384 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
385 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
386 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
387 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
388 bool isGDS() const { return isImmTy(ImmTyGDS); }
389 bool isLDS() const { return isImmTy(ImmTyLDS); }
390 bool isCPol() const { return isImmTy(ImmTyCPol); }
391 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
392 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
393 bool isTFE() const { return isImmTy(ImmTyTFE); }
394 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
395 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
396 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
397 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
398 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
399 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
400 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
401 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
402 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
403 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
404 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
405 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
406 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
407
408 bool isRegOrImm() const {
409 return isReg() || isImm();
410 }
411
412 bool isRegClass(unsigned RCID) const;
413
414 bool isInlineValue() const;
415
416 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
417 return isRegOrInline(RCID, type) && !hasModifiers();
418 }
419
420 bool isSCSrcB16() const {
421 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
422 }
423
424 bool isSCSrcV2B16() const {
425 return isSCSrcB16();
426 }
427
428 bool isSCSrc_b32() const {
429 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
430 }
431
432 bool isSCSrc_b64() const {
433 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
434 }
435
436 bool isBoolReg() const;
437
438 bool isSCSrcF16() const {
439 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
440 }
441
442 bool isSCSrcV2F16() const {
443 return isSCSrcF16();
444 }
445
446 bool isSCSrcF32() const {
447 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
448 }
449
450 bool isSCSrcF64() const {
451 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
452 }
453
454 bool isSSrc_b32() const {
455 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
456 }
457
458 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
459
460 bool isSSrcV2B16() const {
461 llvm_unreachable("cannot happen");
462 return isSSrc_b16();
463 }
464
465 bool isSSrc_b64() const {
466 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
467 // See isVSrc64().
468 return isSCSrc_b64() || isLiteralImm(MVT::i64);
469 }
470
471 bool isSSrc_f32() const {
472 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
473 }
474
475 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
476
477 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
478
479 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
480
481 bool isSSrcV2F16() const {
482 llvm_unreachable("cannot happen");
483 return isSSrc_f16();
484 }
485
486 bool isSSrcV2FP32() const {
487 llvm_unreachable("cannot happen");
488 return isSSrc_f32();
489 }
490
491 bool isSCSrcV2FP32() const {
492 llvm_unreachable("cannot happen");
493 return isSCSrcF32();
494 }
495
496 bool isSSrcV2INT32() const {
497 llvm_unreachable("cannot happen");
498 return isSSrc_b32();
499 }
500
501 bool isSCSrcV2INT32() const {
502 llvm_unreachable("cannot happen");
503 return isSCSrc_b32();
504 }
505
506 bool isSSrcOrLds_b32() const {
507 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
508 isLiteralImm(MVT::i32) || isExpr();
509 }
510
511 bool isVCSrc_b32() const {
512 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
513 }
514
515 bool isVCSrcB64() const {
516 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
517 }
518
519 bool isVCSrcTB16() const {
520 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
521 }
522
523 bool isVCSrcTB16_Lo128() const {
524 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
525 }
526
527 bool isVCSrcFake16B16_Lo128() const {
528 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
529 }
530
531 bool isVCSrc_b16() const {
532 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
533 }
534
535 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
536
537 bool isVCSrc_f32() const {
538 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
539 }
540
541 bool isVCSrcF64() const {
542 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
543 }
544
545 bool isVCSrcTBF16() const {
546 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
547 }
548
549 bool isVCSrcTF16() const {
550 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
551 }
552
553 bool isVCSrcTBF16_Lo128() const {
554 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
555 }
556
557 bool isVCSrcTF16_Lo128() const {
558 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
559 }
560
561 bool isVCSrcFake16BF16_Lo128() const {
562 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
563 }
564
565 bool isVCSrcFake16F16_Lo128() const {
566 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
567 }
568
569 bool isVCSrc_bf16() const {
570 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
571 }
572
573 bool isVCSrc_f16() const {
574 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
575 }
576
577 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
578
579 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
580
581 bool isVSrc_b32() const {
582 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
583 }
584
585 bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
586
587 bool isVSrcT_b16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
588
589 bool isVSrcT_b16_Lo128() const {
590 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
591 }
592
593 bool isVSrcFake16_b16_Lo128() const {
594 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
595 }
596
597 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
598
599 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
600
601 bool isVCSrcV2FP32() const {
602 return isVCSrcF64();
603 }
604
605 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
606
607 bool isVCSrcV2INT32() const {
608 return isVCSrcB64();
609 }
610
611 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
612
613 bool isVSrc_f32() const {
614 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
615 }
616
617 bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }
618
619 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
620
621 bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
622
623 bool isVSrcT_bf16_Lo128() const {
624 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
625 }
626
627 bool isVSrcT_f16_Lo128() const {
628 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
629 }
630
631 bool isVSrcFake16_bf16_Lo128() const {
632 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
633 }
634
635 bool isVSrcFake16_f16_Lo128() const {
636 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
637 }
638
639 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
640
641 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
642
643 bool isVSrc_v2bf16() const {
644 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
645 }
646
647 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
648
649 bool isVISrcB32() const {
650 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
651 }
652
653 bool isVISrcB16() const {
654 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
655 }
656
657 bool isVISrcV2B16() const {
658 return isVISrcB16();
659 }
660
661 bool isVISrcF32() const {
662 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
663 }
664
665 bool isVISrcF16() const {
666 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
667 }
668
669 bool isVISrcV2F16() const {
670 return isVISrcF16() || isVISrcB32();
671 }
672
673 bool isVISrc_64_bf16() const {
674 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
675 }
676
677 bool isVISrc_64_f16() const {
678 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
679 }
680
681 bool isVISrc_64_b32() const {
682 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
683 }
684
685 bool isVISrc_64B64() const {
686 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
687 }
688
689 bool isVISrc_64_f64() const {
690 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
691 }
692
693 bool isVISrc_64V2FP32() const {
694 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
695 }
696
697 bool isVISrc_64V2INT32() const {
698 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
699 }
700
701 bool isVISrc_256_b32() const {
702 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
703 }
704
705 bool isVISrc_256_f32() const {
706 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
707 }
708
709 bool isVISrc_256B64() const {
710 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
711 }
712
713 bool isVISrc_256_f64() const {
714 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
715 }
716
717 bool isVISrc_128B16() const {
718 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
719 }
720
721 bool isVISrc_128V2B16() const {
722 return isVISrc_128B16();
723 }
724
725 bool isVISrc_128_b32() const {
726 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
727 }
728
729 bool isVISrc_128_f32() const {
730 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
731 }
732
733 bool isVISrc_256V2FP32() const {
734 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
735 }
736
737 bool isVISrc_256V2INT32() const {
738 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
739 }
740
741 bool isVISrc_512_b32() const {
742 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
743 }
744
745 bool isVISrc_512B16() const {
746 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
747 }
748
749 bool isVISrc_512V2B16() const {
750 return isVISrc_512B16();
751 }
752
753 bool isVISrc_512_f32() const {
754 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
755 }
756
757 bool isVISrc_512F16() const {
758 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
759 }
760
761 bool isVISrc_512V2F16() const {
762 return isVISrc_512F16() || isVISrc_512_b32();
763 }
764
765 bool isVISrc_1024_b32() const {
766 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
767 }
768
769 bool isVISrc_1024B16() const {
770 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
771 }
772
773 bool isVISrc_1024V2B16() const {
774 return isVISrc_1024B16();
775 }
776
777 bool isVISrc_1024_f32() const {
778 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
779 }
780
781 bool isVISrc_1024F16() const {
782 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
783 }
784
785 bool isVISrc_1024V2F16() const {
786 return isVISrc_1024F16() || isVISrc_1024_b32();
787 }
788
789 bool isAISrcB32() const {
790 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
791 }
792
793 bool isAISrcB16() const {
794 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
795 }
796
797 bool isAISrcV2B16() const {
798 return isAISrcB16();
799 }
800
801 bool isAISrcF32() const {
802 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
803 }
804
805 bool isAISrcF16() const {
806 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
807 }
808
809 bool isAISrcV2F16() const {
810 return isAISrcF16() || isAISrcB32();
811 }
812
813 bool isAISrc_64B64() const {
814 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
815 }
816
817 bool isAISrc_64_f64() const {
818 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
819 }
820
821 bool isAISrc_128_b32() const {
822 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
823 }
824
825 bool isAISrc_128B16() const {
826 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
827 }
828
829 bool isAISrc_128V2B16() const {
830 return isAISrc_128B16();
831 }
832
833 bool isAISrc_128_f32() const {
834 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
835 }
836
837 bool isAISrc_128F16() const {
838 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
839 }
840
841 bool isAISrc_128V2F16() const {
842 return isAISrc_128F16() || isAISrc_128_b32();
843 }
844
845 bool isVISrc_128_bf16() const {
846 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
847 }
848
849 bool isVISrc_128_f16() const {
850 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
851 }
852
853 bool isVISrc_128V2F16() const {
854 return isVISrc_128_f16() || isVISrc_128_b32();
855 }
856
857 bool isAISrc_256B64() const {
858 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
859 }
860
861 bool isAISrc_256_f64() const {
862 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
863 }
864
865 bool isAISrc_512_b32() const {
866 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
867 }
868
869 bool isAISrc_512B16() const {
870 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
871 }
872
873 bool isAISrc_512V2B16() const {
874 return isAISrc_512B16();
875 }
876
877 bool isAISrc_512_f32() const {
878 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
879 }
880
881 bool isAISrc_512F16() const {
882 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
883 }
884
885 bool isAISrc_512V2F16() const {
886 return isAISrc_512F16() || isAISrc_512_b32();
887 }
888
889 bool isAISrc_1024_b32() const {
890 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
891 }
892
893 bool isAISrc_1024B16() const {
894 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
895 }
896
897 bool isAISrc_1024V2B16() const {
898 return isAISrc_1024B16();
899 }
900
901 bool isAISrc_1024_f32() const {
902 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
903 }
904
905 bool isAISrc_1024F16() const {
906 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
907 }
908
909 bool isAISrc_1024V2F16() const {
910 return isAISrc_1024F16() || isAISrc_1024_b32();
911 }
912
913 bool isKImmFP32() const {
914 return isLiteralImm(MVT::f32);
915 }
916
917 bool isKImmFP16() const {
918 return isLiteralImm(MVT::f16);
919 }
920
921 bool isMem() const override {
922 return false;
923 }
924
925 bool isExpr() const {
926 return Kind == Expression;
927 }
928
929 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
930
931 bool isSWaitCnt() const;
932 bool isDepCtr() const;
933 bool isSDelayALU() const;
934 bool isHwreg() const;
935 bool isSendMsg() const;
936 bool isSplitBarrier() const;
937 bool isSwizzle() const;
938 bool isSMRDOffset8() const;
939 bool isSMEMOffset() const;
940 bool isSMRDLiteralOffset() const;
941 bool isDPP8() const;
942 bool isDPPCtrl() const;
943 bool isBLGP() const;
944 bool isGPRIdxMode() const;
945 bool isS16Imm() const;
946 bool isU16Imm() const;
947 bool isEndpgm() const;
948
949 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
950 return [=](){ return P(*this); };
951 }
952
953 StringRef getToken() const {
954 assert(isToken());
955 return StringRef(Tok.Data, Tok.Length);
956 }
957
958 int64_t getImm() const {
959 assert(isImm());
960 return Imm.Val;
961 }
962
963 void setImm(int64_t Val) {
964 assert(isImm());
965 Imm.Val = Val;
966 }
967
968 ImmTy getImmTy() const {
969 assert(isImm());
970 return Imm.Type;
971 }
972
973 MCRegister getReg() const override {
974 assert(isRegKind());
975 return Reg.RegNo;
976 }
977
978 SMLoc getStartLoc() const override {
979 return StartLoc;
980 }
981
982 SMLoc getEndLoc() const override {
983 return EndLoc;
984 }
985
986 SMRange getLocRange() const {
987 return SMRange(StartLoc, EndLoc);
988 }
989
990 Modifiers getModifiers() const {
991 assert(isRegKind() || isImmTy(ImmTyNone));
992 return isRegKind() ? Reg.Mods : Imm.Mods;
993 }
994
995 void setModifiers(Modifiers Mods) {
996 assert(isRegKind() || isImmTy(ImmTyNone));
997 if (isRegKind())
998 Reg.Mods = Mods;
999 else
1000 Imm.Mods = Mods;
1001 }
1002
1003 bool hasModifiers() const {
1004 return getModifiers().hasModifiers();
1005 }
1006
1007 bool hasFPModifiers() const {
1008 return getModifiers().hasFPModifiers();
1009 }
1010
1011 bool hasIntModifiers() const {
1012 return getModifiers().hasIntModifiers();
1013 }
1014
1015 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1016
1017 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1018
1019 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1020
1021 void addRegOperands(MCInst &Inst, unsigned N) const;
1022
1023 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1024 if (isRegKind())
1025 addRegOperands(Inst, N);
1026 else
1027 addImmOperands(Inst, N);
1028 }
1029
1030 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1031 Modifiers Mods = getModifiers();
1032 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1033 if (isRegKind()) {
1034 addRegOperands(Inst, N);
1035 } else {
1036 addImmOperands(Inst, N, false);
1037 }
1038 }
1039
1040 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1041 assert(!hasIntModifiers());
1042 addRegOrImmWithInputModsOperands(Inst, N);
1043 }
1044
1045 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1046 assert(!hasFPModifiers());
1047 addRegOrImmWithInputModsOperands(Inst, N);
1048 }
1049
1050 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1051 Modifiers Mods = getModifiers();
1052 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1053 assert(isRegKind());
1054 addRegOperands(Inst, N);
1055 }
1056
1057 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1058 assert(!hasIntModifiers());
1059 addRegWithInputModsOperands(Inst, N);
1060 }
1061
1062 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1063 assert(!hasFPModifiers());
1064 addRegWithInputModsOperands(Inst, N);
1065 }
1066
1067 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1068 // clang-format off
1069 switch (Type) {
1070 case ImmTyNone: OS << "None"; break;
1071 case ImmTyGDS: OS << "GDS"; break;
1072 case ImmTyLDS: OS << "LDS"; break;
1073 case ImmTyOffen: OS << "Offen"; break;
1074 case ImmTyIdxen: OS << "Idxen"; break;
1075 case ImmTyAddr64: OS << "Addr64"; break;
1076 case ImmTyOffset: OS << "Offset"; break;
1077 case ImmTyInstOffset: OS << "InstOffset"; break;
1078 case ImmTyOffset0: OS << "Offset0"; break;
1079 case ImmTyOffset1: OS << "Offset1"; break;
1080 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1081 case ImmTyCPol: OS << "CPol"; break;
1082 case ImmTyIndexKey8bit: OS << "index_key"; break;
1083 case ImmTyIndexKey16bit: OS << "index_key"; break;
1084 case ImmTyTFE: OS << "TFE"; break;
1085 case ImmTyD16: OS << "D16"; break;
1086 case ImmTyFORMAT: OS << "FORMAT"; break;
1087 case ImmTyClamp: OS << "Clamp"; break;
1088 case ImmTyOModSI: OS << "OModSI"; break;
1089 case ImmTyDPP8: OS << "DPP8"; break;
1090 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1091 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1092 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1093 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1094 case ImmTyDppFI: OS << "DppFI"; break;
1095 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1096 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1097 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1098 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1099 case ImmTyDMask: OS << "DMask"; break;
1100 case ImmTyDim: OS << "Dim"; break;
1101 case ImmTyUNorm: OS << "UNorm"; break;
1102 case ImmTyDA: OS << "DA"; break;
1103 case ImmTyR128A16: OS << "R128A16"; break;
1104 case ImmTyA16: OS << "A16"; break;
1105 case ImmTyLWE: OS << "LWE"; break;
1106 case ImmTyOff: OS << "Off"; break;
1107 case ImmTyExpTgt: OS << "ExpTgt"; break;
1108 case ImmTyExpCompr: OS << "ExpCompr"; break;
1109 case ImmTyExpVM: OS << "ExpVM"; break;
1110 case ImmTyHwreg: OS << "Hwreg"; break;
1111 case ImmTySendMsg: OS << "SendMsg"; break;
1112 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1113 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1114 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1115 case ImmTyOpSel: OS << "OpSel"; break;
1116 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1117 case ImmTyNegLo: OS << "NegLo"; break;
1118 case ImmTyNegHi: OS << "NegHi"; break;
1119 case ImmTySwizzle: OS << "Swizzle"; break;
1120 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1121 case ImmTyHigh: OS << "High"; break;
1122 case ImmTyBLGP: OS << "BLGP"; break;
1123 case ImmTyCBSZ: OS << "CBSZ"; break;
1124 case ImmTyABID: OS << "ABID"; break;
1125 case ImmTyEndpgm: OS << "Endpgm"; break;
1126 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1127 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1128 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1129 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1130 case ImmTyByteSel: OS << "ByteSel" ; break;
1131 }
1132 // clang-format on
1133 }
1134
1135 void print(raw_ostream &OS) const override {
1136 switch (Kind) {
1137 case Register:
1138 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1139 break;
1140 case Immediate:
1141 OS << '<' << getImm();
1142 if (getImmTy() != ImmTyNone) {
1143 OS << " type: "; printImmTy(OS, getImmTy());
1144 }
1145 OS << " mods: " << Imm.Mods << '>';
1146 break;
1147 case Token:
1148 OS << '\'' << getToken() << '\'';
1149 break;
1150 case Expression:
1151 OS << "<expr " << *Expr << '>';
1152 break;
1153 }
1154 }
1155
1156 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1157 int64_t Val, SMLoc Loc,
1158 ImmTy Type = ImmTyNone,
1159 bool IsFPImm = false) {
1160 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1161 Op->Imm.Val = Val;
1162 Op->Imm.IsFPImm = IsFPImm;
1163 Op->Imm.Kind = ImmKindTyNone;
1164 Op->Imm.Type = Type;
1165 Op->Imm.Mods = Modifiers();
1166 Op->StartLoc = Loc;
1167 Op->EndLoc = Loc;
1168 return Op;
1169 }
1170
1171 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1172 StringRef Str, SMLoc Loc,
1173 bool HasExplicitEncodingSize = true) {
1174 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1175 Res->Tok.Data = Str.data();
1176 Res->Tok.Length = Str.size();
1177 Res->StartLoc = Loc;
1178 Res->EndLoc = Loc;
1179 return Res;
1180 }
1181
1182 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1183 unsigned RegNo, SMLoc S,
1184 SMLoc E) {
1185 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1186 Op->Reg.RegNo = RegNo;
1187 Op->Reg.Mods = Modifiers();
1188 Op->StartLoc = S;
1189 Op->EndLoc = E;
1190 return Op;
1191 }
1192
1193 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1194 const class MCExpr *Expr, SMLoc S) {
1195 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1196 Op->Expr = Expr;
1197 Op->StartLoc = S;
1198 Op->EndLoc = S;
1199 return Op;
1200 }
1201};
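// A typical (hypothetical) call site during parsing pushes a freshly created
// operand straight onto the operand list, e.g.:
//   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
//                                               AMDGPUOperand::ImmTyOffset));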
1202
1203raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1204 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1205 return OS;
1206}
1207
1208//===----------------------------------------------------------------------===//
1209// AsmParser
1210//===----------------------------------------------------------------------===//
1211
1212// Holds info related to the current kernel, e.g. count of SGPRs used.
1213// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1214// .amdgpu_hsa_kernel or at EOF.
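// Within a kernel scope the uses* helpers below keep per-kind high-water
// marks and publish them via MC symbols; e.g. once v5 is the highest VGPR
// referenced, VgprIndexUnusedMin becomes 6 and .kernel.vgpr_count is updated
// (folding in AGPR usage where getTotalNumVGPRs accounts for it).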
1215class KernelScopeInfo {
1216 int SgprIndexUnusedMin = -1;
1217 int VgprIndexUnusedMin = -1;
1218 int AgprIndexUnusedMin = -1;
1219 MCContext *Ctx = nullptr;
1220 MCSubtargetInfo const *MSTI = nullptr;
1221
1222 void usesSgprAt(int i) {
1223 if (i >= SgprIndexUnusedMin) {
1224 SgprIndexUnusedMin = ++i;
1225 if (Ctx) {
1226 MCSymbol* const Sym =
1227 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1228 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1229 }
1230 }
1231 }
1232
1233 void usesVgprAt(int i) {
1234 if (i >= VgprIndexUnusedMin) {
1235 VgprIndexUnusedMin = ++i;
1236 if (Ctx) {
1237 MCSymbol* const Sym =
1238 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1239 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1240 VgprIndexUnusedMin);
1241 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1242 }
1243 }
1244 }
1245
1246 void usesAgprAt(int i) {
1247 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1248 if (!hasMAIInsts(*MSTI))
1249 return;
1250
1251 if (i >= AgprIndexUnusedMin) {
1252 AgprIndexUnusedMin = ++i;
1253 if (Ctx) {
1254 MCSymbol* const Sym =
1255 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1256 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1257
1258 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1259 MCSymbol* const vSym =
1260 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1261 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1262 VgprIndexUnusedMin);
1263 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1264 }
1265 }
1266 }
1267
1268public:
1269 KernelScopeInfo() = default;
1270
1271 void initialize(MCContext &Context) {
1272 Ctx = &Context;
1273 MSTI = Ctx->getSubtargetInfo();
1274
1275 usesSgprAt(SgprIndexUnusedMin = -1);
1276 usesVgprAt(VgprIndexUnusedMin = -1);
1277 if (hasMAIInsts(*MSTI)) {
1278 usesAgprAt(AgprIndexUnusedMin = -1);
1279 }
1280 }
1281
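 // DwordRegIndex is the first 32-bit register of the operand and RegWidth is
 // its width in bits, so the last dword touched is
 // DwordRegIndex + divideCeil(RegWidth, 32) - 1. For example, the 64-bit pair
 // s[2:3] (index 2, width 64) marks s3 as the highest SGPR used.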
1282 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1283 unsigned RegWidth) {
1284 switch (RegKind) {
1285 case IS_SGPR:
1286 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1287 break;
1288 case IS_AGPR:
1289 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1290 break;
1291 case IS_VGPR:
1292 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1293 break;
1294 default:
1295 break;
1296 }
1297 }
1298};
1299
1300class AMDGPUAsmParser : public MCTargetAsmParser {
1301 MCAsmParser &Parser;
1302
1303 unsigned ForcedEncodingSize = 0;
1304 bool ForcedDPP = false;
1305 bool ForcedSDWA = false;
1306 KernelScopeInfo KernelScope;
1307
1308 /// @name Auto-generated Match Functions
1309 /// {
1310
1311#define GET_ASSEMBLER_HEADER
1312#include "AMDGPUGenAsmMatcher.inc"
1313
1314 /// }
1315
1316private:
1317 void createConstantSymbol(StringRef Id, int64_t Val);
1318
1319 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1320 bool OutOfRangeError(SMRange Range);
1321 /// Calculate VGPR/SGPR blocks required for given target, reserved
1322 /// registers, and user-specified NextFreeXGPR values.
1323 ///
1324 /// \param Features [in] Target features, used for bug corrections.
1325 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1326 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1327 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1328 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1329 /// descriptor field, if valid.
1330 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1331 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1332 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1333 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1334 /// \param VGPRBlocks [out] Result VGPR block count.
1335 /// \param SGPRBlocks [out] Result SGPR block count.
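 /// A "block" here is the hardware allocation granule used by the kernel
 /// descriptor, so the result rounds the next-free register counts up to that
 /// granule. Rough sketch, assuming a VGPR granularity of 4: a kernel whose
 /// highest VGPR is v16 (NextFreeVGPR = 17) needs ceil(17 / 4) = 5 blocks.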
1336 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1337 const MCExpr *FlatScrUsed, bool XNACKUsed,
1338 std::optional<bool> EnableWavefrontSize32,
1339 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1340 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1341 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
1342 bool ParseDirectiveAMDGCNTarget();
1343 bool ParseDirectiveAMDHSACodeObjectVersion();
1344 bool ParseDirectiveAMDHSAKernel();
1345 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1346 bool ParseDirectiveAMDKernelCodeT();
1347 // TODO: Possibly make subtargetHasRegister const.
1348 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1349 bool ParseDirectiveAMDGPUHsaKernel();
1350
1351 bool ParseDirectiveISAVersion();
1352 bool ParseDirectiveHSAMetadata();
1353 bool ParseDirectivePALMetadataBegin();
1354 bool ParseDirectivePALMetadata();
1355 bool ParseDirectiveAMDGPULDS();
1356
1357 /// Common code to parse out a block of text (typically YAML) between start and
1358 /// end directives.
1359 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1360 const char *AssemblerDirectiveEnd,
1361 std::string &CollectString);
1362
1363 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1364 RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1365 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1366 unsigned &RegNum, unsigned &RegWidth,
1367 bool RestoreOnFailure = false);
1368 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
 1369 unsigned &RegNum, unsigned &RegWidth,
 1370 SmallVectorImpl<AsmToken> &Tokens);
 1371 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
 1372 unsigned &RegWidth,
 1373 SmallVectorImpl<AsmToken> &Tokens);
 1374 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
 1375 unsigned &RegWidth,
 1376 SmallVectorImpl<AsmToken> &Tokens);
1377 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1378 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1379 bool ParseRegRange(unsigned& Num, unsigned& Width);
1380 unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg,
1381 unsigned RegWidth, SMLoc Loc);
1382
1383 bool isRegister();
1384 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1385 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1386 void initializeGprCountSymbol(RegisterKind RegKind);
1387 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1388 unsigned RegWidth);
1389 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1390 bool IsAtomic);
1391
1392public:
1393 enum OperandMode {
1394 OperandMode_Default,
1395 OperandMode_NSA,
1396 };
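 // OperandMode_NSA is selected when an instruction's address operands may be
 // written as a bracketed list of non-consecutive VGPRs (the MIMG NSA form);
 // OperandMode_Default covers everything else.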
1397
1398 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1399
1400 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1401 const MCInstrInfo &MII,
1402 const MCTargetOptions &Options)
1403 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
 1404 MCAsmParserExtension::Initialize(Parser);
 1405
1406 if (getFeatureBits().none()) {
1407 // Set default features.
1408 copySTI().ToggleFeature("southern-islands");
1409 }
1410
1411 FeatureBitset FB = getFeatureBits();
1412 if (!FB[AMDGPU::FeatureWavefrontSize64] &&
1413 !FB[AMDGPU::FeatureWavefrontSize32]) {
1414 // If there is no default wave size it must be a generation before gfx10,
1415 // these have FeatureWavefrontSize64 in their definition already. For
1416 // gfx10+ set wave32 as a default.
1417 copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
1418 }
1419
1420 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
 1421
 1422 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1423 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1424 createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1425 createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1426 createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1427 } else {
1428 createConstantSymbol(".option.machine_version_major", ISA.Major);
1429 createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1430 createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1431 }
1432 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1433 initializeGprCountSymbol(IS_VGPR);
1434 initializeGprCountSymbol(IS_SGPR);
1435 } else
1436 KernelScope.initialize(getContext());
1437
1438 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1439 createConstantSymbol(Symbol, Code);
1440
1441 createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1442 createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1443 createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1444 }
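 // The constant symbols created above let assembly sources query the target,
 // e.g. an .if directive can compare ".amdgcn.gfx_generation_number" (HSA ABI)
 // or ".option.machine_version_major" (other ABIs) against an expected value.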
1445
1446 bool hasMIMG_R128() const {
1447 return AMDGPU::hasMIMG_R128(getSTI());
1448 }
1449
1450 bool hasPackedD16() const {
1451 return AMDGPU::hasPackedD16(getSTI());
1452 }
1453
1454 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1455
1456 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1457
1458 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1459
1460 bool isSI() const {
1461 return AMDGPU::isSI(getSTI());
1462 }
1463
1464 bool isCI() const {
1465 return AMDGPU::isCI(getSTI());
1466 }
1467
1468 bool isVI() const {
1469 return AMDGPU::isVI(getSTI());
1470 }
1471
1472 bool isGFX9() const {
1473 return AMDGPU::isGFX9(getSTI());
1474 }
1475
1476 // TODO: isGFX90A is also true for GFX940. We need to clean it.
1477 bool isGFX90A() const {
1478 return AMDGPU::isGFX90A(getSTI());
1479 }
1480
1481 bool isGFX940() const {
1482 return AMDGPU::isGFX940(getSTI());
1483 }
1484
1485 bool isGFX9Plus() const {
1486 return AMDGPU::isGFX9Plus(getSTI());
1487 }
1488
1489 bool isGFX10() const {
1490 return AMDGPU::isGFX10(getSTI());
1491 }
1492
1493 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1494
1495 bool isGFX11() const {
1496 return AMDGPU::isGFX11(getSTI());
1497 }
1498
1499 bool isGFX11Plus() const {
1500 return AMDGPU::isGFX11Plus(getSTI());
1501 }
1502
1503 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1504
1505 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1506
1507 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1508
1509 bool isGFX10_BEncoding() const {
 1510 return AMDGPU::isGFX10_BEncoding(getSTI());
 1511 }
1512
1513 bool hasInv2PiInlineImm() const {
1514 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1515 }
1516
1517 bool hasFlatOffsets() const {
1518 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1519 }
1520
1521 bool hasArchitectedFlatScratch() const {
1522 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1523 }
1524
1525 bool hasSGPR102_SGPR103() const {
1526 return !isVI() && !isGFX9();
1527 }
1528
1529 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1530
1531 bool hasIntClamp() const {
1532 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1533 }
1534
1535 bool hasPartialNSAEncoding() const {
1536 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1537 }
1538
1539 unsigned getNSAMaxSize(bool HasSampler = false) const {
1540 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1541 }
1542
1543 unsigned getMaxNumUserSGPRs() const {
 1544 return AMDGPU::getMaxNumUserSGPRs(getSTI());
 1545 }
1546
1547 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1548
1549 AMDGPUTargetStreamer &getTargetStreamer() {
 1550 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
 1551 return static_cast<AMDGPUTargetStreamer &>(TS);
1552 }
1553
1554 const MCRegisterInfo *getMRI() const {
1555 // We need this const_cast because for some reason getContext() is not const
1556 // in MCAsmParser.
1557 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1558 }
1559
1560 const MCInstrInfo *getMII() const {
1561 return &MII;
1562 }
1563
1564 const FeatureBitset &getFeatureBits() const {
1565 return getSTI().getFeatureBits();
1566 }
1567
1568 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1569 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1570 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1571
1572 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1573 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1574 bool isForcedDPP() const { return ForcedDPP; }
1575 bool isForcedSDWA() const { return ForcedSDWA; }
1576 ArrayRef<unsigned> getMatchedVariants() const;
1577 StringRef getMatchedVariantName() const;
1578
1579 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1580 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1581 bool RestoreOnFailure);
1582 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
 1583 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
 1584 SMLoc &EndLoc) override;
1585 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
 1586 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
 1587 unsigned Kind) override;
1588 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
 1589 OperandVector &Operands, MCStreamer &Out,
 1590 uint64_t &ErrorInfo,
 1591 bool MatchingInlineAsm) override;
1592 bool ParseDirective(AsmToken DirectiveID) override;
1593 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1594 OperandMode Mode = OperandMode_Default);
1595 StringRef parseMnemonicSuffix(StringRef Name);
 1596 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
 1597 SMLoc NameLoc, OperandVector &Operands) override;
1598 //bool ProcessInstruction(MCInst &Inst);
1599
1601
1602 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
 1603
 1604 ParseStatus
1605 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1606 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1607 std::function<bool(int64_t &)> ConvertResult = nullptr);
1608
1609 ParseStatus parseOperandArrayWithPrefix(
1610 const char *Prefix, OperandVector &Operands,
1611 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1612 bool (*ConvertResult)(int64_t &) = nullptr);
 1613
 1614 ParseStatus
1615 parseNamedBit(StringRef Name, OperandVector &Operands,
1616 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1617 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1619 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1620 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1621 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1622 SMLoc &StringLoc);
1623
1624 bool isModifier();
1625 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1626 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1627 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1628 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1629 bool parseSP3NegModifier();
1630 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1631 bool HasLit = false);
1633 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1634 bool HasLit = false);
1635 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1636 bool AllowImm = true);
1637 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1638 bool AllowImm = true);
1639 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1640 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1641 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1642 ParseStatus tryParseIndexKey(OperandVector &Operands,
1643 AMDGPUOperand::ImmTy ImmTy);
1644 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1645 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1646
1647 ParseStatus parseDfmtNfmt(int64_t &Format);
1648 ParseStatus parseUfmt(int64_t &Format);
1649 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1650 int64_t &Format);
1651 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1652 int64_t &Format);
1653 ParseStatus parseFORMAT(OperandVector &Operands);
1654 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1655 ParseStatus parseNumericFormat(int64_t &Format);
1656 ParseStatus parseFlatOffset(OperandVector &Operands);
1657 ParseStatus parseR128A16(OperandVector &Operands);
1659 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1660 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1661
1662 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1663
1664 bool parseCnt(int64_t &IntVal);
1665 ParseStatus parseSWaitCnt(OperandVector &Operands);
1666
1667 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1668 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1669 ParseStatus parseDepCtr(OperandVector &Operands);
1670
1671 bool parseDelay(int64_t &Delay);
1672 ParseStatus parseSDelayALU(OperandVector &Operands);
1673
1674 ParseStatus parseHwreg(OperandVector &Operands);
1675
1676private:
1677 struct OperandInfoTy {
1678 SMLoc Loc;
1679 int64_t Val;
1680 bool IsSymbolic = false;
1681 bool IsDefined = false;
1682
1683 OperandInfoTy(int64_t Val) : Val(Val) {}
1684 };
1685
1686 struct StructuredOpField : OperandInfoTy {
 1687 StringLiteral Id;
 1688 StringLiteral Desc;
 1689 unsigned Width;
1690 bool IsDefined = false;
1691
1692 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1693 int64_t Default)
1694 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1695 virtual ~StructuredOpField() = default;
1696
1697 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1698 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1699 return false;
1700 }
1701
1702 virtual bool validate(AMDGPUAsmParser &Parser) const {
1703 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1704 return Error(Parser, "not supported on this GPU");
1705 if (!isUIntN(Width, Val))
1706 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1707 return true;
1708 }
1709 };
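 // Each StructuredOpField describes one named sub-field of a structured
 // operand; validate() enforces its bit width, so e.g. a field declared with
 // Width = 5 accepts only 0..31 and anything larger is reported as
 // "invalid <Desc>: only 5-bit values are legal".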
1710
1711 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1712 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1713
1714 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1715 bool validateSendMsg(const OperandInfoTy &Msg,
1716 const OperandInfoTy &Op,
1717 const OperandInfoTy &Stream);
1718
1719 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1720 OperandInfoTy &Width);
1721
1722 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1723 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1724 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1725
1726 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1727 const OperandVector &Operands) const;
1728 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1729 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1730 SMLoc getLitLoc(const OperandVector &Operands,
1731 bool SearchMandatoryLiterals = false) const;
1732 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1733 SMLoc getConstLoc(const OperandVector &Operands) const;
1734 SMLoc getInstLoc(const OperandVector &Operands) const;
1735
1736 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1737 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1738 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1739 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1740 bool validateSOPLiteral(const MCInst &Inst) const;
1741 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1742 bool validateVOPDRegBankConstraints(const MCInst &Inst,
1743 const OperandVector &Operands);
1744 bool validateIntClampSupported(const MCInst &Inst);
1745 bool validateMIMGAtomicDMask(const MCInst &Inst);
1746 bool validateMIMGGatherDMask(const MCInst &Inst);
1747 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1748 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1749 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1750 bool validateMIMGD16(const MCInst &Inst);
1751 bool validateMIMGMSAA(const MCInst &Inst);
1752 bool validateOpSel(const MCInst &Inst);
1753 bool validateNeg(const MCInst &Inst, int OpName);
1754 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1755 bool validateVccOperand(unsigned Reg) const;
1756 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1757 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1758 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1759 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1760 bool validateAGPRLdSt(const MCInst &Inst) const;
1761 bool validateVGPRAlign(const MCInst &Inst) const;
1762 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1763 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1764 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1765 bool validateDivScale(const MCInst &Inst);
1766 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1767 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1768 const SMLoc &IDLoc);
1769 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1770 const unsigned CPol);
1771 bool validateExeczVcczOperands(const OperandVector &Operands);
1772 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1773 std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1774 unsigned getConstantBusLimit(unsigned Opcode) const;
1775 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1776 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1777 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1778
1779 bool isSupportedMnemo(StringRef Mnemo,
1780 const FeatureBitset &FBS);
1781 bool isSupportedMnemo(StringRef Mnemo,
1782 const FeatureBitset &FBS,
1783 ArrayRef<unsigned> Variants);
1784 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1785
1786 bool isId(const StringRef Id) const;
1787 bool isId(const AsmToken &Token, const StringRef Id) const;
1788 bool isToken(const AsmToken::TokenKind Kind) const;
1789 StringRef getId() const;
1790 bool trySkipId(const StringRef Id);
1791 bool trySkipId(const StringRef Pref, const StringRef Id);
1792 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1793 bool trySkipToken(const AsmToken::TokenKind Kind);
1794 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1795 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1796 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1797
1798 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1799 AsmToken::TokenKind getTokenKind() const;
1800 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1802 StringRef getTokenStr() const;
1803 AsmToken peekToken(bool ShouldSkipSpace = true);
1804 AsmToken getToken() const;
1805 SMLoc getLoc() const;
1806 void lex();
1807
1808public:
1809 void onBeginOfFile() override;
1810 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1811
1812 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1813
1814 ParseStatus parseExpTgt(OperandVector &Operands);
1815 ParseStatus parseSendMsg(OperandVector &Operands);
1816 ParseStatus parseInterpSlot(OperandVector &Operands);
1817 ParseStatus parseInterpAttr(OperandVector &Operands);
1818 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1819 ParseStatus parseBoolReg(OperandVector &Operands);
1820
1821 bool parseSwizzleOperand(int64_t &Op,
1822 const unsigned MinVal,
1823 const unsigned MaxVal,
1824 const StringRef ErrMsg,
1825 SMLoc &Loc);
1826 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1827 const unsigned MinVal,
1828 const unsigned MaxVal,
1829 const StringRef ErrMsg);
1830 ParseStatus parseSwizzle(OperandVector &Operands);
1831 bool parseSwizzleOffset(int64_t &Imm);
1832 bool parseSwizzleMacro(int64_t &Imm);
1833 bool parseSwizzleQuadPerm(int64_t &Imm);
1834 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1835 bool parseSwizzleBroadcast(int64_t &Imm);
1836 bool parseSwizzleSwap(int64_t &Imm);
1837 bool parseSwizzleReverse(int64_t &Imm);
1838
1839 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1840 int64_t parseGPRIdxMacro();
1841
1842 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1843 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1844
1845 ParseStatus parseOModSI(OperandVector &Operands);
1846
1847 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1848 OptionalImmIndexMap &OptionalIdx);
1849 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1850 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1851 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1852 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1853
1854 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1855 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1856 OptionalImmIndexMap &OptionalIdx);
1857 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1858 OptionalImmIndexMap &OptionalIdx);
1859
1860 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1861 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1862
1863 bool parseDimId(unsigned &Encoding);
1865 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1867 ParseStatus parseDPPCtrl(OperandVector &Operands);
1868 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1869 int64_t parseDPPCtrlSel(StringRef Ctrl);
1870 int64_t parseDPPCtrlPerm();
1871 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1872 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1873 cvtDPP(Inst, Operands, true);
1874 }
1875 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1876 bool IsDPP8 = false);
1877 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1878 cvtVOP3DPP(Inst, Operands, true);
1879 }
1880
1881 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1882 AMDGPUOperand::ImmTy Type);
1883 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1884 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1885 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1886 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1887 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1888 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1889 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1890 uint64_t BasicInstType,
1891 bool SkipDstVcc = false,
1892 bool SkipSrcVcc = false);
1893
1894 ParseStatus parseEndpgm(OperandVector &Operands);
1895
1897};
1898
1899} // end anonymous namespace
1900
1901 // May be called with an integer type of equivalent bitwidth.
1902static const fltSemantics *getFltSemantics(unsigned Size) {
1903 switch (Size) {
1904 case 4:
1905 return &APFloat::IEEEsingle();
1906 case 8:
1907 return &APFloat::IEEEdouble();
1908 case 2:
1909 return &APFloat::IEEEhalf();
1910 default:
1911 llvm_unreachable("unsupported fp type");
1912 }
1913}
1914
1915 static const fltSemantics *getFltSemantics(MVT VT) {
1916 return getFltSemantics(VT.getSizeInBits() / 8);
1917}
1918
1919 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1920 switch (OperandType) {
1921 // When a floating-point immediate is used as an operand of type i16, the
1922 // 32-bit representation of the constant truncated to the 16 LSBs should be used.
1942 return &APFloat::IEEEsingle();
1948 return &APFloat::IEEEdouble();
1957 return &APFloat::IEEEhalf();
1965 return &APFloat::BFloat();
1966 default:
1967 llvm_unreachable("unsupported fp type");
1968 }
1969}
1970
1971//===----------------------------------------------------------------------===//
1972// Operand
1973//===----------------------------------------------------------------------===//
1974
1975static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1976 bool Lost;
1977
1978 // Convert the literal to the target type's semantics.
1979 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1980 APFloat::rmNearestTiesToEven,
1981 &Lost);
1982 // We allow precision loss but not overflow or underflow.
1983 if (Status != APFloat::opOK &&
1984 Lost &&
1985 ((Status & APFloat::opOverflow) != 0 ||
1986 (Status & APFloat::opUnderflow) != 0)) {
1987 return false;
1988 }
1989
1990 return true;
1991}
1992
1993static bool isSafeTruncation(int64_t Val, unsigned Size) {
1994 return isUIntN(Size, Val) || isIntN(Size, Val);
1995}
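// For example (derived from the check above): isSafeTruncation(0xFFFF, 16)
// and isSafeTruncation(-1, 16) both hold, while isSafeTruncation(0x1FFFF, 16)
// does not, since 0x1FFFF fits neither as a 16-bit unsigned nor signed value.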
1996
1997static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1998 if (VT.getScalarType() == MVT::i16)
1999 return isInlinableLiteral32(Val, HasInv2Pi);
2000
2001 if (VT.getScalarType() == MVT::f16)
2002 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2003
2004 assert(VT.getScalarType() == MVT::bf16);
2005
2006 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2007}
2008
2009bool AMDGPUOperand::isInlinableImm(MVT type) const {
2010
2011 // This is a hack to enable named inline values like
2012 // shared_base with both 32-bit and 64-bit operands.
2013 // Note that these values are defined as
2014 // 32-bit operands only.
2015 if (isInlineValue()) {
2016 return true;
2017 }
2018
2019 if (!isImmTy(ImmTyNone)) {
2020 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2021 return false;
2022 }
2023 // TODO: We should avoid using host float here. It would be better to
2024 // check the float bit values which is what a few other places do.
2025 // We've had bot failures before due to weird NaN support on mips hosts.
2026
2027 APInt Literal(64, Imm.Val);
2028
2029 if (Imm.IsFPImm) { // We got fp literal token
2030 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2031 return AMDGPU::isInlinableLiteral64(Imm.Val,
2032 AsmParser->hasInv2PiInlineImm());
2033 }
2034
2035 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2036 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2037 return false;
2038
2039 if (type.getScalarSizeInBits() == 16) {
2040 bool Lost = false;
2041 switch (type.getScalarType().SimpleTy) {
2042 default:
2043 llvm_unreachable("unknown 16-bit type");
2044 case MVT::bf16:
2045 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2046 &Lost);
2047 break;
2048 case MVT::f16:
2049 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2050 &Lost);
2051 break;
2052 case MVT::i16:
2053 FPLiteral.convert(APFloatBase::IEEEsingle(),
2054 APFloat::rmNearestTiesToEven, &Lost);
2055 break;
2056 }
2057 // We need to use the 32-bit representation here because when a
2058 // floating-point inline constant is used as an i16 operand, its 32-bit
2059 // representation will be used. We will need the 32-bit value to check
2060 // whether it is an FP inline constant.
2061 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2062 return isInlineableLiteralOp16(ImmVal, type,
2063 AsmParser->hasInv2PiInlineImm());
2064 }
2065
2066 // Check if the single-precision literal is inlinable.
2067 return AMDGPU::isInlinableLiteral32(
2068 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2069 AsmParser->hasInv2PiInlineImm());
2070 }
2071
2072 // We got int literal token.
2073 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2074 return AMDGPU::isInlinableLiteral64(Imm.Val,
2075 AsmParser->hasInv2PiInlineImm());
2076 }
2077
2078 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2079 return false;
2080 }
2081
2082 if (type.getScalarSizeInBits() == 16) {
2083 return isInlineableLiteralOp16(
2084 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2085 type, AsmParser->hasInv2PiInlineImm());
2086 }
2087
2088 return AMDGPU::isInlinableLiteral32(
2089 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2090 AsmParser->hasInv2PiInlineImm());
2091}
2092
2093bool AMDGPUOperand::isLiteralImm(MVT type) const {
2094 // Check that this immediate can be added as literal
2095 if (!isImmTy(ImmTyNone)) {
2096 return false;
2097 }
2098
2099 if (!Imm.IsFPImm) {
2100 // We got int literal token.
2101
2102 if (type == MVT::f64 && hasFPModifiers()) {
2103 // Cannot apply fp modifiers to int literals preserving the same semantics
2104 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
2105 // disable these cases.
2106 return false;
2107 }
2108
2109 unsigned Size = type.getSizeInBits();
2110 if (Size == 64)
2111 Size = 32;
2112
2113 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2114 // types.
2115 return isSafeTruncation(Imm.Val, Size);
2116 }
2117
2118 // We got fp literal token
2119 if (type == MVT::f64) { // Expected 64-bit fp operand
2120 // The low bits of the literal would be set to zeroes, but we accept such literals.
2121 return true;
2122 }
2123
2124 if (type == MVT::i64) { // Expected 64-bit int operand
2125 // We don't allow fp literals in 64-bit integer instructions. It is
2126 // unclear how we should encode them.
2127 return false;
2128 }
2129
2130 // We allow fp literals with f16x2 operands assuming that the specified
2131 // literal goes into the lower half and the upper half is zero. We also
2132 // require that the literal may be losslessly converted to f16.
2133 //
2134 // For i16x2 operands, we assume that the specified literal is encoded as a
2135 // single-precision float. This is pretty odd, but it matches SP3 and what
2136 // happens in hardware.
2137 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2138 : (type == MVT::v2i16) ? MVT::f32
2139 : (type == MVT::v2f32) ? MVT::f32
2140 : type;
2141
2142 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2143 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2144}
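// Illustrative (assumed) assembly for the packed-operand rules above:
//   v_pk_add_f16 v0, 1.5, v1     // accepted: 1.5 converts losslessly to f16,
//                                // goes into the low half, high half is zero
//   v_pk_add_f16 v0, 1.0e-10, v1 // rejected: the value underflows when
//                                // converted to f16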
2145
2146bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2147 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2148}
2149
2150bool AMDGPUOperand::isVRegWithInputMods() const {
2151 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2152 // GFX90A allows DPP on 64-bit operands.
2153 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2154 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2155}
2156
2157template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2158 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2159 : AMDGPU::VGPR_16_Lo128RegClassID);
2160}
2161
2162bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2163 if (AsmParser->isVI())
2164 return isVReg32();
2165 else if (AsmParser->isGFX9Plus())
2166 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2167 else
2168 return false;
2169}
2170
2171bool AMDGPUOperand::isSDWAFP16Operand() const {
2172 return isSDWAOperand(MVT::f16);
2173}
2174
2175bool AMDGPUOperand::isSDWAFP32Operand() const {
2176 return isSDWAOperand(MVT::f32);
2177}
2178
2179bool AMDGPUOperand::isSDWAInt16Operand() const {
2180 return isSDWAOperand(MVT::i16);
2181}
2182
2183bool AMDGPUOperand::isSDWAInt32Operand() const {
2184 return isSDWAOperand(MVT::i32);
2185}
2186
2187bool AMDGPUOperand::isBoolReg() const {
2188 auto FB = AsmParser->getFeatureBits();
2189 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2190 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2191}
2192
2193uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2194{
2195 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2196 assert(Size == 2 || Size == 4 || Size == 8);
2197
2198 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2199
2200 if (Imm.Mods.Abs) {
2201 Val &= ~FpSignMask;
2202 }
2203 if (Imm.Mods.Neg) {
2204 Val ^= FpSignMask;
2205 }
2206
2207 return Val;
2208}
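// A worked example of the masking above: with Size == 4 the sign mask is
// 0x80000000, so an abs modifier turns 0xC0000000 (-2.0f) into 0x40000000
// (2.0f), and a neg modifier flips 0x40000000 back to 0xC0000000.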
2209
2210void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2211 if (isExpr()) {
2212 Inst.addOperand(MCOperand::createExpr(Expr));
2213 return;
2214 }
2215
2216 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2217 Inst.getNumOperands())) {
2218 addLiteralImmOperand(Inst, Imm.Val,
2219 ApplyModifiers &
2220 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2221 } else {
2222 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2223 Inst.addOperand(MCOperand::createImm(Imm.Val));
2224 setImmKindNone();
2225 }
2226}
2227
2228void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2229 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2230 auto OpNum = Inst.getNumOperands();
2231 // Check that this operand accepts literals
2232 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2233
2234 if (ApplyModifiers) {
2235 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2236 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2237 Val = applyInputFPModifiers(Val, Size);
2238 }
2239
2240 APInt Literal(64, Val);
2241 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2242
2243 if (Imm.IsFPImm) { // We got fp literal token
2244 switch (OpTy) {
2250 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2251 AsmParser->hasInv2PiInlineImm())) {
2252 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2253 setImmKindConst();
2254 return;
2255 }
2256
2257 // Non-inlineable
2258 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2259 // For fp operands we check if low 32 bits are zeros
2260 if (Literal.getLoBits(32) != 0) {
2261 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2262 "Can't encode literal as exact 64-bit floating-point operand. "
2263 "Low 32-bits will be set to zero");
2264 Val &= 0xffffffff00000000u;
2265 }
2266
2268 setImmKindLiteral();
2269 return;
2270 }
2271
2272 // We don't allow fp literals in 64-bit integer instructions. It is
2273 // unclear how we should encode them. This case should be checked earlier
2274 // in predicate methods (isLiteralImm())
2275 llvm_unreachable("fp literal in 64-bit integer instruction.");
2276
2284 if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
2285 // This is 1/(2*pi), which is going to be truncated to bf16 with a loss
2286 // of precision. The constant represents the idiomatic fp32 value of
2287 // 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16 bits
2288 // cleared. Prevent rounding below.
2289 Inst.addOperand(MCOperand::createImm(0x3e22));
2290 setImmKindLiteral();
2291 return;
2292 }
2293 [[fallthrough]];
2294
2322 bool lost;
2323 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2324 // Convert the literal to the operand's floating-point type.
2325 FPLiteral.convert(*getOpFltSemantics(OpTy),
2326 APFloat::rmNearestTiesToEven, &lost);
2327 // We allow precision loss but not overflow or underflow. This should have
2328 // been checked earlier in isLiteralImm().
2329
2330 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2331 Inst.addOperand(MCOperand::createImm(ImmVal));
2332 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2333 setImmKindMandatoryLiteral();
2334 } else {
2335 setImmKindLiteral();
2336 }
2337 return;
2338 }
2339 default:
2340 llvm_unreachable("invalid operand size");
2341 }
2342
2343 return;
2344 }
2345
2346 // We got int literal token.
2347 // Only sign extend inline immediates.
2348 switch (OpTy) {
2364 if (isSafeTruncation(Val, 32) &&
2365 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2366 AsmParser->hasInv2PiInlineImm())) {
2368 setImmKindConst();
2369 return;
2370 }
2371
2372 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2373 setImmKindLiteral();
2374 return;
2375
2381 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2383 setImmKindConst();
2384 return;
2385 }
2386
2387 Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
2388 : Lo_32(Val);
2389
2391 setImmKindLiteral();
2392 return;
2393
2397 if (isSafeTruncation(Val, 16) &&
2398 AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2399 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2400 setImmKindConst();
2401 return;
2402 }
2403
2404 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2405 setImmKindLiteral();
2406 return;
2407
2412 if (isSafeTruncation(Val, 16) &&
2413 AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2414 AsmParser->hasInv2PiInlineImm())) {
2416 setImmKindConst();
2417 return;
2418 }
2419
2420 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2421 setImmKindLiteral();
2422 return;
2423
2428 if (isSafeTruncation(Val, 16) &&
2429 AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2430 AsmParser->hasInv2PiInlineImm())) {
2432 setImmKindConst();
2433 return;
2434 }
2435
2436 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2437 setImmKindLiteral();
2438 return;
2439
2442 assert(isSafeTruncation(Val, 16));
2443 assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2445 return;
2446 }
2449 assert(isSafeTruncation(Val, 16));
2450 assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2451 AsmParser->hasInv2PiInlineImm()));
2452
2454 return;
2455 }
2456
2459 assert(isSafeTruncation(Val, 16));
2460 assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2461 AsmParser->hasInv2PiInlineImm()));
2462
2464 return;
2465 }
2466
2468 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2469 setImmKindMandatoryLiteral();
2470 return;
2472 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2473 setImmKindMandatoryLiteral();
2474 return;
2475 default:
2476 llvm_unreachable("invalid operand size");
2477 }
2478}
2479
2480void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2481 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2482}
2483
2484bool AMDGPUOperand::isInlineValue() const {
2485 return isRegKind() && ::isInlineValue(getReg());
2486}
2487
2488//===----------------------------------------------------------------------===//
2489// AsmParser
2490//===----------------------------------------------------------------------===//
2491
2492void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2493 // TODO: make these pre-defined variables read-only.
2494 // Currently there is no suitable machinery in core llvm-mc for this.
2495 // MCSymbol::isRedefinable is intended for another purpose, and
2496 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
2497 MCContext &Ctx = getContext();
2498 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2499 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2500}
2501
2502static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2503 if (Is == IS_VGPR) {
2504 switch (RegWidth) {
2505 default: return -1;
2506 case 32:
2507 return AMDGPU::VGPR_32RegClassID;
2508 case 64:
2509 return AMDGPU::VReg_64RegClassID;
2510 case 96:
2511 return AMDGPU::VReg_96RegClassID;
2512 case 128:
2513 return AMDGPU::VReg_128RegClassID;
2514 case 160:
2515 return AMDGPU::VReg_160RegClassID;
2516 case 192:
2517 return AMDGPU::VReg_192RegClassID;
2518 case 224:
2519 return AMDGPU::VReg_224RegClassID;
2520 case 256:
2521 return AMDGPU::VReg_256RegClassID;
2522 case 288:
2523 return AMDGPU::VReg_288RegClassID;
2524 case 320:
2525 return AMDGPU::VReg_320RegClassID;
2526 case 352:
2527 return AMDGPU::VReg_352RegClassID;
2528 case 384:
2529 return AMDGPU::VReg_384RegClassID;
2530 case 512:
2531 return AMDGPU::VReg_512RegClassID;
2532 case 1024:
2533 return AMDGPU::VReg_1024RegClassID;
2534 }
2535 } else if (Is == IS_TTMP) {
2536 switch (RegWidth) {
2537 default: return -1;
2538 case 32:
2539 return AMDGPU::TTMP_32RegClassID;
2540 case 64:
2541 return AMDGPU::TTMP_64RegClassID;
2542 case 128:
2543 return AMDGPU::TTMP_128RegClassID;
2544 case 256:
2545 return AMDGPU::TTMP_256RegClassID;
2546 case 512:
2547 return AMDGPU::TTMP_512RegClassID;
2548 }
2549 } else if (Is == IS_SGPR) {
2550 switch (RegWidth) {
2551 default: return -1;
2552 case 32:
2553 return AMDGPU::SGPR_32RegClassID;
2554 case 64:
2555 return AMDGPU::SGPR_64RegClassID;
2556 case 96:
2557 return AMDGPU::SGPR_96RegClassID;
2558 case 128:
2559 return AMDGPU::SGPR_128RegClassID;
2560 case 160:
2561 return AMDGPU::SGPR_160RegClassID;
2562 case 192:
2563 return AMDGPU::SGPR_192RegClassID;
2564 case 224:
2565 return AMDGPU::SGPR_224RegClassID;
2566 case 256:
2567 return AMDGPU::SGPR_256RegClassID;
2568 case 288:
2569 return AMDGPU::SGPR_288RegClassID;
2570 case 320:
2571 return AMDGPU::SGPR_320RegClassID;
2572 case 352:
2573 return AMDGPU::SGPR_352RegClassID;
2574 case 384:
2575 return AMDGPU::SGPR_384RegClassID;
2576 case 512:
2577 return AMDGPU::SGPR_512RegClassID;
2578 }
2579 } else if (Is == IS_AGPR) {
2580 switch (RegWidth) {
2581 default: return -1;
2582 case 32:
2583 return AMDGPU::AGPR_32RegClassID;
2584 case 64:
2585 return AMDGPU::AReg_64RegClassID;
2586 case 96:
2587 return AMDGPU::AReg_96RegClassID;
2588 case 128:
2589 return AMDGPU::AReg_128RegClassID;
2590 case 160:
2591 return AMDGPU::AReg_160RegClassID;
2592 case 192:
2593 return AMDGPU::AReg_192RegClassID;
2594 case 224:
2595 return AMDGPU::AReg_224RegClassID;
2596 case 256:
2597 return AMDGPU::AReg_256RegClassID;
2598 case 288:
2599 return AMDGPU::AReg_288RegClassID;
2600 case 320:
2601 return AMDGPU::AReg_320RegClassID;
2602 case 352:
2603 return AMDGPU::AReg_352RegClassID;
2604 case 384:
2605 return AMDGPU::AReg_384RegClassID;
2606 case 512:
2607 return AMDGPU::AReg_512RegClassID;
2608 case 1024:
2609 return AMDGPU::AReg_1024RegClassID;
2610 }
2611 }
2612 return -1;
2613}
2614
2615 static unsigned getSpecialRegForName(StringRef RegName) {
2616 return StringSwitch<unsigned>(RegName)
2617 .Case("exec", AMDGPU::EXEC)
2618 .Case("vcc", AMDGPU::VCC)
2619 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2620 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2621 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2622 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2623 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2624 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2625 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2626 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2627 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2628 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2629 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2630 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2631 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2632 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2633 .Case("m0", AMDGPU::M0)
2634 .Case("vccz", AMDGPU::SRC_VCCZ)
2635 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2636 .Case("execz", AMDGPU::SRC_EXECZ)
2637 .Case("src_execz", AMDGPU::SRC_EXECZ)
2638 .Case("scc", AMDGPU::SRC_SCC)
2639 .Case("src_scc", AMDGPU::SRC_SCC)
2640 .Case("tba", AMDGPU::TBA)
2641 .Case("tma", AMDGPU::TMA)
2642 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2643 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2644 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2645 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2646 .Case("vcc_lo", AMDGPU::VCC_LO)
2647 .Case("vcc_hi", AMDGPU::VCC_HI)
2648 .Case("exec_lo", AMDGPU::EXEC_LO)
2649 .Case("exec_hi", AMDGPU::EXEC_HI)
2650 .Case("tma_lo", AMDGPU::TMA_LO)
2651 .Case("tma_hi", AMDGPU::TMA_HI)
2652 .Case("tba_lo", AMDGPU::TBA_LO)
2653 .Case("tba_hi", AMDGPU::TBA_HI)
2654 .Case("pc", AMDGPU::PC_REG)
2655 .Case("null", AMDGPU::SGPR_NULL)
2656 .Default(AMDGPU::NoRegister);
2657}
2658
2659bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2660 SMLoc &EndLoc, bool RestoreOnFailure) {
2661 auto R = parseRegister();
2662 if (!R) return true;
2663 assert(R->isReg());
2664 RegNo = R->getReg();
2665 StartLoc = R->getStartLoc();
2666 EndLoc = R->getEndLoc();
2667 return false;
2668}
2669
2670bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2671 SMLoc &EndLoc) {
2672 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2673}
2674
2675ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2676 SMLoc &EndLoc) {
2677 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2678 bool PendingErrors = getParser().hasPendingError();
2679 getParser().clearPendingErrors();
2680 if (PendingErrors)
2681 return ParseStatus::Failure;
2682 if (Result)
2683 return ParseStatus::NoMatch;
2684 return ParseStatus::Success;
2685}
2686
2687bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2688 RegisterKind RegKind, unsigned Reg1,
2689 SMLoc Loc) {
2690 switch (RegKind) {
2691 case IS_SPECIAL:
2692 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2693 Reg = AMDGPU::EXEC;
2694 RegWidth = 64;
2695 return true;
2696 }
2697 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2698 Reg = AMDGPU::FLAT_SCR;
2699 RegWidth = 64;
2700 return true;
2701 }
2702 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2703 Reg = AMDGPU::XNACK_MASK;
2704 RegWidth = 64;
2705 return true;
2706 }
2707 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2708 Reg = AMDGPU::VCC;
2709 RegWidth = 64;
2710 return true;
2711 }
2712 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2713 Reg = AMDGPU::TBA;
2714 RegWidth = 64;
2715 return true;
2716 }
2717 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2718 Reg = AMDGPU::TMA;
2719 RegWidth = 64;
2720 return true;
2721 }
2722 Error(Loc, "register does not fit in the list");
2723 return false;
2724 case IS_VGPR:
2725 case IS_SGPR:
2726 case IS_AGPR:
2727 case IS_TTMP:
2728 if (Reg1 != Reg + RegWidth / 32) {
2729 Error(Loc, "registers in a list must have consecutive indices");
2730 return false;
2731 }
2732 RegWidth += 32;
2733 return true;
2734 default:
2735 llvm_unreachable("unexpected register kind");
2736 }
2737}
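// For example, parsing the list [s0,s1,s2,s3] grows RegWidth from 32 to 128
// one element at a time, while [s0,s2] is rejected because the indices are
// not consecutive.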
2738
2739struct RegInfo {
2740 StringLiteral Name;
2741 RegisterKind Kind;
2742};
2743
2744static constexpr RegInfo RegularRegisters[] = {
2745 {{"v"}, IS_VGPR},
2746 {{"s"}, IS_SGPR},
2747 {{"ttmp"}, IS_TTMP},
2748 {{"acc"}, IS_AGPR},
2749 {{"a"}, IS_AGPR},
2750};
2751
2752static bool isRegularReg(RegisterKind Kind) {
2753 return Kind == IS_VGPR ||
2754 Kind == IS_SGPR ||
2755 Kind == IS_TTMP ||
2756 Kind == IS_AGPR;
2757}
2758
2759 static const RegInfo* getRegularRegInfo(StringRef Str) {
2760 for (const RegInfo &Reg : RegularRegisters)
2761 if (Str.starts_with(Reg.Name))
2762 return &Reg;
2763 return nullptr;
2764}
2765
2766static bool getRegNum(StringRef Str, unsigned& Num) {
2767 return !Str.getAsInteger(10, Num);
2768}
2769
2770bool
2771AMDGPUAsmParser::isRegister(const AsmToken &Token,
2772 const AsmToken &NextToken) const {
2773
2774 // A list of consecutive registers: [s0,s1,s2,s3]
2775 if (Token.is(AsmToken::LBrac))
2776 return true;
2777
2778 if (!Token.is(AsmToken::Identifier))
2779 return false;
2780
2781 // A single register like s0 or a range of registers like s[0:1]
2782
2783 StringRef Str = Token.getString();
2784 const RegInfo *Reg = getRegularRegInfo(Str);
2785 if (Reg) {
2786 StringRef RegName = Reg->Name;
2787 StringRef RegSuffix = Str.substr(RegName.size());
2788 if (!RegSuffix.empty()) {
2789 RegSuffix.consume_back(".l");
2790 RegSuffix.consume_back(".h");
2791 unsigned Num;
2792 // A single register with an index: rXX
2793 if (getRegNum(RegSuffix, Num))
2794 return true;
2795 } else {
2796 // A range of registers: r[XX:YY].
2797 if (NextToken.is(AsmToken::LBrac))
2798 return true;
2799 }
2800 }
2801
2802 return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2803}
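// For example, "v255", "s[0:3]", "ttmp4", "[v0,v1]" and "vcc" are all
// recognized as registers here, while "vfoo" is not (its suffix is not a
// number and it names no special register).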
2804
2805bool
2806AMDGPUAsmParser::isRegister()
2807{
2808 return isRegister(getToken(), peekToken());
2809}
2810
2811unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2812 unsigned SubReg, unsigned RegWidth,
2813 SMLoc Loc) {
2814 assert(isRegularReg(RegKind));
2815
2816 unsigned AlignSize = 1;
2817 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2818 // SGPR and TTMP registers must be aligned.
2819 // Max required alignment is 4 dwords.
2820 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2821 }
2822
2823 if (RegNum % AlignSize != 0) {
2824 Error(Loc, "invalid register alignment");
2825 return AMDGPU::NoRegister;
2826 }
2827
2828 unsigned RegIdx = RegNum / AlignSize;
2829 int RCID = getRegClass(RegKind, RegWidth);
2830 if (RCID == -1) {
2831 Error(Loc, "invalid or unsupported register size");
2832 return AMDGPU::NoRegister;
2833 }
2834
2835 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2836 const MCRegisterClass RC = TRI->getRegClass(RCID);
2837 if (RegIdx >= RC.getNumRegs()) {
2838 Error(Loc, "register index is out of range");
2839 return AMDGPU::NoRegister;
2840 }
2841
2842 unsigned Reg = RC.getRegister(RegIdx);
2843
2844 if (SubReg) {
2845 Reg = TRI->getSubReg(Reg, SubReg);
2846
2847 // Currently all regular registers have their .l and .h subregisters, so
2848 // we should never need to generate an error here.
2849 assert(Reg && "Invalid subregister!");
2850 }
2851
2852 return Reg;
2853}
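// For example, s[4:7] (RegNum 4, RegWidth 128, AlignSize 4) is accepted,
// while s[3:6] fails with "invalid register alignment". VGPRs keep
// AlignSize 1, so v[3:6] is fine.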
2854
2855bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2856 int64_t RegLo, RegHi;
2857 if (!skipToken(AsmToken::LBrac, "missing register index"))
2858 return false;
2859
2860 SMLoc FirstIdxLoc = getLoc();
2861 SMLoc SecondIdxLoc;
2862
2863 if (!parseExpr(RegLo))
2864 return false;
2865
2866 if (trySkipToken(AsmToken::Colon)) {
2867 SecondIdxLoc = getLoc();
2868 if (!parseExpr(RegHi))
2869 return false;
2870 } else {
2871 RegHi = RegLo;
2872 }
2873
2874 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2875 return false;
2876
2877 if (!isUInt<32>(RegLo)) {
2878 Error(FirstIdxLoc, "invalid register index");
2879 return false;
2880 }
2881
2882 if (!isUInt<32>(RegHi)) {
2883 Error(SecondIdxLoc, "invalid register index");
2884 return false;
2885 }
2886
2887 if (RegLo > RegHi) {
2888 Error(FirstIdxLoc, "first register index should not exceed second index");
2889 return false;
2890 }
2891
2892 Num = static_cast<unsigned>(RegLo);
2893 RegWidth = 32 * ((RegHi - RegLo) + 1);
2894 return true;
2895}
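// For example, the range "[0:3]" yields Num = 0 and RegWidth = 128, while a
// single bracketed index such as "[5]" yields Num = 5 and RegWidth = 32.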
2896
2897unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2898 unsigned &RegNum, unsigned &RegWidth,
2899 SmallVectorImpl<AsmToken> &Tokens) {
2900 assert(isToken(AsmToken::Identifier));
2901 unsigned Reg = getSpecialRegForName(getTokenStr());
2902 if (Reg) {
2903 RegNum = 0;
2904 RegWidth = 32;
2905 RegKind = IS_SPECIAL;
2906 Tokens.push_back(getToken());
2907 lex(); // skip register name
2908 }
2909 return Reg;
2910}
2911
2912unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2913 unsigned &RegNum, unsigned &RegWidth,
2914 SmallVectorImpl<AsmToken> &Tokens) {
2915 assert(isToken(AsmToken::Identifier));
2916 StringRef RegName = getTokenStr();
2917 auto Loc = getLoc();
2918
2919 const RegInfo *RI = getRegularRegInfo(RegName);
2920 if (!RI) {
2921 Error(Loc, "invalid register name");
2922 return AMDGPU::NoRegister;
2923 }
2924
2925 Tokens.push_back(getToken());
2926 lex(); // skip register name
2927
2928 RegKind = RI->Kind;
2929 StringRef RegSuffix = RegName.substr(RI->Name.size());
2930 unsigned SubReg = NoSubRegister;
2931 if (!RegSuffix.empty()) {
2932 // We don't know the opcode until we are done parsing, so we don't know
2933 // whether registers should be 16- or 32-bit. It is therefore mandatory to
2934 // put .l or .h to correctly specify 16-bit registers. We also can't
2935 // determine the class (VGPR_16_Lo128 or VGPR_16), so always parse VGPR_16.
2936 if (RegSuffix.consume_back(".l"))
2937 SubReg = AMDGPU::lo16;
2938 else if (RegSuffix.consume_back(".h"))
2939 SubReg = AMDGPU::hi16;
2940
2941 // Single 32-bit register: vXX.
2942 if (!getRegNum(RegSuffix, RegNum)) {
2943 Error(Loc, "invalid register index");
2944 return AMDGPU::NoRegister;
2945 }
2946 RegWidth = 32;
2947 } else {
2948 // Range of registers: v[XX:YY]. ":YY" is optional.
2949 if (!ParseRegRange(RegNum, RegWidth))
2950 return AMDGPU::NoRegister;
2951 }
2952
2953 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
2954}
2955
2956unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2957 unsigned &RegWidth,
2958 SmallVectorImpl<AsmToken> &Tokens) {
2959 unsigned Reg = AMDGPU::NoRegister;
2960 auto ListLoc = getLoc();
2961
2962 if (!skipToken(AsmToken::LBrac,
2963 "expected a register or a list of registers")) {
2964 return AMDGPU::NoRegister;
2965 }
2966
2967 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2968
2969 auto Loc = getLoc();
2970 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2971 return AMDGPU::NoRegister;
2972 if (RegWidth != 32) {
2973 Error(Loc, "expected a single 32-bit register");
2974 return AMDGPU::NoRegister;
2975 }
2976
2977 for (; trySkipToken(AsmToken::Comma); ) {
2978 RegisterKind NextRegKind;
2979 unsigned NextReg, NextRegNum, NextRegWidth;
2980 Loc = getLoc();
2981
2982 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2983 NextRegNum, NextRegWidth,
2984 Tokens)) {
2985 return AMDGPU::NoRegister;
2986 }
2987 if (NextRegWidth != 32) {
2988 Error(Loc, "expected a single 32-bit register");
2989 return AMDGPU::NoRegister;
2990 }
2991 if (NextRegKind != RegKind) {
2992 Error(Loc, "registers in a list must be of the same kind");
2993 return AMDGPU::NoRegister;
2994 }
2995 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2996 return AMDGPU::NoRegister;
2997 }
2998
2999 if (!skipToken(AsmToken::RBrac,
3000 "expected a comma or a closing square bracket")) {
3001 return AMDGPU::NoRegister;
3002 }
3003
3004 if (isRegularReg(RegKind))
3005 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3006
3007 return Reg;
3008}
3009
3010bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3011 unsigned &RegNum, unsigned &RegWidth,
3012 SmallVectorImpl<AsmToken> &Tokens) {
3013 auto Loc = getLoc();
3014 Reg = AMDGPU::NoRegister;
3015
3016 if (isToken(AsmToken::Identifier)) {
3017 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3018 if (Reg == AMDGPU::NoRegister)
3019 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3020 } else {
3021 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3022 }
3023
3024 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3025 if (Reg == AMDGPU::NoRegister) {
3026 assert(Parser.hasPendingError());
3027 return false;
3028 }
3029
3030 if (!subtargetHasRegister(*TRI, Reg)) {
3031 if (Reg == AMDGPU::SGPR_NULL) {
3032 Error(Loc, "'null' operand is not supported on this GPU");
3033 } else {
3034 Error(Loc, "register not available on this GPU");
3035 }
3036 return false;
3037 }
3038
3039 return true;
3040}
3041
3042bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3043 unsigned &RegNum, unsigned &RegWidth,
3044 bool RestoreOnFailure /*=false*/) {
3045 Reg = AMDGPU::NoRegister;
3046
3047 SmallVector<AsmToken, 1> Tokens;
3048 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3049 if (RestoreOnFailure) {
3050 while (!Tokens.empty()) {
3051 getLexer().UnLex(Tokens.pop_back_val());
3052 }
3053 }
3054 return true;
3055 }
3056 return false;
3057}
3058
3059std::optional<StringRef>
3060AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3061 switch (RegKind) {
3062 case IS_VGPR:
3063 return StringRef(".amdgcn.next_free_vgpr");
3064 case IS_SGPR:
3065 return StringRef(".amdgcn.next_free_sgpr");
3066 default:
3067 return std::nullopt;
3068 }
3069}
3070
3071void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3072 auto SymbolName = getGprCountSymbolName(RegKind);
3073 assert(SymbolName && "initializing invalid register kind");
3074 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3075 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
3076}
3077
3078bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3079 unsigned DwordRegIndex,
3080 unsigned RegWidth) {
3081 // Symbols are only defined for GCN targets
3082 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3083 return true;
3084
3085 auto SymbolName = getGprCountSymbolName(RegKind);
3086 if (!SymbolName)
3087 return true;
3088 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3089
3090 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3091 int64_t OldCount;
3092
3093 if (!Sym->isVariable())
3094 return !Error(getLoc(),
3095 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3096 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
3097 return !Error(
3098 getLoc(),
3099 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3100
3101 if (OldCount <= NewMax)
3102 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
3103
3104 return true;
3105}
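// For example, after parsing an operand that uses v[6:7], DwordRegIndex is 6
// and RegWidth is 64, so .amdgcn.next_free_vgpr is raised to at least 8.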
3106
3107std::unique_ptr<AMDGPUOperand>
3108AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3109 const auto &Tok = getToken();
3110 SMLoc StartLoc = Tok.getLoc();
3111 SMLoc EndLoc = Tok.getEndLoc();
3112 RegisterKind RegKind;
3113 unsigned Reg, RegNum, RegWidth;
3114
3115 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3116 return nullptr;
3117 }
3118 if (isHsaAbi(getSTI())) {
3119 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3120 return nullptr;
3121 } else
3122 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3123 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3124}
3125
3126ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3127 bool HasSP3AbsModifier, bool HasLit) {
3128 // TODO: add syntactic sugar for 1/(2*PI)
3129
3130 if (isRegister())
3131 return ParseStatus::NoMatch;
3132 assert(!isModifier());
3133
3134 if (!HasLit) {
3135 HasLit = trySkipId("lit");
3136 if (HasLit) {
3137 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3138 return ParseStatus::Failure;
3139 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
3140 if (S.isSuccess() &&
3141 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3142 return ParseStatus::Failure;
3143 return S;
3144 }
3145 }
3146
3147 const auto& Tok = getToken();
3148 const auto& NextTok = peekToken();
3149 bool IsReal = Tok.is(AsmToken::Real);
3150 SMLoc S = getLoc();
3151 bool Negate = false;
3152
3153 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3154 lex();
3155 IsReal = true;
3156 Negate = true;
3157 }
3158
3159 AMDGPUOperand::Modifiers Mods;
3160 Mods.Lit = HasLit;
3161
3162 if (IsReal) {
3163 // Floating-point expressions are not supported;
3164 // we can only accept floating-point literals with an
3165 // optional sign.
3166
3167 StringRef Num = getTokenStr();
3168 lex();
3169
3170 APFloat RealVal(APFloat::IEEEdouble());
3171 auto roundMode = APFloat::rmNearestTiesToEven;
3172 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3173 return ParseStatus::Failure;
3174 if (Negate)
3175 RealVal.changeSign();
3176
3177 Operands.push_back(
3178 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3179 AMDGPUOperand::ImmTyNone, true));
3180 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3181 Op.setModifiers(Mods);
3182
3183 return ParseStatus::Success;
3184
3185 } else {
3186 int64_t IntVal;
3187 const MCExpr *Expr;
3188 SMLoc S = getLoc();
3189
3190 if (HasSP3AbsModifier) {
3191 // This is a workaround for handling expressions
3192 // as arguments of SP3 'abs' modifier, for example:
3193 // |1.0|
3194 // |-1|
3195 // |1+x|
3196 // This syntax is not compatible with the syntax of standard
3197 // MC expressions (due to the trailing '|').
3198 SMLoc EndLoc;
3199 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3200 return ParseStatus::Failure;
3201 } else {
3202 if (Parser.parseExpression(Expr))
3203 return ParseStatus::Failure;
3204 }
3205
3206 if (Expr->evaluateAsAbsolute(IntVal)) {
3207 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3208 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3209 Op.setModifiers(Mods);
3210 } else {
3211 if (HasLit)
3212 return ParseStatus::NoMatch;
3213 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3214 }
3215
3216 return ParseStatus::Success;
3217 }
3218
3219 return ParseStatus::NoMatch;
3220}
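// Example of the lit() syntax handled above: "lit(1.0)" marks the following
// immediate with the Lit modifier, which (as assumed here) asks for the value
// to be encoded as a literal constant rather than an inline constant.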
3221
3222ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3223 if (!isRegister())
3224 return ParseStatus::NoMatch;
3225
3226 if (auto R = parseRegister()) {
3227 assert(R->isReg());
3228 Operands.push_back(std::move(R));
3229 return ParseStatus::Success;
3230 }
3231 return ParseStatus::Failure;
3232}
3233
3234ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3235 bool HasSP3AbsMod, bool HasLit) {
3236 ParseStatus Res = parseReg(Operands);
3237 if (!Res.isNoMatch())
3238 return Res;
3239 if (isModifier())
3240 return ParseStatus::NoMatch;
3241 return parseImm(Operands, HasSP3AbsMod, HasLit);
3242}
3243
3244bool
3245AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3246 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3247 const auto &str = Token.getString();
3248 return str == "abs" || str == "neg" || str == "sext";
3249 }
3250 return false;
3251}
3252
3253bool
3254AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3255 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3256}
3257
3258bool
3259AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3260 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3261}
3262
3263bool
3264AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3265 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3266}
3267
3268// Check if this is an operand modifier or an opcode modifier
3269 // which may look like an expression but is not. We should
3270// avoid parsing these modifiers as expressions. Currently
3271// recognized sequences are:
3272// |...|
3273// abs(...)
3274// neg(...)
3275// sext(...)
3276// -reg
3277// -|...|
3278// -abs(...)
3279// name:...
3280//
3281bool
3282AMDGPUAsmParser::isModifier() {
3283
3284 AsmToken Tok = getToken();
3285 AsmToken NextToken[2];
3286 peekTokens(NextToken);
3287
3288 return isOperandModifier(Tok, NextToken[0]) ||
3289 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3290 isOpcodeModifierWithVal(Tok, NextToken[0]);
3291}
3292
3293// Check if the current token is an SP3 'neg' modifier.
3294// Currently this modifier is allowed in the following context:
3295//
3296// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3297// 2. Before an 'abs' modifier: -abs(...)
3298// 3. Before an SP3 'abs' modifier: -|...|
3299//
3300// In all other cases "-" is handled as a part
3301// of an expression that follows the sign.
3302//
3303// Note: When "-" is followed by an integer literal,
3304// this is interpreted as integer negation rather
3305// than a floating-point NEG modifier applied to N.
3306 // Besides being counter-intuitive, such use of the floating-point
3307 // NEG modifier would result in different meanings
3308 // of integer literals used with VOP1/2/C and VOP3,
3309// for example:
3310// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3311// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3312// Negative fp literals with preceding "-" are
3313// handled likewise for uniformity
3314//
3315bool
3316AMDGPUAsmParser::parseSP3NegModifier() {
3317
3318 AsmToken NextToken[2];
3319 peekTokens(NextToken);
3320
3321 if (isToken(AsmToken::Minus) &&
3322 (isRegister(NextToken[0], NextToken[1]) ||
3323 NextToken[0].is(AsmToken::Pipe) ||
3324 isId(NextToken[0], "abs"))) {
3325 lex();
3326 return true;
3327 }
3328
3329 return false;
3330}
3331
3332 ParseStatus
3333 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3334 bool AllowImm) {
3335 bool Neg, SP3Neg;
3336 bool Abs, SP3Abs;
3337 bool Lit;
3338 SMLoc Loc;
3339
3340 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3341 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3342 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3343
3344 SP3Neg = parseSP3NegModifier();
3345
3346 Loc = getLoc();
3347 Neg = trySkipId("neg");
3348 if (Neg && SP3Neg)
3349 return Error(Loc, "expected register or immediate");
3350 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3351 return ParseStatus::Failure;
3352
3353 Abs = trySkipId("abs");
3354 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3355 return ParseStatus::Failure;
3356
3357 Lit = trySkipId("lit");
3358 if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3359 return ParseStatus::Failure;
3360
3361 Loc = getLoc();
3362 SP3Abs = trySkipToken(AsmToken::Pipe);
3363 if (Abs && SP3Abs)
3364 return Error(Loc, "expected register or immediate");
3365
3366 ParseStatus Res;
3367 if (AllowImm) {
3368 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3369 } else {
3370 Res = parseReg(Operands);
3371 }
3372 if (!Res.isSuccess())
3373 return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
3374
3375 if (Lit && !Operands.back()->isImm())
3376 Error(Loc, "expected immediate with lit modifier");
3377
3378 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3379 return ParseStatus::Failure;
3380 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3381 return ParseStatus::Failure;
3382 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3383 return ParseStatus::Failure;
3384 if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3385 return ParseStatus::Failure;
3386
3387 AMDGPUOperand::Modifiers Mods;
3388 Mods.Abs = Abs || SP3Abs;
3389 Mods.Neg = Neg || SP3Neg;
3390 Mods.Lit = Lit;
3391
3392 if (Mods.hasFPModifiers() || Lit) {
3393 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3394 if (Op.isExpr())
3395 return Error(Op.getStartLoc(), "expected an absolute expression");
3396 Op.setModifiers(Mods);
3397 }
3398 return ParseStatus::Success;
3399}
3400
3401 ParseStatus
3402 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3403 bool AllowImm) {
3404 bool Sext = trySkipId("sext");
3405 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3406 return ParseStatus::Failure;
3407
3408 ParseStatus Res;
3409 if (AllowImm) {
3410 Res = parseRegOrImm(Operands);
3411 } else {
3412 Res = parseReg(Operands);
3413 }
3414 if (!Res.isSuccess())
3415 return Sext ? ParseStatus::Failure : Res;
3416
3417 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3418 return ParseStatus::Failure;
3419
3420 AMDGPUOperand::Modifiers Mods;
3421 Mods.Sext = Sext;
3422
3423 if (Mods.hasIntModifiers()) {
3424 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3425 if (Op.isExpr())
3426 return Error(Op.getStartLoc(), "expected an absolute expression");
3427 Op.setModifiers(Mods);
3428 }
3429
3430 return ParseStatus::Success;
3431}
3432
3433ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3434 return parseRegOrImmWithFPInputMods(Operands, false);
3435}
3436
3437ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3438 return parseRegOrImmWithIntInputMods(Operands, false);
3439}
3440
3441ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3442 auto Loc = getLoc();
3443 if (trySkipId("off")) {
3444 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3445 AMDGPUOperand::ImmTyOff, false));
3446 return ParseStatus::Success;
3447 }
3448
3449 if (!isRegister())
3450 return ParseStatus::NoMatch;
3451
3452 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3453 if (Reg) {
3454 Operands.push_back(std::move(Reg));
3455 return ParseStatus::Success;
3456 }
3457
3458 return ParseStatus::Failure;
3459}
3460
3461unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3462 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3463
3464 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3465 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3466 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3467 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3468 return Match_InvalidOperand;
3469
3470 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3471 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3472 // v_mac_f32/16 allow only dst_sel == DWORD;
3473 auto OpNum =
3474 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3475 const auto &Op = Inst.getOperand(OpNum);
3476 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3477 return Match_InvalidOperand;
3478 }
3479 }
3480
3481 return Match_Success;
3482}
3483
3484 static ArrayRef<unsigned> getAllVariants() {
3485 static const unsigned Variants[] = {
3489 };
3490
3491 return ArrayRef(Variants);
3492}
3493
3494// What asm variants we should check
3495ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3496 if (isForcedDPP() && isForcedVOP3()) {
3497 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3498 return ArrayRef(Variants);
3499 }
3500 if (getForcedEncodingSize() == 32) {
3501 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3502 return ArrayRef(Variants);
3503 }
3504
3505 if (isForcedVOP3()) {
3506 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3507 return ArrayRef(Variants);
3508 }
3509
3510 if (isForcedSDWA()) {
3511 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3513 return ArrayRef(Variants);
3514 }
3515
3516 if (isForcedDPP()) {
3517 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3518 return ArrayRef(Variants);
3519 }
3520
3521 return getAllVariants();
3522}
3523
3524StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3525 if (isForcedDPP() && isForcedVOP3())
3526 return "e64_dpp";
3527
3528 if (getForcedEncodingSize() == 32)
3529 return "e32";
3530
3531 if (isForcedVOP3())
3532 return "e64";
3533
3534 if (isForcedSDWA())
3535 return "sdwa";
3536
3537 if (isForcedDPP())
3538 return "dpp";
3539
3540 return "";
3541}
3542
3543unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3544 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3545 for (MCPhysReg Reg : Desc.implicit_uses()) {
3546 switch (Reg) {
3547 case AMDGPU::FLAT_SCR:
3548 case AMDGPU::VCC:
3549 case AMDGPU::VCC_LO:
3550 case AMDGPU::VCC_HI:
3551 case AMDGPU::M0:
3552 return Reg;
3553 default:
3554 break;
3555 }
3556 }
3557 return AMDGPU::NoRegister;
3558}
3559
3560// NB: This code is correct only when used to check constant
3561 // bus limitations because GFX7 supports no f16 inline constants.
3562// Note that there are no cases when a GFX7 opcode violates
3563// constant bus limitations due to the use of an f16 constant.
3564bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3565 unsigned OpIdx) const {
3566 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3567
3568 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3569 AMDGPU::isKImmOperand(Desc, OpIdx)) {
3570 return false;
3571 }
3572
3573 const MCOperand &MO = Inst.getOperand(OpIdx);
3574
3575 int64_t Val = MO.getImm();
3576 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3577
3578 switch (OpSize) { // expected operand size
3579 case 8:
3580 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3581 case 4:
3582 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3583 case 2: {
3584 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3588 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3589
3594
3599
3604
3609 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3610
3615 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3616
3617 llvm_unreachable("invalid operand type");
3618 }
3619 default:
3620 llvm_unreachable("invalid operand size");
3621 }
3622}
3623
3624unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3625 if (!isGFX10Plus())
3626 return 1;
3627
3628 switch (Opcode) {
3629 // 64-bit shift instructions can use only one scalar value input
3630 case AMDGPU::V_LSHLREV_B64_e64:
3631 case AMDGPU::V_LSHLREV_B64_gfx10:
3632 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3633 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3634 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3635 case AMDGPU::V_LSHRREV_B64_e64:
3636 case AMDGPU::V_LSHRREV_B64_gfx10:
3637 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3638 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3639 case AMDGPU::V_ASHRREV_I64_e64:
3640 case AMDGPU::V_ASHRREV_I64_gfx10:
3641 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3642 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3643 case AMDGPU::V_LSHL_B64_e64:
3644 case AMDGPU::V_LSHR_B64_e64:
3645 case AMDGPU::V_ASHR_I64_e64:
3646 return 1;
3647 default:
3648 return 2;
3649 }
3650}
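// Illustrative (assumed) assembly for the limits above, on GFX10+:
//   v_add_f32_e64 v0, s1, s2           // ok: two scalar inputs, limit is 2
//   v_lshlrev_b64 v[0:1], s1, s[2:3]   // rejected: 64-bit shifts allow only
//                                      // one scalar input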
3651
3652constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3653 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3654
3655 // Get regular operand indices in the same order as specified
3656 // in the instruction (but append mandatory literals to the end).
3657 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3658 bool AddMandatoryLiterals = false) {
3659
3660 int16_t ImmIdx =
3661 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3662
3663 if (isVOPD(Opcode)) {
3664 int16_t ImmDeferredIdx =
3665 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3666 : -1;
3667
3668 return {getNamedOperandIdx(Opcode, OpName::src0X),
3669 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3670 getNamedOperandIdx(Opcode, OpName::src0Y),
3671 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3672 ImmDeferredIdx,
3673 ImmIdx};
3674 }
3675
3676 return {getNamedOperandIdx(Opcode, OpName::src0),
3677 getNamedOperandIdx(Opcode, OpName::src1),
3678 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3679}
3680
3681bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3682 const MCOperand &MO = Inst.getOperand(OpIdx);
3683 if (MO.isImm()) {
3684 return !isInlineConstant(Inst, OpIdx);
3685 } else if (MO.isReg()) {
3686 auto Reg = MO.getReg();
3687 if (!Reg) {
3688 return false;
3689 }
3690 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3691 auto PReg = mc2PseudoReg(Reg);
3692 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3693 } else {
3694 return true;
3695 }
3696}
3697
3698// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3699 // Writelane is special in that it can use an SGPR and M0 (which would normally
3700// count as using the constant bus twice - but in this case it is allowed since
3701// the lane selector doesn't count as a use of the constant bus). However, it is
3702// still required to abide by the 1 SGPR rule.
3703static bool checkWriteLane(const MCInst &Inst) {
3704 const unsigned Opcode = Inst.getOpcode();
3705 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3706 return false;
3707 const MCOperand &LaneSelOp = Inst.getOperand(2);
3708 if (!LaneSelOp.isReg())
3709 return false;
3710 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3711 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3712}
3713
3714bool AMDGPUAsmParser::validateConstantBusLimitations(
3715 const MCInst &Inst, const OperandVector &Operands) {
3716 const unsigned Opcode = Inst.getOpcode();
3717 const MCInstrDesc &Desc = MII.get(Opcode);
3718 unsigned LastSGPR = AMDGPU::NoRegister;
3719 unsigned ConstantBusUseCount = 0;
3720 unsigned NumLiterals = 0;
3721 unsigned LiteralSize;
3722
3723 if (!(Desc.TSFlags &
3726 !isVOPD(Opcode))
3727 return true;
3728
3729 if (checkWriteLane(Inst))
3730 return true;
3731
3732 // Check special imm operands (used by madmk, etc)
3733 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3734 ++NumLiterals;
3735 LiteralSize = 4;
3736 }
3737
3738 SmallDenseSet<unsigned> SGPRsUsed;
3739 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3740 if (SGPRUsed != AMDGPU::NoRegister) {
3741 SGPRsUsed.insert(SGPRUsed);
3742 ++ConstantBusUseCount;
3743 }
3744
3745 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3746
3747 for (int OpIdx : OpIndices) {
3748 if (OpIdx == -1)
3749 continue;
3750
3751 const MCOperand &MO = Inst.getOperand(OpIdx);
3752 if (usesConstantBus(Inst, OpIdx)) {
3753 if (MO.isReg()) {
3754 LastSGPR = mc2PseudoReg(MO.getReg());
3755 // Pairs of registers with a partial intersection like these
3756 // s0, s[0:1]
3757 // flat_scratch_lo, flat_scratch
3758 // flat_scratch_lo, flat_scratch_hi
3759 // are theoretically valid but they are disabled anyway.
3760 // Note that this code mimics SIInstrInfo::verifyInstruction
3761 if (SGPRsUsed.insert(LastSGPR).second) {
3762 ++ConstantBusUseCount;
3763 }
3764 } else { // Expression or a literal
3765
3766 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3767 continue; // special operand like VINTERP attr_chan
3768
3769 // An instruction may use only one literal.
3770 // This has been validated in a previous step.
3771 // See validateVOPLiteral.
3772 // This literal may be used as more than one operand.
3773 // If all these operands are of the same size,
3774 // this literal counts as one scalar value.
3775 // Otherwise it counts as 2 scalar values.
3776 // See "GFX10 Shader Programming", section 3.6.2.3.
3777
3778 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3779 if (Size < 4)
3780 Size = 4;
3781
3782 if (NumLiterals == 0) {
3783 NumLiterals = 1;
3784 LiteralSize = Size;
3785 } else if (LiteralSize != Size) {
3786 NumLiterals = 2;
3787 }
3788 }
3789 }
3790 }
3791 ConstantBusUseCount += NumLiterals;
3792
3793 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3794 return true;
3795
3796 SMLoc LitLoc = getLitLoc(Operands);
3797 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3798 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3799 Error(Loc, "invalid operand (violates constant bus restrictions)");
3800 return false;
3801}
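// Illustration (hedged; assumes a target where getConstantBusLimit() returns 1
// for the opcode):
//   v_add_f32_e64 v0, s1, s2  ; two distinct SGPRs -> 2 constant bus uses ->
//                             ; "invalid operand (violates constant bus restrictions)"
//   v_add_f32_e64 v0, s1, s1  ; the same SGPR counts once -> accepted
// Likewise, one 32-bit literal reused by equally sized operands counts as a
// single scalar value, while differently sized uses count as two.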
3802
3803bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3804 const MCInst &Inst, const OperandVector &Operands) {
3805
3806 const unsigned Opcode = Inst.getOpcode();
3807 if (!isVOPD(Opcode))
3808 return true;
3809
3810 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3811
3812 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3813 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3814 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3815 ? Opr.getReg()
3816 : MCRegister();
3817 };
3818
3819 // On GFX12, if both OpX and OpY are V_MOV_B32, then OpY uses the SRC2 source-cache.
3820 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3821
3822 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3823 auto InvalidCompOprIdx =
3824 InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3825 if (!InvalidCompOprIdx)
3826 return true;
3827
3828 auto CompOprIdx = *InvalidCompOprIdx;
3829 auto ParsedIdx =
3830 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3831 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3832 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3833
3834 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3835 if (CompOprIdx == VOPD::Component::DST) {
3836 Error(Loc, "one dst register must be even and the other odd");
3837 } else {
3838 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3839 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3840 " operands must use different VGPR banks");
3841 }
3842
3843 return false;
3844}
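// Illustration (hedged; the authoritative bank assignment comes from
// getVOPDInstInfo() and getInvalidCompOperandIndex()): for a GFX11-style VOPD
// pair written with "::", the X and Y destinations must be one even and one
// odd VGPR, and corresponding sources must come from different VGPR banks;
// violations produce the two errors emitted above.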
3845
3846bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3847
3848 const unsigned Opc = Inst.getOpcode();
3849 const MCInstrDesc &Desc = MII.get(Opc);
3850
3851 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3852 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3853 assert(ClampIdx != -1);
3854 return Inst.getOperand(ClampIdx).getImm() == 0;
3855 }
3856
3857 return true;
3858}
3859
3860constexpr uint64_t MIMGFlags =
3861 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
3862
3863bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3864 const SMLoc &IDLoc) {
3865
3866 const unsigned Opc = Inst.getOpcode();
3867 const MCInstrDesc &Desc = MII.get(Opc);
3868
3869 if ((Desc.TSFlags & MIMGFlags) == 0)
3870 return true;
3871
3872 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3873 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3874 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3875
3876 assert(VDataIdx != -1);
3877
3878 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3879 return true;
3880
3881 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3882 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3883 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3884 if (DMask == 0)
3885 DMask = 1;
3886
3887 bool IsPackedD16 = false;
3888 unsigned DataSize =
3889 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3890 if (hasPackedD16()) {
3891 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3892 IsPackedD16 = D16Idx >= 0;
3893 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3894 DataSize = (DataSize + 1) / 2;
3895 }
3896
3897 if ((VDataSize / 4) == DataSize + TFESize)
3898 return true;
3899
3900 StringRef Modifiers;
3901 if (isGFX90A())
3902 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3903 else
3904 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3905
3906 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3907 return false;
3908}
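// Worked example of the size check above (hypothetical operand values): with
// dmask:0x7, tfe set, and no packed d16, DataSize = popcount(7) = 3 and
// TFESize = 1, so vdata must be 4 dwords wide (128 bits); with packed d16 the
// payload shrinks to (3 + 1) / 2 = 2 dwords plus the tfe dword, 3 in total.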
3909
3910bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3911 const SMLoc &IDLoc) {
3912 const unsigned Opc = Inst.getOpcode();
3913 const MCInstrDesc &Desc = MII.get(Opc);
3914
3915 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
3916 return true;
3917
3918 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3919
3920 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3921 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3922 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3923 int RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc
3924 : AMDGPU::OpName::rsrc;
3925 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
3926 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3927 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3928
3929 assert(VAddr0Idx != -1);
3930 assert(SrsrcIdx != -1);
3931 assert(SrsrcIdx > VAddr0Idx);
3932
3933 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3934 if (BaseOpcode->BVH) {
3935 if (IsA16 == BaseOpcode->A16)
3936 return true;
3937 Error(IDLoc, "image address size does not match a16");
3938 return false;
3939 }
3940
3941 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3942 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3943 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3944 unsigned ActualAddrSize =
3945 IsNSA ? SrsrcIdx - VAddr0Idx
3946 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3947
3948 unsigned ExpectedAddrSize =
3949 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3950
3951 if (IsNSA) {
3952 if (hasPartialNSAEncoding() &&
3953 ExpectedAddrSize >
3954 getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
3955 int VAddrLastIdx = SrsrcIdx - 1;
3956 unsigned VAddrLastSize =
3957 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3958
3959 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3960 }
3961 } else {
3962 if (ExpectedAddrSize > 12)
3963 ExpectedAddrSize = 16;
3964
3965 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3966 // This provides backward compatibility for assembly created
3967 // before 160b/192b/224b types were directly supported.
3968 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3969 return true;
3970 }
3971
3972 if (ActualAddrSize == ExpectedAddrSize)
3973 return true;
3974
3975 Error(IDLoc, "image address size does not match dim and a16");
3976 return false;
3977}
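// Worked example of the fallback above (hypothetical sizes): a contiguous
// vaddr expected to hold 5, 6 or 7 address dwords may still be written as an
// 8-dword tuple for backward compatibility, and any expected size above 12 is
// rounded up to 16 before the final comparison with the actual register width.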
3978
3979bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3980
3981 const unsigned Opc = Inst.getOpcode();
3982 const MCInstrDesc &Desc = MII.get(Opc);
3983
3984 if ((Desc.TSFlags & MIMGFlags) == 0)
3985 return true;
3986 if (!Desc.mayLoad() || !Desc.mayStore())
3987 return true; // Not atomic
3988
3989 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3990 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3991
3992 // This is an incomplete check because image_atomic_cmpswap
3993 // may only use 0x3 and 0xf while other atomic operations
3994 // may use 0x1 and 0x3. However these limitations are
3995 // verified when we check that dmask matches dst size.
3996 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3997}
3998
3999bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4000
4001 const unsigned Opc = Inst.getOpcode();
4002 const MCInstrDesc &Desc = MII.get(Opc);
4003
4004 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4005 return true;
4006
4007 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4008 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4009
4010 // GATHER4 instructions use dmask in a different fashion compared to
4011 // other MIMG instructions. The only useful DMASK values are
4012 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4013 // (red,red,red,red) etc.) The ISA document doesn't mention
4014 // this.
4015 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4016}
4017
4018bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4019 const unsigned Opc = Inst.getOpcode();
4020 const MCInstrDesc &Desc = MII.get(Opc);
4021
4022 if ((Desc.TSFlags & MIMGFlags) == 0)
4023 return true;
4024
4025 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4026 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4027 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4028
4029 if (!BaseOpcode->MSAA)
4030 return true;
4031
4032 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4033 assert(DimIdx != -1);
4034
4035 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4036 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4037
4038 return DimInfo->MSAA;
4039}
4040
4041static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4042{
4043 switch (Opcode) {
4044 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4045 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4046 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4047 return true;
4048 default:
4049 return false;
4050 }
4051}
4052
4053// movrels* opcodes should only allow VGPRs as src0.
4054// This is specified in .td description for vop1/vop3,
4055// but sdwa is handled differently. See isSDWAOperand.
4056bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4057 const OperandVector &Operands) {
4058
4059 const unsigned Opc = Inst.getOpcode();
4060 const MCInstrDesc &Desc = MII.get(Opc);
4061
4062 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4063 return true;
4064
4065 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4066 assert(Src0Idx != -1);
4067
4068 SMLoc ErrLoc;
4069 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4070 if (Src0.isReg()) {
4071 auto Reg = mc2PseudoReg(Src0.getReg());
4072 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4073 if (!isSGPR(Reg, TRI))
4074 return true;
4075 ErrLoc = getRegLoc(Reg, Operands);
4076 } else {
4077 ErrLoc = getConstLoc(Operands);
4078 }
4079
4080 Error(ErrLoc, "source operand must be a VGPR");
4081 return false;
4082}
4083
4084bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4085 const OperandVector &Operands) {
4086
4087 const unsigned Opc = Inst.getOpcode();
4088
4089 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4090 return true;
4091
4092 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4093 assert(Src0Idx != -1);
4094
4095 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4096 if (!Src0.isReg())
4097 return true;
4098
4099 auto Reg = mc2PseudoReg(Src0.getReg());
4100 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4101 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4102 Error(getRegLoc(Reg, Operands),
4103 "source operand must be either a VGPR or an inline constant");
4104 return false;
4105 }
4106
4107 return true;
4108}
4109
4110bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4111 const OperandVector &Operands) {
4112 unsigned Opcode = Inst.getOpcode();
4113 const MCInstrDesc &Desc = MII.get(Opcode);
4114
4115 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4116 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4117 return true;
4118
4119 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4120 if (Src2Idx == -1)
4121 return true;
4122
4123 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4124 Error(getConstLoc(Operands),
4125 "inline constants are not allowed for this operand");
4126 return false;
4127 }
4128
4129 return true;
4130}
4131
4132bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4133 const OperandVector &Operands) {
4134 const unsigned Opc = Inst.getOpcode();
4135 const MCInstrDesc &Desc = MII.get(Opc);
4136
4137 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4138 return true;
4139
4140 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4141 if (Src2Idx == -1)
4142 return true;
4143
4144 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4145 if (!Src2.isReg())
4146 return true;
4147
4148 MCRegister Src2Reg = Src2.getReg();
4149 MCRegister DstReg = Inst.getOperand(0).getReg();
4150 if (Src2Reg == DstReg)
4151 return true;
4152
4153 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4154 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4155 return true;
4156
4157 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4158 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
4159 "source 2 operand must not partially overlap with dst");
4160 return false;
4161 }
4162
4163 return true;
4164}
4165
4166bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4167 switch (Inst.getOpcode()) {
4168 default:
4169 return true;
4170 case V_DIV_SCALE_F32_gfx6_gfx7:
4171 case V_DIV_SCALE_F32_vi:
4172 case V_DIV_SCALE_F32_gfx10:
4173 case V_DIV_SCALE_F64_gfx6_gfx7:
4174 case V_DIV_SCALE_F64_vi:
4175 case V_DIV_SCALE_F64_gfx10:
4176 break;
4177 }
4178
4179 // TODO: Check that src0 = src1 or src2.
4180
4181 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4182 AMDGPU::OpName::src2_modifiers,
4183 AMDGPU::OpName::src2_modifiers}) {
4184 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4185 .getImm() &
4186 SISrcMods::ABS) {
4187 return false;
4188 }
4189 }
4190
4191 return true;
4192}
4193
4194bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4195
4196 const unsigned Opc = Inst.getOpcode();
4197 const MCInstrDesc &Desc = MII.get(Opc);
4198
4199 if ((Desc.TSFlags & MIMGFlags) == 0)
4200 return true;
4201
4202 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4203 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4204 if (isCI() || isSI())
4205 return false;
4206 }
4207
4208 return true;
4209}
4210
4211static bool IsRevOpcode(const unsigned Opcode)
4212{
4213 switch (Opcode) {
4214 case AMDGPU::V_SUBREV_F32_e32:
4215 case AMDGPU::V_SUBREV_F32_e64:
4216 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4217 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4218 case AMDGPU::V_SUBREV_F32_e32_vi:
4219 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4220 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4221 case AMDGPU::V_SUBREV_F32_e64_vi:
4222
4223 case AMDGPU::V_SUBREV_CO_U32_e32:
4224 case AMDGPU::V_SUBREV_CO_U32_e64:
4225 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4226 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4227
4228 case AMDGPU::V_SUBBREV_U32_e32:
4229 case AMDGPU::V_SUBBREV_U32_e64:
4230 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4231 case AMDGPU::V_SUBBREV_U32_e32_vi:
4232 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4233 case AMDGPU::V_SUBBREV_U32_e64_vi:
4234
4235 case AMDGPU::V_SUBREV_U32_e32:
4236 case AMDGPU::V_SUBREV_U32_e64:
4237 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4238 case AMDGPU::V_SUBREV_U32_e32_vi:
4239 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4240 case AMDGPU::V_SUBREV_U32_e64_vi:
4241
4242 case AMDGPU::V_SUBREV_F16_e32:
4243 case AMDGPU::V_SUBREV_F16_e64:
4244 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4245 case AMDGPU::V_SUBREV_F16_e32_vi:
4246 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4247 case AMDGPU::V_SUBREV_F16_e64_vi:
4248
4249 case AMDGPU::V_SUBREV_U16_e32:
4250 case AMDGPU::V_SUBREV_U16_e64:
4251 case AMDGPU::V_SUBREV_U16_e32_vi:
4252 case AMDGPU::V_SUBREV_U16_e64_vi:
4253
4254 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4255 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4256 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4257
4258 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4259 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4260
4261 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4262 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4263
4264 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4265 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4266
4267 case AMDGPU::V_LSHRREV_B32_e32:
4268 case AMDGPU::V_LSHRREV_B32_e64:
4269 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4270 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4271 case AMDGPU::V_LSHRREV_B32_e32_vi:
4272 case AMDGPU::V_LSHRREV_B32_e64_vi:
4273 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4274 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4275
4276 case AMDGPU::V_ASHRREV_I32_e32:
4277 case AMDGPU::V_ASHRREV_I32_e64:
4278 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4279 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4280 case AMDGPU::V_ASHRREV_I32_e32_vi:
4281 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4282 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4283 case AMDGPU::V_ASHRREV_I32_e64_vi:
4284
4285 case AMDGPU::V_LSHLREV_B32_e32:
4286 case AMDGPU::V_LSHLREV_B32_e64:
4287 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4288 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4289 case AMDGPU::V_LSHLREV_B32_e32_vi:
4290 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4291 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4292 case AMDGPU::V_LSHLREV_B32_e64_vi:
4293
4294 case AMDGPU::V_LSHLREV_B16_e32:
4295 case AMDGPU::V_LSHLREV_B16_e64:
4296 case AMDGPU::V_LSHLREV_B16_e32_vi:
4297 case AMDGPU::V_LSHLREV_B16_e64_vi:
4298 case AMDGPU::V_LSHLREV_B16_gfx10:
4299
4300 case AMDGPU::V_LSHRREV_B16_e32:
4301 case AMDGPU::V_LSHRREV_B16_e64:
4302 case AMDGPU::V_LSHRREV_B16_e32_vi:
4303 case AMDGPU::V_LSHRREV_B16_e64_vi:
4304 case AMDGPU::V_LSHRREV_B16_gfx10:
4305
4306 case AMDGPU::V_ASHRREV_I16_e32:
4307 case AMDGPU::V_ASHRREV_I16_e64:
4308 case AMDGPU::V_ASHRREV_I16_e32_vi:
4309 case AMDGPU::V_ASHRREV_I16_e64_vi:
4310 case AMDGPU::V_ASHRREV_I16_gfx10:
4311
4312 case AMDGPU::V_LSHLREV_B64_e64:
4313 case AMDGPU::V_LSHLREV_B64_gfx10:
4314 case AMDGPU::V_LSHLREV_B64_vi:
4315
4316 case AMDGPU::V_LSHRREV_B64_e64:
4317 case AMDGPU::V_LSHRREV_B64_gfx10:
4318 case AMDGPU::V_LSHRREV_B64_vi:
4319
4320 case AMDGPU::V_ASHRREV_I64_e64:
4321 case AMDGPU::V_ASHRREV_I64_gfx10:
4322 case AMDGPU::V_ASHRREV_I64_vi:
4323
4324 case AMDGPU::V_PK_LSHLREV_B16:
4325 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4326 case AMDGPU::V_PK_LSHLREV_B16_vi:
4327
4328 case AMDGPU::V_PK_LSHRREV_B16:
4329 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4330 case AMDGPU::V_PK_LSHRREV_B16_vi:
4331 case AMDGPU::V_PK_ASHRREV_I16:
4332 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4333 case AMDGPU::V_PK_ASHRREV_I16_vi:
4334 return true;
4335 default:
4336 return false;
4337 }
4338}
4339
4340std::optional<StringRef>
4341AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4342
4343 using namespace SIInstrFlags;
4344 const unsigned Opcode = Inst.getOpcode();
4345 const MCInstrDesc &Desc = MII.get(Opcode);
4346
4347 // lds_direct register is defined so that it can be used
4348 // with 9-bit operands only. Ignore encodings which do not accept these.
4349 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4350 if ((Desc.TSFlags & Enc) == 0)
4351 return std::nullopt;
4352
4353 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4354 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4355 if (SrcIdx == -1)
4356 break;
4357 const auto &Src = Inst.getOperand(SrcIdx);
4358 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4359
4360 if (isGFX90A() || isGFX11Plus())
4361 return StringRef("lds_direct is not supported on this GPU");
4362
4363 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4364 return StringRef("lds_direct cannot be used with this instruction");
4365
4366 if (SrcName != OpName::src0)
4367 return StringRef("lds_direct may be used as src0 only");
4368 }
4369 }
4370
4371 return std::nullopt;
4372}
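// Illustration (hedged; assumes a pre-GFX90A target that still accepts
// lds_direct): "v_mov_b32 v0, lds_direct" places lds_direct in src0 and is
// accepted, whereas an instruction that would read lds_direct through src1 or
// src2 is rejected with "lds_direct may be used as src0 only", and any use on
// GFX90A or GFX11+ is rejected outright.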
4373
4374SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4375 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4376 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4377 if (Op.isFlatOffset())
4378 return Op.getStartLoc();
4379 }
4380 return getLoc();
4381}
4382
4383bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4384 const OperandVector &Operands) {
4385 auto Opcode = Inst.getOpcode();
4386 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4387 if (OpNum == -1)
4388 return true;
4389
4390 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4391 if ((TSFlags & SIInstrFlags::FLAT))
4392 return validateFlatOffset(Inst, Operands);
4393
4394 if ((TSFlags & SIInstrFlags::SMRD))
4395 return validateSMEMOffset(Inst, Operands);
4396
4397 const auto &Op = Inst.getOperand(OpNum);
4398 if (isGFX12Plus() &&
4399 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4400 const unsigned OffsetSize = 24;
4401 if (!isIntN(OffsetSize, Op.getImm())) {
4402 Error(getFlatOffsetLoc(Operands),
4403 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4404 return false;
4405 }
4406 } else {
4407 const unsigned OffsetSize = 16;
4408 if (!isUIntN(OffsetSize, Op.getImm())) {
4409 Error(getFlatOffsetLoc(Operands),
4410 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4411 return false;
4412 }
4413 }
4414 return true;
4415}
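// Worked example of the range checks above (hypothetical immediates): for a
// GFX12 MUBUF/MTBUF instruction the field is 24-bit signed, so offset:0x7fffff
// passes isIntN(24, ...) while offset:0x800000 is rejected with "expected a
// 24-bit signed offset"; in the 16-bit unsigned case, offset:0xffff passes and
// offset:0x10000 does not.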
4416
4417bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4418 const OperandVector &Operands) {
4419 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4420 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4421 return true;
4422
4423 auto Opcode = Inst.getOpcode();
4424 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4425 assert(OpNum != -1);
4426
4427 const auto &Op = Inst.getOperand(OpNum);
4428 if (!hasFlatOffsets() && Op.getImm() != 0) {
4429 Error(getFlatOffsetLoc(Operands),
4430 "flat offset modifier is not supported on this GPU");
4431 return false;
4432 }
4433
4434 // For pre-GFX12 FLAT instructions the offset must be positive;
4435 // MSB is ignored and forced to zero.
4436 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4437 bool AllowNegative =
4438 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4439 isGFX12Plus();
4440 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4441 Error(getFlatOffsetLoc(Operands),
4442 Twine("expected a ") +
4443 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4444 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4445 return false;
4446 }
4447
4448 return true;
4449}
4450
4451SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4452 // Start with second operand because SMEM Offset cannot be dst or src0.
4453 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4454 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4455 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4456 return Op.getStartLoc();
4457 }
4458 return getLoc();
4459}
4460
4461bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4462 const OperandVector &Operands) {
4463 if (isCI() || isSI())
4464 return true;
4465
4466 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4467 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4468 return true;
4469
4470 auto Opcode = Inst.getOpcode();
4471 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4472 if (OpNum == -1)
4473 return true;
4474
4475 const auto &Op = Inst.getOperand(OpNum);
4476 if (!Op.isImm())
4477 return true;
4478
4479 uint64_t Offset = Op.getImm();
4480 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4481 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4482 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4483 return true;
4484
4485 Error(getSMEMOffsetLoc(Operands),
4486 isGFX12Plus() ? "expected a 24-bit signed offset"
4487 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4488 : "expected a 21-bit signed offset");
4489
4490 return false;
4491}
4492
4493bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4494 unsigned Opcode = Inst.getOpcode();
4495 const MCInstrDesc &Desc = MII.get(Opcode);
4496 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4497 return true;
4498
4499 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4500 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4501
4502 const int OpIndices[] = { Src0Idx, Src1Idx };
4503
4504 unsigned NumExprs = 0;
4505 unsigned NumLiterals = 0;
4506 uint32_t LiteralValue;
4507
4508 for (int OpIdx : OpIndices) {
4509 if (OpIdx == -1) break;
4510
4511 const MCOperand &MO = Inst.getOperand(OpIdx);
4512 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4513 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4514 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4515 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4516 if (NumLiterals == 0 || LiteralValue != Value) {
4517 LiteralValue = Value;
4518 ++NumLiterals;
4519 }
4520 } else if (MO.isExpr()) {
4521 ++NumExprs;
4522 }
4523 }
4524 }
4525
4526 return NumLiterals + NumExprs <= 1;
4527}
4528
4529bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4530 const unsigned Opc = Inst.getOpcode();
4531 if (isPermlane16(Opc)) {
4532 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4533 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4534
4535 if (OpSel & ~3)
4536 return false;
4537 }
4538
4539 uint64_t TSFlags = MII.get(Opc).TSFlags;
4540
4541 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4542 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4543 if (OpSelIdx != -1) {
4544 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4545 return false;
4546 }
4547 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4548 if (OpSelHiIdx != -1) {
4549 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4550 return false;
4551 }
4552 }
4553
4554 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4555 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4556 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4557 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4558 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4559 if (OpSel & 3)
4560 return false;
4561 }
4562
4563 return true;
4564}
4565
4566bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) {
4567 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4568
4569 const unsigned Opc = Inst.getOpcode();
4570 uint64_t TSFlags = MII.get(Opc).TSFlags;
4571
4572 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4573 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4574 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4575 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4576 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4577 !(TSFlags & SIInstrFlags::IsSWMMAC))
4578 return true;
4579
4580 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4581 if (NegIdx == -1)
4582 return true;
4583
4584 unsigned Neg = Inst.getOperand(NegIdx).getImm();
4585
4586 // Some instructions have a neg_lo or neg_hi operand, yet the neg modifier is
4587 // allowed on some of their src operands and not on others.
4588 // Conveniently, such instructions have no src_modifiers operand for the src
4589 // operands that disallow neg, because those operands also disallow opsel.
4590
4591 int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4592 AMDGPU::OpName::src1_modifiers,
4593 AMDGPU::OpName::src2_modifiers};
4594
4595 for (unsigned i = 0; i < 3; ++i) {
4596 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4597 if (Neg & (1 << i))
4598 return false;
4599 }
4600 }
4601
4602 return true;
4603}
4604
4605bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4606 const OperandVector &Operands) {
4607 const unsigned Opc = Inst.getOpcode();
4608 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4609 if (DppCtrlIdx >= 0) {
4610 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4611
4612 if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
4613 AMDGPU::isDPALU_DPP(MII.get(Opc))) {
4614 // DP ALU DPP is supported for row_newbcast only on GFX9*
4615 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4616 Error(S, "DP ALU dpp only supports row_newbcast");
4617 return false;
4618 }
4619 }
4620
4621 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
4622 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4623
4624 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
4625 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4626 if (Src1Idx >= 0) {
4627 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4628 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4629 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
4630 auto Reg = mc2PseudoReg(Inst.getOperand(Src1Idx).getReg());
4631 SMLoc S = getRegLoc(Reg, Operands);
4632 Error(S, "invalid operand for instruction");
4633 return false;
4634 }
4635 if (Src1.isImm()) {
4636 Error(getInstLoc(Operands),
4637 "src1 immediate operand invalid for instruction");
4638 return false;
4639 }
4640 }
4641 }
4642
4643 return true;
4644}
4645
4646// Check if VCC register matches wavefront size
4647bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4648 auto FB = getFeatureBits();
4649 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4650 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4651}
4652
4653// Only one unique literal can be used. A VOP3 literal is only allowed on GFX10+.
4654bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4655 const OperandVector &Operands) {
4656 unsigned Opcode = Inst.getOpcode();
4657 const MCInstrDesc &Desc = MII.get(Opcode);
4658 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4659 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4660 !HasMandatoryLiteral && !isVOPD(Opcode))
4661 return true;
4662
4663 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4664
4665 unsigned NumExprs = 0;
4666 unsigned NumLiterals = 0;
4667 uint64_t LiteralValue;
4668
4669 for (int OpIdx : OpIndices) {
4670 if (OpIdx == -1)
4671 continue;
4672
4673 const MCOperand &MO = Inst.getOperand(OpIdx);
4674 if (!MO.isImm() && !MO.isExpr())
4675 continue;
4676 if (!isSISrcOperand(Desc, OpIdx))
4677 continue;
4678
4679 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4680 uint64_t Value = static_cast<uint64_t>(MO.getImm());
4681 bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
4682 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
4683 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
4684
4685 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
4686 Error(getLitLoc(Operands), "invalid operand for instruction");
4687 return false;
4688 }
4689
4690 if (IsFP64 && IsValid32Op)
4691 Value = Hi_32(Value);
4692
4693 if (NumLiterals == 0 || LiteralValue != Value) {
4694 LiteralValue = Value;
4695 ++NumLiterals;
4696 }
4697 } else if (MO.isExpr()) {
4698 ++NumExprs;
4699 }
4700 }
4701 NumLiterals += NumExprs;
4702
4703 if (!NumLiterals)
4704 return true;
4705
4706 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4707 Error(getLitLoc(Operands), "literal operands are not supported");
4708 return false;
4709 }
4710
4711 if (NumLiterals > 1) {
4712 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4713 return false;
4714 }
4715
4716 return true;
4717}
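// Illustration (hedged; assumes FeatureVOP3Literal is available): a VOP3
// instruction may repeat one and the same 32-bit literal in several source
// operands, but two different literal values, or a literal combined with a
// relocatable expression, trigger "only one unique literal operand is
// allowed".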
4718
4719// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4720static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4721 const MCRegisterInfo *MRI) {
4722 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4723 if (OpIdx < 0)
4724 return -1;
4725
4726 const MCOperand &Op = Inst.getOperand(OpIdx);
4727 if (!Op.isReg())
4728 return -1;
4729
4730 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4731 auto Reg = Sub ? Sub : Op.getReg();
4732 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4733 return AGPR32.contains(Reg) ? 1 : 0;
4734}
4735
4736bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4737 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4738 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4739 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4740 SIInstrFlags::DS)) == 0)
4741 return true;
4742
4743 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4744 : AMDGPU::OpName::vdata;
4745
4746 const MCRegisterInfo *MRI = getMRI();
4747 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4748 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4749
4750 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4751 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4752 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4753 return false;
4754 }
4755
4756 auto FB = getFeatureBits();
4757 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4758 if (DataAreg < 0 || DstAreg < 0)
4759 return true;
4760 return DstAreg == DataAreg;
4761 }
4762
4763 return DstAreg < 1 && DataAreg < 1;
4764}
4765
4766bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4767 auto FB = getFeatureBits();
4768 if (!FB[AMDGPU::FeatureGFX90AInsts])
4769 return true;
4770
4771 const MCRegisterInfo *MRI = getMRI();
4772 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4773 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4774 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4775 const MCOperand &Op = Inst.getOperand(I);
4776 if (!Op.isReg())
4777 continue;
4778
4779 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4780 if (!Sub)
4781 continue;
4782
4783 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4784 return false;
4785 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4786 return false;
4787 }
4788
4789 return true;
4790}
4791
4792SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4793 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4794 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4795 if (Op.isBLGP())
4796 return Op.getStartLoc();
4797 }
4798 return SMLoc();
4799}
4800
4801bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4802 const OperandVector &Operands) {
4803 unsigned Opc = Inst.getOpcode();
4804 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4805 if (BlgpIdx == -1)
4806 return true;
4807 SMLoc BLGPLoc = getBLGPLoc(Operands);
4808 if (!BLGPLoc.isValid())
4809 return true;
4810 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
4811 auto FB = getFeatureBits();
4812 bool UsesNeg = false;
4813 if (FB[AMDGPU::FeatureGFX940Insts]) {
4814 switch (Opc) {
4815 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4816 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4817 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4818 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4819 UsesNeg = true;
4820 }
4821 }
4822
4823 if (IsNeg == UsesNeg)
4824 return true;
4825
4826 Error(BLGPLoc,
4827 UsesNeg ? "invalid modifier: blgp is not supported"
4828 : "invalid modifier: neg is not supported");
4829
4830 return false;
4831}
4832
4833bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4834 const OperandVector &Operands) {
4835 if (!isGFX11Plus())
4836 return true;
4837
4838 unsigned Opc = Inst.getOpcode();
4839 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4840 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4841 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4842 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4843 return true;
4844
4845 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4846 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4847 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4848 if (Reg == AMDGPU::SGPR_NULL)
4849 return true;
4850
4851 SMLoc RegLoc = getRegLoc(Reg, Operands);
4852 Error(RegLoc, "src0 must be null");
4853 return false;
4854}
4855
4856bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
4857 const OperandVector &Operands) {
4858 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4859 if ((TSFlags & SIInstrFlags::DS) == 0)
4860 return true;
4861 if (TSFlags & SIInstrFlags::GWS)
4862 return validateGWS(Inst, Operands);
4863 // Only validate GDS for non-GWS instructions.
4864 if (hasGDS())
4865 return true;
4866 int GDSIdx =
4867 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
4868 if (GDSIdx < 0)
4869 return true;
4870 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
4871 if (GDS) {
4872 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
4873 Error(S, "gds modifier is not supported on this GPU");
4874 return false;
4875 }
4876 return true;
4877}
4878
4879// gfx90a has an undocumented limitation:
4880// DS_GWS opcodes must use even aligned registers.
4881bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4882 const OperandVector &Operands) {
4883 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4884 return true;
4885
4886 int Opc = Inst.getOpcode();
4887 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4888 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4889 return true;
4890
4891 const MCRegisterInfo *MRI = getMRI();
4892 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4893 int Data0Pos =
4894 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4895 assert(Data0Pos != -1);
4896 auto Reg = Inst.getOperand(Data0Pos).getReg();
4897 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4898 if (RegIdx & 1) {
4899 SMLoc RegLoc = getRegLoc(Reg, Operands);
4900 Error(RegLoc, "vgpr must be even aligned");
4901 return false;
4902 }
4903
4904 return true;
4905}
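// Illustration (hedged; gfx90a only): a ds_gws_init / ds_gws_barrier /
// ds_gws_sema_br whose data0 operand lives in an even-numbered register such
// as v2 or a4 is accepted, while an odd-numbered one such as v3 or a5 is
// rejected with "vgpr must be even aligned".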
4906
4907bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4908 const OperandVector &Operands,
4909 const SMLoc &IDLoc) {
4910 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4911 AMDGPU::OpName::cpol);
4912 if (CPolPos == -1)
4913 return true;
4914
4915 unsigned CPol = Inst.getOperand(CPolPos).getImm();
4916
4917 if (isGFX12Plus())
4918 return validateTHAndScopeBits(Inst, Operands, CPol);
4919
4920 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4921 if (TSFlags & SIInstrFlags::SMRD) {
4922 if (CPol && (isSI() || isCI())) {
4923 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4924 Error(S, "cache policy is not supported for SMRD instructions");
4925 return false;
4926 }
4927 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4928 Error(IDLoc, "invalid cache policy for SMEM instruction");
4929 return false;
4930 }
4931 }
4932
4933 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4934 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
4935 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4936 SIInstrFlags::FLAT;
4937 if (!(TSFlags & AllowSCCModifier)) {
4938 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4939 StringRef CStr(S.getPointer());
4940 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4941 Error(S,
4942 "scc modifier is not supported for this instruction on this GPU");
4943 return false;
4944 }
4945 }
4946
4947 if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
4948 return true;
4949
4950 if (TSFlags & SIInstrFlags::IsAtomicRet) {
4951 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4952 Error(IDLoc, isGFX940() ? "instruction must use sc0"
4953 : "instruction must use glc");
4954 return false;
4955 }
4956 } else {
4957 if (CPol & CPol::GLC) {
4958 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4959 StringRef CStr(S.getPointer());
4960 S = SMLoc::getFromPointer(
4961 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4962 Error(S, isGFX940() ? "instruction must not use sc0"
4963 : "instruction must not use glc");
4964 return false;
4965 }
4966 }
4967
4968 return true;
4969}
4970
4971bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
4972 const OperandVector &Operands,
4973 const unsigned CPol) {
4974 const unsigned TH = CPol & AMDGPU::CPol::TH;
4975 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
4976
4977 const unsigned Opcode = Inst.getOpcode();
4978 const MCInstrDesc &TID = MII.get(Opcode);
4979
4980 auto PrintError = [&](StringRef Msg) {
4981 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4982 Error(S, Msg);
4983 return false;
4984 };
4985
4986 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
4989 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
4990
4991 if (TH == 0)
4992 return true;
4993
4994 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
4995 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
4996 (TH == AMDGPU::CPol::TH_NT_HT)))
4997 return PrintError("invalid th value for SMEM instruction");
4998
4999 if (TH == AMDGPU::CPol::TH_BYPASS) {
5000 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5002 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5004 return PrintError("scope and th combination is not valid");
5005 }
5006
5007 bool IsStore = TID.mayStore();
5008 bool IsAtomic =
5009 TID.TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet);
5010
5011 if (IsAtomic) {
5012 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5013 return PrintError("invalid th value for atomic instructions");
5014 } else if (IsStore) {
5015 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5016 return PrintError("invalid th value for store instructions");
5017 } else {
5018 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5019 return PrintError("invalid th value for load instructions");
5020 }
5021
5022 return true;
5023}
5024
5025bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
5026 if (!isGFX11Plus())
5027 return true;
5028 for (auto &Operand : Operands) {
5029 if (!Operand->isReg())
5030 continue;
5031 unsigned Reg = Operand->getReg();
5032 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
5033 Error(getRegLoc(Reg, Operands),
5034 "execz and vccz are not supported on this GPU");
5035 return false;
5036 }
5037 }
5038 return true;
5039}
5040
5041bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5042 const OperandVector &Operands) {
5043 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5044 if (Desc.mayStore() &&
5045 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5046 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5047 if (Loc != getInstLoc(Operands)) {
5048 Error(Loc, "TFE modifier has no meaning for store instructions");
5049 return false;
5050 }
5051 }
5052
5053 return true;
5054}
5055
5056bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
5057 const SMLoc &IDLoc,
5058 const OperandVector &Operands) {
5059 if (auto ErrMsg = validateLdsDirect(Inst)) {
5060 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
5061 return false;
5062 }
5063 if (!validateSOPLiteral(Inst)) {
5064 Error(getLitLoc(Operands),
5065 "only one unique literal operand is allowed");
5066 return false;
5067 }
5068 if (!validateVOPLiteral(Inst, Operands)) {
5069 return false;
5070 }
5071 if (!validateConstantBusLimitations(Inst, Operands)) {
5072 return false;
5073 }
5074 if (!validateVOPDRegBankConstraints(Inst, Operands)) {
5075 return false;
5076 }
5077 if (!validateIntClampSupported(Inst)) {
5078 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5079 "integer clamping is not supported on this GPU");
5080 return false;
5081 }
5082 if (!validateOpSel(Inst)) {
5083 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5084 "invalid op_sel operand");
5085 return false;
5086 }
5087 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5088 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5089 "invalid neg_lo operand");
5090 return false;
5091 }
5092 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5093 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5094 "invalid neg_hi operand");
5095 return false;
5096 }
5097 if (!validateDPP(Inst, Operands)) {
5098 return false;
5099 }
5100 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5101 if (!validateMIMGD16(Inst)) {
5102 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5103 "d16 modifier is not supported on this GPU");
5104 return false;
5105 }
5106 if (!validateMIMGMSAA(Inst)) {
5107 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5108 "invalid dim; must be MSAA type");
5109 return false;
5110 }
5111 if (!validateMIMGDataSize(Inst, IDLoc)) {
5112 return false;
5113 }
5114 if (!validateMIMGAddrSize(Inst, IDLoc))
5115 return false;
5116 if (!validateMIMGAtomicDMask(Inst)) {
5117 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5118 "invalid atomic image dmask");
5119 return false;
5120 }
5121 if (!validateMIMGGatherDMask(Inst)) {
5122 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5123 "invalid image_gather dmask: only one bit must be set");
5124 return false;
5125 }
5126 if (!validateMovrels(Inst, Operands)) {
5127 return false;
5128 }
5129 if (!validateOffset(Inst, Operands)) {
5130 return false;
5131 }
5132 if (!validateMAIAccWrite(Inst, Operands)) {
5133 return false;
5134 }
5135 if (!validateMAISrc2(Inst, Operands)) {
5136 return false;
5137 }
5138 if (!validateMFMA(Inst, Operands)) {
5139 return false;
5140 }
5141 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5142 return false;
5143 }
5144
5145 if (!validateAGPRLdSt(Inst)) {
5146 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5147 ? "invalid register class: data and dst should be all VGPR or AGPR"
5148 : "invalid register class: agpr loads and stores not supported on this GPU"
5149 );
5150 return false;
5151 }
5152 if (!validateVGPRAlign(Inst)) {
5153 Error(IDLoc,
5154 "invalid register class: vgpr tuples must be 64 bit aligned");
5155 return false;
5156 }
5157 if (!validateDS(Inst, Operands)) {
5158 return false;
5159 }
5160
5161 if (!validateBLGP(Inst, Operands)) {
5162 return false;
5163 }
5164
5165 if (!validateDivScale(Inst)) {
5166 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5167 return false;
5168 }
5169 if (!validateWaitCnt(Inst, Operands)) {
5170 return false;
5171 }
5172 if (!validateExeczVcczOperands(Operands)) {
5173 return false;
5174 }
5175 if (!validateTFE(Inst, Operands)) {
5176 return false;
5177 }
5178
5179 return true;
5180}
5181
5183 const FeatureBitset &FBS,
5184 unsigned VariantID = 0);
5185
5186static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5187 const FeatureBitset &AvailableFeatures,
5188 unsigned VariantID);
5189
5190bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5191 const FeatureBitset &FBS) {
5192 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5193}
5194
5195bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5196 const FeatureBitset &FBS,
5197 ArrayRef<unsigned> Variants) {
5198 for (auto Variant : Variants) {
5199 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5200 return true;
5201 }
5202
5203 return false;
5204}
5205
5206bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5207 const SMLoc &IDLoc) {
5208 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5209
5210 // Check if requested instruction variant is supported.
5211 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5212 return false;
5213
5214 // This instruction is not supported.
5215 // Clear any other pending errors because they are no longer relevant.
5216 getParser().clearPendingErrors();
5217
5218 // Requested instruction variant is not supported.
5219 // Check if any other variants are supported.
5220 StringRef VariantName = getMatchedVariantName();
5221 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5222 return Error(IDLoc,
5223 Twine(VariantName,
5224 " variant of this instruction is not supported"));
5225 }
5226
5227 // Check if this instruction may be used with a different wavesize.
5228 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5229 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5230
5231 FeatureBitset FeaturesWS32 = getFeatureBits();
5232 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5233 .flip(AMDGPU::FeatureWavefrontSize32);
5234 FeatureBitset AvailableFeaturesWS32 =
5235 ComputeAvailableFeatures(FeaturesWS32);
5236
5237 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5238 return Error(IDLoc, "instruction requires wavesize=32");
5239 }
5240
5241 // Finally check if this instruction is supported on any other GPU.
5242 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5243 return Error(IDLoc, "instruction not supported on this GPU");
5244 }
5245
5246 // Instruction not supported on any GPU. Probably a typo.
5247 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5248 return Error(IDLoc, "invalid instruction" + Suggestion);
5249}
5250
5251static bool isInvalidVOPDY(const OperandVector &Operands,
5252 uint64_t InvalidOprIdx) {
5253 assert(InvalidOprIdx < Operands.size());
5254 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5255 if (Op.isToken() && InvalidOprIdx > 1) {
5256 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5257 return PrevOp.isToken() && PrevOp.getToken() == "::";
5258 }
5259 return false;
5260}
5261
5262bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5263 OperandVector &Operands,
5264 MCStreamer &Out,
5265 uint64_t &ErrorInfo,
5266 bool MatchingInlineAsm) {
5267 MCInst Inst;
5268 unsigned Result = Match_Success;
5269 for (auto Variant : getMatchedVariants()) {
5270 uint64_t EI;
5271 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5272 Variant);
5273 // Match statuses are ordered from least to most specific, and the most
5274 // specific status seen so far is kept as the result:
5275 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5276 if (R == Match_Success || R == Match_MissingFeature ||
5277 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5278 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5279 Result != Match_MissingFeature)) {
5280 Result = R;
5281 ErrorInfo = EI;
5282 }
5283 if (R == Match_Success)
5284 break;
5285 }
5286
5287 if (Result == Match_Success) {
5288 if (!validateInstruction(Inst, IDLoc, Operands)) {
5289 return true;
5290 }
5291 Inst.setLoc(IDLoc);
5292 Out.emitInstruction(Inst, getSTI());
5293 return false;
5294 }
5295
5296 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5297 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5298 return true;
5299 }
5300
5301 switch (Result) {
5302 default: break;
5303 case Match_MissingFeature:
5304 // It has been verified that the specified instruction
5305 // mnemonic is valid. A match was found but it requires
5306 // features which are not supported on this GPU.
5307 return Error(IDLoc, "operands are not valid for this GPU or mode");
5308
5309 case Match_InvalidOperand: {
5310 SMLoc ErrorLoc = IDLoc;
5311 if (ErrorInfo != ~0ULL) {
5312 if (ErrorInfo >= Operands.size()) {
5313 return Error(IDLoc, "too few operands for instruction");
5314 }
5315 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5316 if (ErrorLoc == SMLoc())
5317 ErrorLoc = IDLoc;
5318
5319 if (isInvalidVOPDY(Operands, ErrorInfo))
5320 return Error(ErrorLoc, "invalid VOPDY instruction");
5321 }
5322 return Error(ErrorLoc, "invalid operand for instruction");
5323 }
5324
5325 case Match_MnemonicFail:
5326 llvm_unreachable("Invalid instructions should have been handled already");
5327 }
5328 llvm_unreachable("Implement any new match types added!");
5329}
5330
5331bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5332 int64_t Tmp = -1;
5333 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5334 return true;
5335 }
5336 if (getParser().parseAbsoluteExpression(Tmp)) {
5337 return true;
5338 }
5339 Ret = static_cast<uint32_t>(Tmp);
5340 return false;
5341}
5342
5343bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5344 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5345 return TokError("directive only supported for amdgcn architecture");
5346
5347 std::string TargetIDDirective;
5348 SMLoc TargetStart = getTok().getLoc();
5349 if (getParser().parseEscapedString(TargetIDDirective))
5350 return true;
5351
5352 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5353 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5354 return getParser().Error(TargetRange.Start,
5355 (Twine(".amdgcn_target directive's target id ") +
5356 Twine(TargetIDDirective) +
5357 Twine(" does not match the specified target id ") +
5358 Twine(getTargetStreamer().getTargetID()->toString())).str());
5359
5360 return false;
5361}
5362
5363bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5364 return Error(Range.Start, "value out of range", Range);
5365}
5366
5367bool AMDGPUAsmParser::calculateGPRBlocks(
5368 const FeatureBitset &Features, const MCExpr *VCCUsed,
5369 const MCExpr *FlatScrUsed, bool XNACKUsed,
5370 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
5371 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5372 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
5373 // TODO(scott.linder): These calculations are duplicated from
5374 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5375 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5376 MCContext &Ctx = getContext();
5377
5378 const MCExpr *NumSGPRs = NextFreeSGPR;
5379 int64_t EvaluatedSGPRs;
5380
5381 if (Version.Major >= 10)
5382 NumSGPRs = MCConstantExpr::create(0, Ctx);
5383 else {
5384 unsigned MaxAddressableNumSGPRs =
5385 IsaInfo::getAddressableNumSGPRs(&getSTI());
5386
5387 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
5388 !Features.test(FeatureSGPRInitBug) &&
5389 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5390 return OutOfRangeError(SGPRRange);
5391
5392 const MCExpr *ExtraSGPRs =
5393 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
5394 NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
5395
5396 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5397 (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5398 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5399 return OutOfRangeError(SGPRRange);
5400
5401 if (Features.test(FeatureSGPRInitBug))
5402 NumSGPRs =
5403 MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
5404 }
5405
5406 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
5407 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
5408 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
5409 unsigned Granule) -> const MCExpr * {
5410 const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
5411 const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
5412 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
5413 const MCExpr *AlignToGPR =
5414 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
5415 const MCExpr *DivGPR =
5416 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
5417 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
5418 return SubGPR;
5419 };
5420
5421 VGPRBlocks = GetNumGPRBlocks(
5422 NextFreeVGPR,
5423 IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
5424 SGPRBlocks =
5425 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));
5426
5427 return false;
5428}
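// Numeric sketch of GetNumGPRBlocks() above, using a hypothetical granule of 8
// (real granules come from IsaInfo::getVGPREncodingGranule and
// IsaInfo::getSGPREncodingGranule):
//   NumGPR = 37 -> alignTo(max(1, 37), 8) / 8 - 1 = 40 / 8 - 1 = 4 blocks
//   NumGPR = 0  -> alignTo(max(1, 0), 8) / 8 - 1 = 8 / 8 - 1 = 0 blocks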
5429
5430bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5431 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5432 return TokError("directive only supported for amdgcn architecture");
5433
5434 if (!isHsaAbi(getSTI()))
5435 return TokError("directive only supported for amdhsa OS");
5436
5437 StringRef KernelName;
5438 if (getParser().parseIdentifier(KernelName))
5439 return true;
5440
5441 AMDGPU::MCKernelDescriptor KD =
5442 AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
5443 &getSTI(), getContext());
5444
5445 StringSet<> Seen;
5446
5447 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5448
5449 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
5450 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
5451
5452 SMRange VGPRRange;
5453 const MCExpr *NextFreeVGPR = ZeroExpr;
5454 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
5455 uint64_t SharedVGPRCount = 0;
5456 uint64_t PreloadLength = 0;
5457 uint64_t PreloadOffset = 0;
5458 SMRange SGPRRange;
5459 const MCExpr *NextFreeSGPR = ZeroExpr;
5460
5461 // Count the number of user SGPRs implied from the enabled feature bits.
5462 unsigned ImpliedUserSGPRCount = 0;
5463
5464 // Track if the asm explicitly contains the directive for the user SGPR
5465 // count.
5466 std::optional<unsigned> ExplicitUserSGPRCount;
5467 const MCExpr *ReserveVCC = OneExpr;
5468 const MCExpr *ReserveFlatScr = OneExpr;
5469 std::optional<bool> EnableWavefrontSize32;
5470
5471 while (true) {
5472 while (trySkipToken(AsmToken::EndOfStatement));
5473
5474 StringRef ID;
5475 SMRange IDRange = getTok().getLocRange();
5476 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5477 return true;
5478
5479 if (ID == ".end_amdhsa_kernel")
5480 break;
5481
5482 if (!Seen.insert(ID).second)
5483 return TokError(".amdhsa_ directives cannot be repeated");
5484
5485 SMLoc ValStart = getLoc();
5486 const MCExpr *ExprVal;
5487 if (getParser().parseExpression(ExprVal))
5488 return true;
5489 SMLoc ValEnd = getLoc();
5490 SMRange ValRange = SMRange(ValStart, ValEnd);
5491
5492 int64_t IVal = 0;
5493 uint64_t Val = IVal;
5494 bool EvaluatableExpr;
5495 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
5496 if (IVal < 0)
5497 return OutOfRangeError(ValRange);
5498 Val = IVal;
5499 }
5500
5501#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5502 if (!isUInt<ENTRY##_WIDTH>(Val)) \
5503 return OutOfRangeError(RANGE); \
5504 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
5505 getContext());
5506
5507// Some fields use the parsed value immediately, which requires the expression to
5508// be resolvable.
5509#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
5510 if (!(RESOLVED)) \
5511 return Error(IDRange.Start, "directive should have resolvable expression", \
5512 IDRange);
5513
5514 if (ID == ".amdhsa_group_segment_fixed_size") {
5515 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
5516 CHAR_BIT>(Val))
5517 return OutOfRangeError(ValRange);
5518 KD.group_segment_fixed_size = ExprVal;
5519 } else if (ID == ".amdhsa_private_segment_fixed_size") {
5520 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
5521 CHAR_BIT>(Val))
5522 return OutOfRangeError(ValRange);
5523 KD.private_segment_fixed_size = ExprVal;
5524 } else if (ID == ".amdhsa_kernarg_size") {
5525 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
5526 return OutOfRangeError(ValRange);
5527 KD.kernarg_size = ExprVal;
5528 } else if (ID == ".amdhsa_user_sgpr_count") {
5529 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5530 ExplicitUserSGPRCount = Val;
5531 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5532 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5533 if (hasArchitectedFlatScratch())
5534 return Error(IDRange.Start,
5535 "directive is not supported with architected flat scratch",
5536 IDRange);
5537 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5538 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5539 ExprVal, ValRange);
5540 if (Val)
5541 ImpliedUserSGPRCount += 4;
5542 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
5543 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5544 if (!hasKernargPreload())
5545 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5546
5547 if (Val > getMaxNumUserSGPRs())
5548 return OutOfRangeError(ValRange);
5549 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
5550 ValRange);
5551 if (Val) {
5552 ImpliedUserSGPRCount += Val;
5553 PreloadLength = Val;
5554 }
5555 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
5556 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5557 if (!hasKernargPreload())
5558 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5559
5560 if (Val >= 1024)
5561 return OutOfRangeError(ValRange);
5562 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
5563 ValRange);
5564 if (Val)
5565 PreloadOffset = Val;
5566 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5567 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5568 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5569 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
5570 ValRange);
5571 if (Val)
5572 ImpliedUserSGPRCount += 2;
5573 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5574 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5575 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5576 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
5577 ValRange);
5578 if (Val)
5579 ImpliedUserSGPRCount += 2;
5580 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5581 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5582 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5583 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5584 ExprVal, ValRange);
5585 if (Val)
5586 ImpliedUserSGPRCount += 2;
5587 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5588 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5589 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5590 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
5591 ValRange);
5592 if (Val)
5593 ImpliedUserSGPRCount += 2;
5594 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5595 if (hasArchitectedFlatScratch())
5596 return Error(IDRange.Start,
5597 "directive is not supported with architected flat scratch",
5598 IDRange);
5599 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5600 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5601 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
5602 ExprVal, ValRange);
5603 if (Val)
5604 ImpliedUserSGPRCount += 2;
5605 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5606 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5607 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5608 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5609 ExprVal, ValRange);
5610 if (Val)
5611 ImpliedUserSGPRCount += 1;
5612 } else if (ID == ".amdhsa_wavefront_size32") {
5613 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5614 if (IVersion.Major < 10)
5615 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5616 EnableWavefrontSize32 = Val;
5617 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5618 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
5619 ValRange);
5620 } else if (ID == ".amdhsa_uses_dynamic_stack") {
5621 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5622 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
5623 ValRange);
5624 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5625 if (hasArchitectedFlatScratch())
5626 return Error(IDRange.Start,
5627 "directive is not supported with architected flat scratch",
5628 IDRange);
5629 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5630 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5631 ValRange);
5632 } else if (ID == ".amdhsa_enable_private_segment") {
5633 if (!hasArchitectedFlatScratch())
5634 return Error(
5635 IDRange.Start,
5636 "directive is not supported without architected flat scratch",
5637 IDRange);
5638 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5639 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5640 ValRange);
5641 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5642 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5643 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
5644 ValRange);
5645 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5646 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5647 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
5648 ValRange);
5649 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5650 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5651 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
5652 ValRange);
5653 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5654 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5655 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
5656 ValRange);
5657 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5658 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5659 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
5660 ValRange);
5661 } else if (ID == ".amdhsa_next_free_vgpr") {
5662 VGPRRange = ValRange;
5663 NextFreeVGPR = ExprVal;
5664 } else if (ID == ".amdhsa_next_free_sgpr") {
5665 SGPRRange = ValRange;
5666 NextFreeSGPR = ExprVal;
5667 } else if (ID == ".amdhsa_accum_offset") {
5668 if (!isGFX90A())
5669 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5670 AccumOffset = ExprVal;
5671 } else if (ID == ".amdhsa_reserve_vcc") {
5672 if (EvaluatableExpr && !isUInt<1>(Val))
5673 return OutOfRangeError(ValRange);
5674 ReserveVCC = ExprVal;
5675 } else if (ID == ".amdhsa_reserve_flat_scratch") {
5676 if (IVersion.Major < 7)
5677 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5678 if (hasArchitectedFlatScratch())
5679 return Error(IDRange.Start,
5680 "directive is not supported with architected flat scratch",
5681 IDRange);
5682 if (EvaluatableExpr && !isUInt<1>(Val))
5683 return OutOfRangeError(ValRange);
5684 ReserveFlatScr = ExprVal;
5685 } else if (ID == ".amdhsa_reserve_xnack_mask") {
5686 if (IVersion.Major < 8)
5687 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5688 if (!isUInt<1>(Val))
5689 return OutOfRangeError(ValRange);
5690 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5691 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5692 IDRange);
5693 } else if (ID == ".amdhsa_float_round_mode_32") {
5694 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5695 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
5696 ValRange);
5697 } else if (ID == ".amdhsa_float_round_mode_16_64") {
5698 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5699 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
5700 ValRange);
5701 } else if (ID == ".amdhsa_float_denorm_mode_32") {
5702 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5703 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
5704 ValRange);
5705 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5706 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5707 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
5708 ValRange);
5709 } else if (ID == ".amdhsa_dx10_clamp") {
5710 if (IVersion.Major >= 12)
5711 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5712 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5713 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
5714 ValRange);
5715 } else if (ID == ".amdhsa_ieee_mode") {
5716 if (IVersion.Major >= 12)
5717 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5718 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5719 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
5720 ValRange);
5721 } else if (ID == ".amdhsa_fp16_overflow") {
5722 if (IVersion.Major < 9)
5723 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5724 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5725 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
5726 ValRange);
5727 } else if (ID == ".amdhsa_tg_split") {
5728 if (!isGFX90A())
5729 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5730 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
5731 ExprVal, ValRange);
5732 } else if (ID == ".amdhsa_workgroup_processor_mode") {
5733 if (IVersion.Major < 10)
5734 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5735 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5736 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
5737 ValRange);
5738 } else if (ID == ".amdhsa_memory_ordered") {
5739 if (IVersion.Major < 10)
5740 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5741 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5742 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
5743 ValRange);
5744 } else if (ID == ".amdhsa_forward_progress") {
5745 if (IVersion.Major < 10)
5746 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5747 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5748 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
5749 ValRange);
5750 } else if (ID == ".amdhsa_shared_vgpr_count") {
5751 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5752 if (IVersion.Major < 10 || IVersion.Major >= 12)
5753 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
5754 IDRange);
5755 SharedVGPRCount = Val;
5756 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5757 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
5758 ValRange);
5759 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5760 PARSE_BITS_ENTRY(
5761 KD.compute_pgm_rsrc2,
5762 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
5763 ExprVal, ValRange);
5764 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5765 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5766 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5767 ExprVal, ValRange);
5768 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5769 PARSE_BITS_ENTRY(
5770 KD.compute_pgm_rsrc2,
5771 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
5772 ExprVal, ValRange);
5773 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5774 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5775 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5776 ExprVal, ValRange);
5777 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5778 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5779 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5780 ExprVal, ValRange);
5781 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5782 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5783 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5784 ExprVal, ValRange);
5785 } else if (ID == ".amdhsa_exception_int_div_zero") {
5786 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5787 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5788 ExprVal, ValRange);
5789 } else if (ID == ".amdhsa_round_robin_scheduling") {
5790 if (IVersion.Major < 12)
5791 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
5792 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5793 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
5794 ValRange);
5795 } else {
5796 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5797 }
5798
5799#undef PARSE_BITS_ENTRY
5800 }
5801
5802 if (!Seen.contains(".amdhsa_next_free_vgpr"))
5803 return TokError(".amdhsa_next_free_vgpr directive is required");
5804
5805 if (!Seen.contains(".amdhsa_next_free_sgpr"))
5806 return TokError(".amdhsa_next_free_sgpr directive is required");
5807
5808 const MCExpr *VGPRBlocks;
5809 const MCExpr *SGPRBlocks;
5810 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5811 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5812 EnableWavefrontSize32, NextFreeVGPR,
5813 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5814 SGPRBlocks))
5815 return true;
5816
5817 int64_t EvaluatedVGPRBlocks;
5818 bool VGPRBlocksEvaluatable =
5819 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
5820 if (VGPRBlocksEvaluatable &&
5821 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5822 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
5823 return OutOfRangeError(VGPRRange);
5824 }
5825 MCKernelDescriptor::bits_set(
5826 KD.compute_pgm_rsrc1, VGPRBlocks,
5827 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
5828 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
5829
5830 int64_t EvaluatedSGPRBlocks;
5831 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
5832 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5833 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
5834 return OutOfRangeError(SGPRRange);
5835 MCKernelDescriptor::bits_set(
5836 KD.compute_pgm_rsrc1, SGPRBlocks,
5837 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
5838 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
5839
5840 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5841 return TokError("amdgpu_user_sgpr_count smaller than implied by "
5842 "enabled user SGPRs");
5843
5844 unsigned UserSGPRCount =
5845 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5846
5847 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5848 return TokError("too many user SGPRs enabled");
5849 MCKernelDescriptor::bits_set(
5850 KD.compute_pgm_rsrc2, MCConstantExpr::create(UserSGPRCount, getContext()),
5851 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
5852 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
5853
5854 int64_t IVal = 0;
5855 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
5856 return TokError("Kernarg size should be resolvable");
5857 uint64_t kernarg_size = IVal;
5858 if (PreloadLength && kernarg_size &&
5859 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
5860 return TokError("Kernarg preload length + offset is larger than the "
5861 "kernarg segment size");
5862
5863 if (isGFX90A()) {
5864 if (!Seen.contains(".amdhsa_accum_offset"))
5865 return TokError(".amdhsa_accum_offset directive is required");
5866 int64_t EvaluatedAccum;
5867 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
5868 uint64_t UEvaluatedAccum = EvaluatedAccum;
5869 if (AccumEvaluatable &&
5870 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
5871 return TokError("accum_offset should be in range [4..256] in "
5872 "increments of 4");
5873
5874 int64_t EvaluatedNumVGPR;
5875 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
5876 AccumEvaluatable &&
5877 UEvaluatedAccum >
5878 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
5879 return TokError("accum_offset exceeds total VGPR allocation");
5880 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
5881 MCBinaryExpr::createDiv(
5882 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
5883 MCConstantExpr::create(1, getContext()), getContext());
5884 MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, AdjustedAccum,
5885 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
5886 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5887 getContext());
5888 }
5889
5890 if (IVersion.Major >= 10 && IVersion.Major < 12) {
5891 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5892 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5893 return TokError("shared_vgpr_count directive not valid on "
5894 "wavefront size 32");
5895 }
5896
5897 if (VGPRBlocksEvaluatable &&
5898 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
5899 63)) {
5900 return TokError("shared_vgpr_count*2 + "
5901 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5902 "exceed 63\n");
5903 }
5904 }
5905
5906 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
5907 NextFreeVGPR, NextFreeSGPR,
5908 ReserveVCC, ReserveFlatScr);
5909 return false;
5910}
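 // For reference, a minimal input accepted by ParseDirectiveAMDHSAKernel looks
 // roughly like the following (kernel name and register counts are
 // illustrative). Only the two .amdhsa_next_free_* directives are mandatory on
 // all targets, and gfx90a additionally requires .amdhsa_accum_offset, as
 // enforced above:
 //
 //   .amdhsa_kernel my_kernel
 //     .amdhsa_next_free_vgpr 8
 //     .amdhsa_next_free_sgpr 16
 //   .end_amdhsa_kernel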
5911
5912bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
5913 int64_t Version;
5914 if (ParseAsAbsoluteExpression(Version))
5915 return true;
5916
5917 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
5918 return false;
5919}
5920
5921bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5922 AMDGPUMCKernelCodeT &C) {
5923 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5924 // assembly for backwards compatibility.
5925 if (ID == "max_scratch_backing_memory_byte_size") {
5926 Parser.eatToEndOfStatement();
5927 return false;
5928 }
5929
5930 SmallString<40> ErrStr;
5931 raw_svector_ostream Err(ErrStr);
5932 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
5933 return TokError(Err.str());
5934 }
5935 Lex();
5936
5937 if (ID == "enable_wavefront_size32") {
5938 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5939 if (!isGFX10Plus())
5940 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5941 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5942 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5943 } else {
5944 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5945 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5946 }
5947 }
5948
5949 if (ID == "wavefront_size") {
5950 if (C.wavefront_size == 5) {
5951 if (!isGFX10Plus())
5952 return TokError("wavefront_size=5 is only allowed on GFX10+");
5953 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5954 return TokError("wavefront_size=5 requires +WavefrontSize32");
5955 } else if (C.wavefront_size == 6) {
5956 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5957 return TokError("wavefront_size=6 requires +WavefrontSize64");
5958 }
5959 }
5960
5961 return false;
5962}
5963
5964bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5965 AMDGPUMCKernelCodeT KernelCode;
5966 KernelCode.initDefault(&getSTI(), getContext());
5967
5968 while (true) {
5969 // Lex EndOfStatement. This is in a while loop, because lexing a comment
5970 // will set the current token to EndOfStatement.
5971 while(trySkipToken(AsmToken::EndOfStatement));
5972
5973 StringRef ID;
5974 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5975 return true;
5976
5977 if (ID == ".end_amd_kernel_code_t")
5978 break;
5979
5980 if (ParseAMDKernelCodeTValue(ID, KernelCode))
5981 return true;
5982 }
5983
5984 KernelCode.validate(&getSTI(), getContext());
5985 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
5986
5987 return false;
5988}
5989
5990bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5991 StringRef KernelName;
5992 if (!parseId(KernelName, "expected symbol name"))
5993 return true;
5994
5995 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5996 ELF::STT_AMDGPU_HSA_KERNEL);
5997
5998 KernelScope.initialize(getContext());
5999 return false;
6000}
6001
6002bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6003 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
6004 return Error(getLoc(),
6005 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6006 "architectures");
6007 }
6008
6009 auto TargetIDDirective = getLexer().getTok().getStringContents();
6010 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6011 return Error(getParser().getTok().getLoc(), "target id must match options");
6012
6013 getTargetStreamer().EmitISAVersion();
6014 Lex();
6015
6016 return false;
6017}
6018
6019bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6020 assert(isHsaAbi(getSTI()));
6021
6022 std::string HSAMetadataString;
6023 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6024 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6025 return true;
6026
6027 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6028 return Error(getLoc(), "invalid HSA metadata");
6029
6030 return false;
6031}
6032
6033/// Common code to parse out a block of text (typically YAML) between start and
6034/// end directives.
6035bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6036 const char *AssemblerDirectiveEnd,
6037 std::string &CollectString) {
6038
6039 raw_string_ostream CollectStream(CollectString);
6040
6041 getLexer().setSkipSpace(false);
6042
6043 bool FoundEnd = false;
6044 while (!isToken(AsmToken::Eof)) {
6045 while (isToken(AsmToken::Space)) {
6046 CollectStream << getTokenStr();
6047 Lex();
6048 }
6049
6050 if (trySkipId(AssemblerDirectiveEnd)) {
6051 FoundEnd = true;
6052 break;
6053 }
6054
6055 CollectStream << Parser.parseStringToEndOfStatement()
6056 << getContext().getAsmInfo()->getSeparatorString();
6057
6058 Parser.eatToEndOfStatement();
6059 }
6060
6061 getLexer().setSkipSpace(true);
6062
6063 if (isToken(AsmToken::Eof) && !FoundEnd) {
6064 return TokError(Twine("expected directive ") +
6065 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6066 }
6067
6068 CollectStream.flush();
6069 return false;
6070}
6071
6072/// Parse the assembler directive for new MsgPack-format PAL metadata.
6073bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6074 std::string String;
6075 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6076 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
6077 return true;
6078
6079 auto PALMetadata = getTargetStreamer().getPALMetadata();
6080 if (!PALMetadata->setFromString(String))
6081 return Error(getLoc(), "invalid PAL metadata");
6082 return false;
6083}
6084
6085/// Parse the assembler directive for old linear-format PAL metadata.
6086bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6087 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6088 return Error(getLoc(),
6089 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6090 "not available on non-amdpal OSes")).str());
6091 }
6092
6093 auto PALMetadata = getTargetStreamer().getPALMetadata();
6094 PALMetadata->setLegacy();
6095 for (;;) {
6096 uint32_t Key, Value;
6097 if (ParseAsAbsoluteExpression(Key)) {
6098 return TokError(Twine("invalid value in ") +
6099 Twine(PALMD::AssemblerDirective));
6100 }
6101 if (!trySkipToken(AsmToken::Comma)) {
6102 return TokError(Twine("expected an even number of values in ") +
6103 Twine(PALMD::AssemblerDirective));
6104 }
6105 if (ParseAsAbsoluteExpression(Value)) {
6106 return TokError(Twine("invalid value in ") +
6107 Twine(PALMD::AssemblerDirective));
6108 }
6109 PALMetadata->setRegister(Key, Value);
6110 if (!trySkipToken(AsmToken::Comma))
6111 break;
6112 }
6113 return false;
6114}
6115
6116/// ParseDirectiveAMDGPULDS
6117/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
6118bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6119 if (getParser().checkForValidSection())
6120 return true;
6121
6122 StringRef Name;
6123 SMLoc NameLoc = getLoc();
6124 if (getParser().parseIdentifier(Name))
6125 return TokError("expected identifier in directive");
6126
6127 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6128 if (getParser().parseComma())
6129 return true;
6130
6131 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6132
6133 int64_t Size;
6134 SMLoc SizeLoc = getLoc();
6135 if (getParser().parseAbsoluteExpression(Size))
6136 return true;
6137 if (Size < 0)
6138 return Error(SizeLoc, "size must be non-negative");
6139 if (Size > LocalMemorySize)
6140 return Error(SizeLoc, "size is too large");
6141
6142 int64_t Alignment = 4;
6143 if (trySkipToken(AsmToken::Comma)) {
6144 SMLoc AlignLoc = getLoc();
6145 if (getParser().parseAbsoluteExpression(Alignment))
6146 return true;
6147 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6148 return Error(AlignLoc, "alignment must be a power of two");
6149
6150 // Alignment larger than the size of LDS is possible in theory, as long
6151 // as the linker manages to place the symbol at address 0, but we do want
6152 // to make sure the alignment fits nicely into a 32-bit integer.
6153 if (Alignment >= 1u << 31)
6154 return Error(AlignLoc, "alignment is too large");
6155 }
6156
6157 if (parseEOL())
6158 return true;
6159
6160 Symbol->redefineIfPossible();
6161 if (!Symbol->isUndefined())
6162 return Error(NameLoc, "invalid symbol redefinition");
6163
6164 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6165 return false;
6166}
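 // Example of the syntax from the grammar comment above (symbol name, size and
 // alignment are illustrative):
 //   .amdgpu_lds my_lds_buffer, 256, 16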
6167
6168bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6169 StringRef IDVal = DirectiveID.getString();
6170
6171 if (isHsaAbi(getSTI())) {
6172 if (IDVal == ".amdhsa_kernel")
6173 return ParseDirectiveAMDHSAKernel();
6174
6175 if (IDVal == ".amdhsa_code_object_version")
6176 return ParseDirectiveAMDHSACodeObjectVersion();
6177
6178 // TODO: Restructure/combine with PAL metadata directive.
6179 if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
6180 return ParseDirectiveHSAMetadata();
6181 } else {
6182 if (IDVal == ".amd_kernel_code_t")
6183 return ParseDirectiveAMDKernelCodeT();
6184
6185 if (IDVal == ".amdgpu_hsa_kernel")
6186 return ParseDirectiveAMDGPUHsaKernel();
6187
6188 if (IDVal == ".amd_amdgpu_isa")
6189 return ParseDirectiveISAVersion();
6190
6191 if (IDVal == HSAMD::AssemblerDirectiveBegin) {
6192 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6193 Twine(" directive is "
6194 "not available on non-amdhsa OSes"))
6195 .str());
6196 }
6197 }
6198
6199 if (IDVal == ".amdgcn_target")
6200 return ParseDirectiveAMDGCNTarget();
6201
6202 if (IDVal == ".amdgpu_lds")
6203 return ParseDirectiveAMDGPULDS();
6204
6205 if (IDVal == PALMD::AssemblerDirectiveBegin)
6206 return ParseDirectivePALMetadataBegin();
6207
6208 if (IDVal == PALMD::AssemblerDirective)
6209 return ParseDirectivePALMetadata();
6210
6211 return true;
6212}
6213
6214bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6215 unsigned RegNo) {
6216
6217 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
6218 return isGFX9Plus();
6219
6220 // GFX10+ has 2 more SGPRs 104 and 105.
6221 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
6222 return hasSGPR104_SGPR105();
6223
6224 switch (RegNo) {
6225 case AMDGPU::SRC_SHARED_BASE_LO:
6226 case AMDGPU::SRC_SHARED_BASE:
6227 case AMDGPU::SRC_SHARED_LIMIT_LO:
6228 case AMDGPU::SRC_SHARED_LIMIT:
6229 case AMDGPU::SRC_PRIVATE_BASE_LO:
6230 case AMDGPU::SRC_PRIVATE_BASE:
6231 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
6232 case AMDGPU::SRC_PRIVATE_LIMIT:
6233 return isGFX9Plus();
6234 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
6235 return isGFX9Plus() && !isGFX11Plus();
6236 case AMDGPU::TBA:
6237 case AMDGPU::TBA_LO:
6238 case AMDGPU::TBA_HI:
6239 case AMDGPU::TMA:
6240 case AMDGPU::TMA_LO:
6241 case AMDGPU::TMA_HI:
6242 return !isGFX9Plus();
6243 case AMDGPU::XNACK_MASK:
6244 case AMDGPU::XNACK_MASK_LO:
6245 case AMDGPU::XNACK_MASK_HI:
6246 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6247 case AMDGPU::SGPR_NULL:
6248 return isGFX10Plus();
6249 default:
6250 break;
6251 }
6252
6253 if (isCI())
6254 return true;
6255
6256 if (isSI() || isGFX10Plus()) {
6257 // No flat_scr on SI.
6258 // On GFX10Plus flat scratch is not a valid register operand and can only be
6259 // accessed with s_setreg/s_getreg.
6260 switch (RegNo) {
6261 case AMDGPU::FLAT_SCR:
6262 case AMDGPU::FLAT_SCR_LO:
6263 case AMDGPU::FLAT_SCR_HI:
6264 return false;
6265 default:
6266 return true;
6267 }
6268 }
6269
6270 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6271 // SI/CI have.
6272 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
6273 return hasSGPR102_SGPR103();
6274
6275 return true;
6276}
6277
6278ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6279 StringRef Mnemonic,
6280 OperandMode Mode) {
6281 ParseStatus Res = parseVOPD(Operands);
6282 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6283 return Res;
6284
6285 // Try to parse with a custom parser
6286 Res = MatchOperandParserImpl(Operands, Mnemonic);
6287
6288 // If we successfully parsed the operand or if there was an error parsing,
6289 // we are done.
6290 //
6291 // If we are parsing after we reach EndOfStatement then this means we
6292 // are appending default values to the Operands list. This is only done
6293 // by custom parser, so we shouldn't continue on to the generic parsing.
6294 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6295 return Res;
6296
6297 SMLoc RBraceLoc;
6298 SMLoc LBraceLoc = getLoc();
6299 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6300 unsigned Prefix = Operands.size();
6301
6302 for (;;) {
6303 auto Loc = getLoc();
6304 Res = parseReg(Operands);
6305 if (Res.isNoMatch())
6306 Error(Loc, "expected a register");
6307 if (!Res.isSuccess())
6308 return ParseStatus::Failure;
6309
6310 RBraceLoc = getLoc();
6311 if (trySkipToken(AsmToken::RBrac))
6312 break;
6313
6314 if (!skipToken(AsmToken::Comma,
6315 "expected a comma or a closing square bracket"))
6316 return ParseStatus::Failure;
6317 }
6318
6319 if (Operands.size() - Prefix > 1) {
6320 Operands.insert(Operands.begin() + Prefix,
6321 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6322 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6323 }
6324
6325 return ParseStatus::Success;
6326 }
6327
6328 return parseRegOrImm(Operands);
6329}
6330
6331StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6332 // Clear any forced encodings from the previous instruction.
6333 setForcedEncodingSize(0);
6334 setForcedDPP(false);
6335 setForcedSDWA(false);
6336
6337 if (Name.ends_with("_e64_dpp")) {
6338 setForcedDPP(true);
6339 setForcedEncodingSize(64);
6340 return Name.substr(0, Name.size() - 8);
6341 } else if (Name.ends_with("_e64")) {
6342 setForcedEncodingSize(64);
6343 return Name.substr(0, Name.size() - 4);
6344 } else if (Name.ends_with("_e32")) {
6345 setForcedEncodingSize(32);
6346 return Name.substr(0, Name.size() - 4);
6347 } else if (Name.ends_with("_dpp")) {
6348 setForcedDPP(true);
6349 return Name.substr(0, Name.size() - 4);
6350 } else if (Name.ends_with("_sdwa")) {
6351 setForcedSDWA(true);
6352 return Name.substr(0, Name.size() - 5);
6353 }
6354 return Name;
6355}
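 // For example (hypothetical mnemonics): "v_add_f32_e64" is reduced to
 // "v_add_f32" with a forced 64-bit encoding, and "v_mov_b32_sdwa" is reduced
 // to "v_mov_b32" with SDWA forced.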
6356
6357static void applyMnemonicAliases(StringRef &Mnemonic,
6358 const FeatureBitset &Features,
6359 unsigned VariantID);
6360
6361bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
6362 StringRef Name,
6363 SMLoc NameLoc, OperandVector &Operands) {
6364 // Add the instruction mnemonic
6365 Name = parseMnemonicSuffix(Name);
6366
6367 // If the target architecture uses MnemonicAlias, call it here to parse
6368 // operands correctly.
6369 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6370
6371 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6372
6373 bool IsMIMG = Name.starts_with("image_");
6374
6375 while (!trySkipToken(AsmToken::EndOfStatement)) {
6376 OperandMode Mode = OperandMode_Default;
6377 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6378 Mode = OperandMode_NSA;
6379 ParseStatus Res = parseOperand(Operands, Name, Mode);
6380
6381 if (!Res.isSuccess()) {
6382 checkUnsupportedInstruction(Name, NameLoc);
6383 if (!Parser.hasPendingError()) {
6384 // FIXME: use real operand location rather than the current location.
6385 StringRef Msg = Res.isFailure() ? "failed parsing operand."
6386 : "not a valid operand.";
6387 Error(getLoc(), Msg);
6388 }
6389 while (!trySkipToken(AsmToken::EndOfStatement)) {
6390 lex();
6391 }
6392 return true;
6393 }
6394
6395 // Eat the comma or space if there is one.
6396 trySkipToken(AsmToken::Comma);
6397 }
6398
6399 return false;
6400}
6401
6402//===----------------------------------------------------------------------===//
6403// Utility functions
6404//===----------------------------------------------------------------------===//
6405
6406ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6407 OperandVector &Operands) {
6408 SMLoc S = getLoc();
6409 if (!trySkipId(Name))
6410 return ParseStatus::NoMatch;
6411
6412 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6413 return ParseStatus::Success;
6414}
6415
6416ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6417 int64_t &IntVal) {
6418
6419 if (!trySkipId(Prefix, AsmToken::Colon))
6420 return ParseStatus::NoMatch;
6421
6422 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6423 }
6424
6425ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6426 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6427 std::function<bool(int64_t &)> ConvertResult) {
6428 SMLoc S = getLoc();
6429 int64_t Value = 0;
6430
6431 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6432 if (!Res.isSuccess())
6433 return Res;
6434
6435 if (ConvertResult && !ConvertResult(Value)) {
6436 Error(S, "invalid " + StringRef(Prefix) + " value.");
6437 }
6438
6439 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6440 return ParseStatus::Success;
6441}
6442
6443ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6444 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6445 bool (*ConvertResult)(int64_t &)) {
6446 SMLoc S = getLoc();
6447 if (!trySkipId(Prefix, AsmToken::Colon))
6448 return ParseStatus::NoMatch;
6449
6450 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
6451 return ParseStatus::Failure;
6452
6453 unsigned Val = 0;
6454 const unsigned MaxSize = 4;
6455
6456 // FIXME: How to verify the number of elements matches the number of src
6457 // operands?
6458 for (int I = 0; ; ++I) {
6459 int64_t Op;
6460 SMLoc Loc = getLoc();
6461 if (!parseExpr(Op))
6462 return ParseStatus::Failure;
6463
6464 if (Op != 0 && Op != 1)
6465 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
6466
6467 Val |= (Op << I);
6468
6469 if (trySkipToken(AsmToken::RBrac))
6470 break;
6471
6472 if (I + 1 == MaxSize)
6473 return Error(getLoc(), "expected a closing square bracket");
6474
6475 if (!skipToken(AsmToken::Comma, "expected a comma"))
6476 return ParseStatus::Failure;
6477 }
6478
6479 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
6480 return ParseStatus::Success;
6481}
6482
6483ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
6484 OperandVector &Operands,
6485 AMDGPUOperand::ImmTy ImmTy) {
6486 int64_t Bit;
6487 SMLoc S = getLoc();
6488
6489 if (trySkipId(Name)) {
6490 Bit = 1;
6491 } else if (trySkipId("no", Name)) {
6492 Bit = 0;
6493 } else {
6494 return ParseStatus::NoMatch;
6495 }
6496
6497 if (Name == "r128" && !hasMIMG_R128())
6498 return Error(S, "r128 modifier is not supported on this GPU");
6499 if (Name == "a16" && !hasA16())
6500 return Error(S, "a16 modifier is not supported on this GPU");
6501
6502 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6503 ImmTy = AMDGPUOperand::ImmTyR128A16;
6504
6505 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
6506 return ParseStatus::Success;
6507}
6508
6509unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
6510 bool &Disabling) const {
6511 Disabling = Id.consume_front("no");
6512
6513 if (isGFX940() && !Mnemo.starts_with("s_")) {
6514 return StringSwitch<unsigned>(Id)
6515 .Case("nt", AMDGPU::CPol::NT)
6516 .Case("sc0", AMDGPU::CPol::SC0)
6517 .Case("sc1", AMDGPU::CPol::SC1)
6518 .Default(0);
6519 }
6520
6521 return StringSwitch<unsigned>(Id)
6522 .Case("dlc", AMDGPU::CPol::DLC)
6523 .Case("glc", AMDGPU::CPol::GLC)
6524 .Case("scc", AMDGPU::CPol::SCC)
6525 .Case("slc", AMDGPU::CPol::SLC)
6526 .Default(0);
6527}
6528
6529ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
6530 if (isGFX12Plus()) {
6531 SMLoc StringLoc = getLoc();
6532
6533 int64_t CPolVal = 0;
6534 ParseStatus ResTH = ParseStatus::NoMatch;
6535 ParseStatus ResScope = ParseStatus::NoMatch;
6536
6537 for (;;) {
6538 if (ResTH.isNoMatch()) {
6539 int64_t TH;
6540 ResTH = parseTH(Operands, TH);
6541 if (ResTH.isFailure())
6542 return ResTH;
6543 if (ResTH.isSuccess()) {
6544 CPolVal |= TH;
6545 continue;
6546 }
6547 }
6548
6549 if (ResScope.isNoMatch()) {
6550 int64_t Scope;
6551 ResScope = parseScope(Operands, Scope);
6552 if (ResScope.isFailure())
6553 return ResScope;
6554 if (ResScope.isSuccess()) {
6555 CPolVal |= Scope;
6556 continue;
6557 }
6558 }
6559
6560 break;
6561 }
6562
6563 if (ResTH.isNoMatch() && ResScope.isNoMatch())
6564 return ParseStatus::NoMatch;
6565
6566 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
6567 AMDGPUOperand::ImmTyCPol));
6568 return ParseStatus::Success;
6569 }
6570
6571 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
6572 SMLoc OpLoc = getLoc();
6573 unsigned Enabled = 0, Seen = 0;
6574 for (;;) {
6575 SMLoc S = getLoc();
6576 bool Disabling;
6577 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
6578 if (!CPol)
6579 break;
6580
6581 lex();
6582
6583 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
6584 return Error(S, "dlc modifier is not supported on this GPU");
6585
6586 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
6587 return Error(S, "scc modifier is not supported on this GPU");
6588
6589 if (Seen & CPol)
6590 return Error(S, "duplicate cache policy modifier");
6591
6592 if (!Disabling)
6593 Enabled |= CPol;
6594
6595 Seen |= CPol;
6596 }
6597
6598 if (!Seen)
6599 return ParseStatus::NoMatch;
6600
6601 Operands.push_back(
6602 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
6603 return ParseStatus::Success;
6604}
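 // Illustrative operands handled by parseCPol: pre-GFX12 targets use bit-style
 // modifiers such as "glc slc dlc" (or "noglc" to clear a bit), while GFX12+
 // uses the key:value forms parsed by parseScope/parseTH below, e.g.
 // "th:TH_LOAD_NT scope:SCOPE_SE".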
6605
6606ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
6607 int64_t &Scope) {
6608 Scope = AMDGPU::CPol::SCOPE_CU; // default;
6609
6610 StringRef Value;
6611 SMLoc StringLoc;
6612 ParseStatus Res;
6613
6614 Res = parseStringWithPrefix("scope", Value, StringLoc);
6615 if (!Res.isSuccess())
6616 return Res;
6617
6618 Scope = StringSwitch<int64_t>(Value)
6619 .Case("SCOPE_CU", AMDGPU::CPol::SCOPE_CU)
6620 .Case("SCOPE_SE", AMDGPU::CPol::SCOPE_SE)
6621 .Case("SCOPE_DEV", AMDGPU::CPol::SCOPE_DEV)
6622 .Case("SCOPE_SYS", AMDGPU::CPol::SCOPE_SYS)
6623 .Default(0xffffffff);
6624
6625 if (Scope == 0xffffffff)
6626 return Error(StringLoc, "invalid scope value");
6627
6628 return ParseStatus::Success;
6629}
6630
6631ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
6632 TH = AMDGPU::CPol::TH_RT; // default
6633
6635 SMLoc StringLoc;
6636 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
6637 if (!Res.isSuccess())
6638 return Res;
6639
6640 if (Value == "TH_DEFAULT")
6641 TH = AMDGPU::CPol::TH_RT;
6642 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" ||
6643 Value == "TH_LOAD_NT_WB") {
6644 return Error(StringLoc, "invalid th value");
6645 } else if (Value.consume_front("TH_ATOMIC_")) {
6646 TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
6647 } else if (Value.consume_front("TH_LOAD_")) {
6648 TH = AMDGPU::CPol::TH_TYPE_LOAD;
6649 } else if (Value.consume_front("TH_STORE_")) {
6650 TH = AMDGPU::CPol::TH_TYPE_STORE;
6651 } else {
6652 return Error(StringLoc, "invalid th value");
6653 }
6654
6655 if (Value == "BYPASS")
6656 TH |= AMDGPU::CPol::TH_REAL_BYPASS;
6657
6658 if (TH != 0) {
6665 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
6668 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
6670 .Default(0xffffffff);
6671 else
6677 .Case("RT_WB", AMDGPU::CPol::TH_RT_WB)
6678 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
6679 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
6680 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
6681 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
6682 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
6683 .Default(0xffffffff);
6684 }
6685
6686 if (TH == 0xffffffff)
6687 return Error(StringLoc, "invalid th value");
6688
6689 return ParseStatus::Success;
6690}
6691
6692 static void addOptionalImmOperand(
6693 MCInst& Inst, const OperandVector& Operands,
6694 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6695 AMDGPUOperand::ImmTy ImmT,
6696 int64_t Default = 0) {
6697 auto i = OptionalIdx.find(ImmT);
6698 if (i != OptionalIdx.end()) {
6699 unsigned Idx = i->second;
6700 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
6701 } else {
6702 Inst.addOperand(MCOperand::createImm(Default));
6703 }
6704}
6705
6706ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6707 StringRef &Value,
6708 SMLoc &StringLoc) {
6709 if (!trySkipId(Prefix, AsmToken::Colon))
6710 return ParseStatus::NoMatch;
6711
6712 StringLoc = getLoc();
6713 return parseId(Value, "expected an identifier") ? ParseStatus::Success
6714 : ParseStatus::Failure;
6715 }
6716
6717//===----------------------------------------------------------------------===//
6718// MTBUF format
6719//===----------------------------------------------------------------------===//
6720
6721bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6722 int64_t MaxVal,
6723 int64_t &Fmt) {
6724 int64_t Val;
6725 SMLoc Loc = getLoc();
6726
6727 auto Res = parseIntWithPrefix(Pref, Val);
6728 if (Res.isFailure())
6729 return false;
6730 if (Res.isNoMatch())
6731 return true;
6732
6733 if (Val < 0 || Val > MaxVal) {
6734 Error(Loc, Twine("out of range ", StringRef(Pref)));
6735 return false;
6736 }
6737
6738 Fmt = Val;
6739 return true;
6740}
6741
6742ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
6743 AMDGPUOperand::ImmTy ImmTy) {
6744 const char *Pref = "index_key";
6745 int64_t ImmVal = 0;
6746 SMLoc Loc = getLoc();
6747 auto Res = parseIntWithPrefix(Pref, ImmVal);
6748 if (!Res.isSuccess())
6749 return Res;
6750
6751 if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
6752 return Error(Loc, Twine("out of range ", StringRef(Pref)));
6753
6754 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
6755 return Error(Loc, Twine("out of range ", StringRef(Pref)));
6756
6757 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
6758 return ParseStatus::Success;
6759}
6760
6761ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
6762 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
6763}
6764
6765ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
6766 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
6767}
6768
6769// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6770// values to live in a joint format operand in the MCInst encoding.
6771ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6772 using namespace llvm::AMDGPU::MTBUFFormat;
6773
6774 int64_t Dfmt = DFMT_UNDEF;
6775 int64_t Nfmt = NFMT_UNDEF;
6776
6777 // dfmt and nfmt can appear in either order, and each is optional.
6778 for (int I = 0; I < 2; ++I) {
6779 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6780 return ParseStatus::Failure;
6781
6782 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
6783 return ParseStatus::Failure;
6784
6785 // Skip optional comma between dfmt/nfmt
6786 // but guard against 2 commas following each other.
6787 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6788 !peekToken().is(AsmToken::Comma)) {
6789 trySkipToken(AsmToken::Comma);
6790 }
6791 }
6792
6793 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6794 return ParseStatus::NoMatch;
6795
6796 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6797 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6798
6799 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6800 return ParseStatus::Success;
6801}
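 // Example of the legacy split syntax accepted here (field values are
 // illustrative): "dfmt:4, nfmt:2", or in the opposite order "nfmt:2, dfmt:4".
 // Either field may be omitted, in which case it defaults to
 // DFMT_DEFAULT/NFMT_DEFAULT as above.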
6802
6803ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6804 using namespace llvm::AMDGPU::MTBUFFormat;
6805
6806 int64_t Fmt = UFMT_UNDEF;
6807
6808 if (!tryParseFmt("format", UFMT_MAX, Fmt))
6809 return ParseStatus::Failure;
6810
6811 if (Fmt == UFMT_UNDEF)
6812 return ParseStatus::NoMatch;
6813
6814 Format = Fmt;
6815 return ParseStatus::Success;
6816}
6817
6818bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6819 int64_t &Nfmt,
6820 StringRef FormatStr,
6821 SMLoc Loc) {
6822 using namespace llvm::AMDGPU::MTBUFFormat;
6823 int64_t Format;
6824
6825 Format = getDfmt(FormatStr);
6826 if (Format != DFMT_UNDEF) {
6827 Dfmt = Format;
6828 return true;
6829 }
6830
6831 Format = getNfmt(FormatStr, getSTI());
6832 if (Format != NFMT_UNDEF) {
6833 Nfmt = Format;
6834 return true;
6835 }
6836
6837 Error(Loc, "unsupported format");
6838 return false;
6839}
6840
6841ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6842 SMLoc FormatLoc,
6843 int64_t &Format) {
6844 using namespace llvm::AMDGPU::MTBUFFormat;
6845
6846 int64_t Dfmt = DFMT_UNDEF;
6847 int64_t Nfmt = NFMT_UNDEF;
6848 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6849 return ParseStatus::Failure;
6850
6851 if (trySkipToken(AsmToken::Comma)) {
6852 StringRef Str;
6853 SMLoc Loc = getLoc();
6854 if (!parseId(Str, "expected a format string") ||
6855 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
6856 return ParseStatus::Failure;
6857 if (Dfmt == DFMT_UNDEF)
6858 return Error(Loc, "duplicate numeric format");
6859 if (Nfmt == NFMT_UNDEF)
6860 return Error(Loc, "duplicate data format");
6861 }
6862
6863 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6864 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6865
6866 if (isGFX10Plus()) {
6867 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6868 if (Ufmt == UFMT_UNDEF)
6869 return Error(FormatLoc, "unsupported format");
6870 Format = Ufmt;
6871 } else {
6872 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6873 }
6874
6875 return ParseStatus::Success;
6876}
6877
6878ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6879 SMLoc Loc,
6880 int64_t &Format) {
6881 using namespace llvm::AMDGPU::MTBUFFormat;
6882
6883 auto Id = getUnifiedFormat(FormatStr, getSTI());
6884 if (Id == UFMT_UNDEF)
6885 return ParseStatus::NoMatch;
6886
6887 if (!isGFX10Plus())
6888 return Error(Loc, "unified format is not supported on this GPU");
6889
6890 Format = Id;
6891 return ParseStatus::Success;
6892}
6893
6894ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6895 using namespace llvm::AMDGPU::MTBUFFormat;
6896 SMLoc Loc = getLoc();
6897
6898 if (!parseExpr(Format))
6899 return ParseStatus::Failure;
6900 if (!isValidFormatEncoding(Format, getSTI()))
6901 return Error(Loc, "out of range format");
6902
6903 return ParseStatus::Success;
6904}
6905
6906ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6907 using namespace llvm::AMDGPU::MTBUFFormat;
6908
6909 if (!trySkipId("format", AsmToken::Colon))
6910 return ParseStatus::NoMatch;
6911
6912 if (trySkipToken(AsmToken::LBrac)) {
6913 StringRef FormatStr;
6914 SMLoc Loc = getLoc();
6915 if (!parseId(FormatStr, "expected a format string"))
6916 return ParseStatus::Failure;
6917
6918 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6919 if (Res.isNoMatch())
6920 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6921 if (!Res.isSuccess())
6922 return Res;
6923
6924 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6925 return ParseStatus::Failure;
6926
6927 return ParseStatus::Success;
6928 }
6929
6930 return parseNumericFormat(Format);
6931}
6932
6933ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6934 using namespace llvm::AMDGPU::MTBUFFormat;
6935
6936 int64_t Format = getDefaultFormatEncoding(getSTI());
6937 ParseStatus Res;
6938 SMLoc Loc = getLoc();
6939
6940 // Parse legacy format syntax.
6941 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6942 if (Res.isFailure())
6943 return Res;
6944
6945 bool FormatFound = Res.isSuccess();
6946
6947 Operands.push_back(
6948 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6949
6950 if (FormatFound)
6951 trySkipToken(AsmToken::Comma);
6952
6953 if (isToken(AsmToken::EndOfStatement)) {
6954 // We are expecting an soffset operand,
6955 // but let the matcher handle the error.
6956 return ParseStatus::Success;
6957 }
6958
6959 // Parse soffset.
6960 Res = parseRegOrImm(Operands);
6961 if (!Res.isSuccess())
6962 return Res;
6963
6964 trySkipToken(AsmToken::Comma);
6965
6966 if (!FormatFound) {
6967 Res = parseSymbolicOrNumericFormat(Format);
6968 if (Res.isFailure())
6969 return Res;
6970 if (Res.isSuccess()) {
6971 auto Size = Operands.size();
6972 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6973 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6974 Op.setImm(Format);
6975 }
6976 return ParseStatus::Success;
6977 }
6978
6979 if (isId("format") && peekToken().is(AsmToken::Colon))
6980 return Error(getLoc(), "duplicate format");
6981 return ParseStatus::Success;
6982}
6983
6984ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
6985 ParseStatus Res =
6986 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
6987 if (Res.isNoMatch()) {
6988 Res = parseIntWithPrefix("inst_offset", Operands,
6989 AMDGPUOperand::ImmTyInstOffset);
6990 }
6991 return Res;
6992}
6993
6994ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
6995 ParseStatus Res =
6996 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
6997 if (Res.isNoMatch())
6998 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
6999 return Res;
7000}
7001
7002ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7003 ParseStatus Res =
7004 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7005 if (Res.isNoMatch()) {
7006 Res =
7007 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7008 }
7009 return Res;
7010}
7011
7012//===----------------------------------------------------------------------===//
7013// Exp
7014//===----------------------------------------------------------------------===//
7015
7016void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7017 OptionalImmIndexMap OptionalIdx;
7018
7019 unsigned OperandIdx[4];
7020 unsigned EnMask = 0;
7021 int SrcIdx = 0;
7022
7023 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7024 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7025
7026 // Add the register arguments
7027 if (Op.isReg()) {
7028 assert(SrcIdx < 4);
7029 OperandIdx[SrcIdx] = Inst.size();
7030 Op.addRegOperands(Inst, 1);
7031 ++SrcIdx;
7032 continue;
7033 }
7034
7035 if (Op.isOff()) {
7036 assert(SrcIdx < 4);
7037 OperandIdx[SrcIdx] = Inst.size();
7038 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
7039 ++SrcIdx;
7040 continue;
7041 }
7042
7043 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7044 Op.addImmOperands(Inst, 1);
7045 continue;
7046 }
7047
7048 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7049 continue;
7050
7051 // Handle optional arguments
7052 OptionalIdx[Op.getImmTy()] = i;
7053 }
7054
7055 assert(SrcIdx == 4);
7056
7057 bool Compr = false;
7058 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7059 Compr = true;
7060 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7061 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
7062 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
7063 }
7064
7065 for (auto i = 0; i < SrcIdx; ++i) {
7066 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
7067 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7068 }
7069 }
7070
7071 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7072 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7073
7074 Inst.addOperand(MCOperand::createImm(EnMask));
7075}
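 // Example instruction converted by cvtExp (target and registers are
 // illustrative): "exp mrt0 v0, v1, v2, v3 done". Sources written as "off"
 // become NoRegister above and are cleared from the resulting enable mask.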
7076
7077//===----------------------------------------------------------------------===//
7078// s_waitcnt
7079//===----------------------------------------------------------------------===//
7080
7081static bool
7082 encodeCnt(
7083 const AMDGPU::IsaVersion ISA,
7084 int64_t &IntVal,
7085 int64_t CntVal,
7086 bool Saturate,
7087 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7088 unsigned (*decode)(const IsaVersion &Version, unsigned))
7089{
7090 bool Failed = false;
7091
7092 IntVal = encode(ISA, IntVal, CntVal);
7093 if (CntVal != decode(ISA, IntVal)) {
7094 if (Saturate) {
7095 IntVal = encode(ISA, IntVal, -1);
7096 } else {
7097 Failed = true;
7098 }
7099 }
7100 return Failed;
7101}
7102
7103bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7104
7105 SMLoc CntLoc = getLoc();
7106 StringRef CntName = getTokenStr();
7107
7108 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7109 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7110 return false;
7111
7112 int64_t CntVal;
7113 SMLoc ValLoc = getLoc();
7114 if (!parseExpr(CntVal))
7115 return false;
7116
7117 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7118
7119 bool Failed = true;
7120 bool Sat = CntName.ends_with("_sat");
7121
7122 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7123 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7124 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7125 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7126 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7127 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7128 } else {
7129 Error(CntLoc, "invalid counter name " + CntName);
7130 return false;
7131 }
7132
7133 if (Failed) {
7134 Error(ValLoc, "too large value for " + CntName);
7135 return false;
7136 }
7137
7138 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7139 return false;
7140
7141 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7142 if (isToken(AsmToken::EndOfStatement)) {
7143 Error(getLoc(), "expected a counter name");
7144 return false;
7145 }
7146 }
7147
7148 return true;
7149}
7150
7151ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7152 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7153 int64_t Waitcnt = getWaitcntBitMask(ISA);
7154 SMLoc S = getLoc();
7155
7156 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7157 while (!isToken(AsmToken::EndOfStatement)) {
7158 if (!parseCnt(Waitcnt))
7159 return ParseStatus::Failure;
7160 }
7161 } else {
7162 if (!parseExpr(Waitcnt))
7163 return ParseStatus::Failure;
7164 }
7165
7166 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7167 return ParseStatus::Success;
7168}
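 // Example operands accepted by parseSWaitCnt (counter values are
 // illustrative):
 //   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 // or a plain absolute expression such as "s_waitcnt 0".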
7169
7170bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7171 SMLoc FieldLoc = getLoc();
7172 StringRef FieldName = getTokenStr();
7173 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7174 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7175 return false;
7176
7177 SMLoc ValueLoc = getLoc();
7178 StringRef ValueName = getTokenStr();
7179 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7180 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7181 return false;
7182
7183 unsigned Shift;
7184 if (FieldName == "instid0") {
7185 Shift = 0;
7186 } else if (FieldName == "instskip") {
7187 Shift = 4;
7188 } else if (FieldName == "instid1") {
7189 Shift = 7;
7190 } else {
7191 Error(FieldLoc, "invalid field name " + FieldName);
7192 return false;
7193 }
7194
7195 int Value;
7196 if (Shift == 4) {
7197 // Parse values for instskip.
7198 Value = StringSwitch<int>(ValueName)
7199 .Case("SAME", 0)
7200 .Case("NEXT", 1)
7201 .Case("SKIP_1", 2)
7202 .Case("SKIP_2", 3)
7203 .Case("SKIP_3", 4)
7204 .Case("SKIP_4", 5)
7205 .Default(-1);
7206 } else {
7207 // Parse values for instid0 and instid1.
7208 Value = StringSwitch<int>(ValueName)
7209 .Case("NO_DEP", 0)
7210 .Case("VALU_DEP_1", 1)
7211 .Case("VALU_DEP_2", 2)
7212 .Case("VALU_DEP_3", 3)
7213 .Case("VALU_DEP_4", 4)
7214 .Case("TRANS32_DEP_1", 5)
7215 .Case("TRANS32_DEP_2", 6)
7216 .Case("TRANS32_DEP_3", 7)
7217 .Case("FMA_ACCUM_CYCLE_1", 8)
7218 .Case("SALU_CYCLE_1", 9)
7219 .Case("SALU_CYCLE_2", 10)
7220 .Case("SALU_CYCLE_3", 11)
7221 .Default(-1);
7222 }
7223 if (Value < 0) {
7224 Error(ValueLoc, "invalid value name " + ValueName);
7225 return false;
7226 }
7227
7228 Delay |= Value << Shift;
7229 return true;
7230}
7231
7232ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7233 int64_t Delay = 0;
7234 SMLoc S = getLoc();
7235
7236 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7237 do {
7238 if (!parseDelay(Delay))
7239 return ParseStatus::Failure;
7240 } while (trySkipToken(AsmToken::Pipe));
7241 } else {
7242 if (!parseExpr(Delay))
7243 return ParseStatus::Failure;
7244 }
7245
7246 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7247 return ParseStatus::Success;
7248}
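 // Example operand accepted by parseSDelayALU (field values are illustrative):
 //   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 // or a plain integer expression.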
7249
7250bool
7251AMDGPUOperand::isSWaitCnt() const {
7252 return isImm();
7253}
7254
7255bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7256
7257//===----------------------------------------------------------------------===//
7258// DepCtr
7259//===----------------------------------------------------------------------===//
7260
7261void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7262 StringRef DepCtrName) {
7263 switch (ErrorId) {
7264 case OPR_ID_UNKNOWN:
7265 Error(Loc, Twine("invalid counter name ", DepCtrName));
7266 return;
7267 case OPR_ID_UNSUPPORTED:
7268 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7269 return;
7270 case OPR_ID_DUPLICATE:
7271 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7272 return;
7273 case OPR_VAL_INVALID:
7274 Error(Loc, Twine("invalid value for ", DepCtrName));
7275 return;
7276 default:
7277 assert(false);
7278 }
7279}
7280
7281bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7282
7283 using namespace llvm::AMDGPU::DepCtr;
7284
7285 SMLoc DepCtrLoc = getLoc();
7286 StringRef DepCtrName = getTokenStr();
7287
7288 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7289 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7290 return false;
7291
7292 int64_t ExprVal;
7293 if (!parseExpr(ExprVal))
7294 return false;
7295
7296 unsigned PrevOprMask = UsedOprMask;
7297 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7298
7299 if (CntVal < 0) {
7300 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7301 return false;
7302 }
7303
7304 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7305 return false;
7306
7307 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7308 if (isToken(AsmToken::EndOfStatement)) {
7309 Error(getLoc(), "expected a counter name");
7310 return false;
7311 }
7312 }
7313
7314 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7315 DepCtr = (DepCtr & ~CntValMask) | CntVal;
7316 return true;
7317}
7318
7319ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7320 using namespace llvm::AMDGPU::DepCtr;
7321
7322 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
7323 SMLoc Loc = getLoc();
7324
7325 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7326 unsigned UsedOprMask = 0;
7327 while (!isToken(AsmToken::EndOfStatement)) {
7328 if (!parseDepCtr(DepCtr, UsedOprMask))
7329 return ParseStatus::Failure;
7330 }
7331 } else {
7332 if (!parseExpr(DepCtr))
7333 return ParseStatus::Failure;
7334 }
7335
7336 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
7337 return ParseStatus::Success;
7338}
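// Sketch of accepted syntax (illustrative; counter names come from
// llvm::AMDGPU::DepCtr and depend on the target):
//   s_waitcnt_depctr depctr_va_vdst(0) & depctr_sa_sdst(0)
// or a plain immediate holding the full depctr encoding.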
7339
7340bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
7341
7342//===----------------------------------------------------------------------===//
7343// hwreg
7344//===----------------------------------------------------------------------===//
7345
7346ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
7347 OperandInfoTy &Offset,
7348 OperandInfoTy &Width) {
7349 using namespace llvm::AMDGPU::Hwreg;
7350
7351 if (!trySkipId("hwreg", AsmToken::LParen))
7352 return ParseStatus::NoMatch;
7353
7354 // The register may be specified by name or using a numeric code
7355 HwReg.Loc = getLoc();
7356 if (isToken(AsmToken::Identifier) &&
7357 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7358 HwReg.IsSymbolic = true;
7359 lex(); // skip register name
7360 } else if (!parseExpr(HwReg.Val, "a register name")) {
7361 return ParseStatus::Failure;
7362 }
7363
7364 if (trySkipToken(AsmToken::RParen))
7365 return ParseStatus::Success;
7366
7367 // parse optional params
7368 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
7369 return ParseStatus::Failure;
7370
7371 Offset.Loc = getLoc();
7372 if (!parseExpr(Offset.Val))
7373 return ParseStatus::Failure;
7374
7375 if (!skipToken(AsmToken::Comma, "expected a comma"))
7376 return ParseStatus::Failure;
7377
7378 Width.Loc = getLoc();
7379 if (!parseExpr(Width.Val) ||
7380 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7381 return ParseStatus::Failure;
7382
7383 return ParseStatus::Success;
7384}
7385
7386ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
7387 using namespace llvm::AMDGPU::Hwreg;
7388
7389 int64_t ImmVal = 0;
7390 SMLoc Loc = getLoc();
7391
7392 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
7393 HwregId::Default);
7394 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
7395 HwregOffset::Default);
7396 struct : StructuredOpField {
7397 using StructuredOpField::StructuredOpField;
7398 bool validate(AMDGPUAsmParser &Parser) const override {
7399 if (!isUIntN(Width, Val - 1))
7400 return Error(Parser, "only values from 1 to 32 are legal");
7401 return true;
7402 }
7403 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
7404 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
7405
7406 if (Res.isNoMatch())
7407 Res = parseHwregFunc(HwReg, Offset, Width);
7408
7409 if (Res.isSuccess()) {
7410 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
7411 return ParseStatus::Failure;
7412 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
7413 }
7414
7415 if (Res.isNoMatch() &&
7416 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
7417 Res = ParseStatus::Success;
7418
7419 if (!Res.isSuccess())
7420 return ParseStatus::Failure;
7421
7422 if (!isUInt<16>(ImmVal))
7423 return Error(Loc, "invalid immediate: only 16-bit values are legal");
7424 Operands.push_back(
7425 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
7426 return ParseStatus::Success;
7427}
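// Sketch of the operand forms accepted above (illustrative):
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 32)     // symbolic hwreg macro
//   s_getreg_b32 s0, {id: 1, offset: 0, size: 32}  // structured fields
// or a raw 16-bit immediate holding the encoded value.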
7428
7429bool AMDGPUOperand::isHwreg() const {
7430 return isImmTy(ImmTyHwreg);
7431}
7432
7433//===----------------------------------------------------------------------===//
7434// sendmsg
7435//===----------------------------------------------------------------------===//
7436
7437bool
7438AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
7439 OperandInfoTy &Op,
7440 OperandInfoTy &Stream) {
7441 using namespace llvm::AMDGPU::SendMsg;
7442
7443 Msg.Loc = getLoc();
7444 if (isToken(AsmToken::Identifier) &&
7445 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7446 Msg.IsSymbolic = true;
7447 lex(); // skip message name
7448 } else if (!parseExpr(Msg.Val, "a message name")) {
7449 return false;
7450 }
7451
7452 if (trySkipToken(AsmToken::Comma)) {
7453 Op.IsDefined = true;
7454 Op.Loc = getLoc();
7455 if (isToken(AsmToken::Identifier) &&
7456 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
7457 OPR_ID_UNKNOWN) {
7458 lex(); // skip operation name
7459 } else if (!parseExpr(Op.Val, "an operation name")) {
7460 return false;
7461 }
7462
7463 if (trySkipToken(AsmToken::Comma)) {
7464 Stream.IsDefined = true;
7465 Stream.Loc = getLoc();
7466 if (!parseExpr(Stream.Val))
7467 return false;
7468 }
7469 }
7470
7471 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
7472}
7473
7474bool
7475AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
7476 const OperandInfoTy &Op,
7477 const OperandInfoTy &Stream) {
7478 using namespace llvm::AMDGPU::SendMsg;
7479
7480 // Validation strictness depends on whether the message is specified
7481 // in a symbolic or in a numeric form. In the latter case,
7482 // only the possibility of encoding the value is checked.
7483 bool Strict = Msg.IsSymbolic;
7484
7485 if (Strict) {
7486 if (Msg.Val == OPR_ID_UNSUPPORTED) {
7487 Error(Msg.Loc, "specified message id is not supported on this GPU");
7488 return false;
7489 }
7490 } else {
7491 if (!isValidMsgId(Msg.Val, getSTI())) {
7492 Error(Msg.Loc, "invalid message id");
7493 return false;
7494 }
7495 }
7496 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
7497 if (Op.IsDefined) {
7498 Error(Op.Loc, "message does not support operations");
7499 } else {
7500 Error(Msg.Loc, "missing message operation");
7501 }
7502 return false;
7503 }
7504 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
7505 if (Op.Val == OPR_ID_UNSUPPORTED)
7506 Error(Op.Loc, "specified operation id is not supported on this GPU");
7507 else
7508 Error(Op.Loc, "invalid operation id");
7509 return false;
7510 }
7511 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
7512 Stream.IsDefined) {
7513 Error(Stream.Loc, "message operation does not support streams");
7514 return false;
7515 }
7516 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
7517 Error(Stream.Loc, "invalid message stream id");
7518 return false;
7519 }
7520 return true;
7521}
7522
7523ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
7524 using namespace llvm::AMDGPU::SendMsg;
7525
7526 int64_t ImmVal = 0;
7527 SMLoc Loc = getLoc();
7528
7529 if (trySkipId("sendmsg", AsmToken::LParen)) {
7530 OperandInfoTy Msg(OPR_ID_UNKNOWN);
7531 OperandInfoTy Op(OP_NONE_);
7532 OperandInfoTy Stream(STREAM_ID_NONE_);
7533 if (parseSendMsgBody(Msg, Op, Stream) &&
7534 validateSendMsg(Msg, Op, Stream)) {
7535 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
7536 } else {
7537 return ParseStatus::Failure;
7538 }
7539 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
7540 if (ImmVal < 0 || !isUInt<16>(ImmVal))
7541 return Error(Loc, "invalid immediate: only 16-bit values are legal");
7542 } else {
7543 return ParseStatus::Failure;
7544 }
7545
7546 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
7547 return ParseStatus::Success;
7548}
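// Sketch of accepted forms (illustrative):
//   s_sendmsg sendmsg(MSG_INTERRUPT)
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
// or a raw 16-bit immediate; symbolic forms are checked by validateSendMsg().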
7549
7550bool AMDGPUOperand::isSendMsg() const {
7551 return isImmTy(ImmTySendMsg);
7552}
7553
7554//===----------------------------------------------------------------------===//
7555// v_interp
7556//===----------------------------------------------------------------------===//
7557
7558ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
7559 StringRef Str;
7560 SMLoc S = getLoc();
7561
7562 if (!parseId(Str))
7563 return ParseStatus::NoMatch;
7564
7565 int Slot = StringSwitch<int>(Str)
7566 .Case("p10", 0)
7567 .Case("p20", 1)
7568 .Case("p0", 2)
7569 .Default(-1);
7570
7571 if (Slot == -1)
7572 return Error(S, "invalid interpolation slot");
7573
7574 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
7575 AMDGPUOperand::ImmTyInterpSlot));
7576 return ParseStatus::Success;
7577}
7578
7579ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
7580 StringRef Str;
7581 SMLoc S = getLoc();
7582
7583 if (!parseId(Str))
7584 return ParseStatus::NoMatch;
7585
7586 if (!Str.starts_with("attr"))
7587 return Error(S, "invalid interpolation attribute");
7588
7589 StringRef Chan = Str.take_back(2);
7590 int AttrChan = StringSwitch<int>(Chan)
7591 .Case(".x", 0)
7592 .Case(".y", 1)
7593 .Case(".z", 2)
7594 .Case(".w", 3)
7595 .Default(-1);
7596 if (AttrChan == -1)
7597 return Error(S, "invalid or missing interpolation attribute channel");
7598
7599 Str = Str.drop_back(2).drop_front(4);
7600
7601 uint8_t Attr;
7602 if (Str.getAsInteger(10, Attr))
7603 return Error(S, "invalid or missing interpolation attribute number");
7604
7605 if (Attr > 32)
7606 return Error(S, "out of bounds interpolation attribute number");
7607
7608 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
7609
7610 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
7611 AMDGPUOperand::ImmTyInterpAttr));
7612 Operands.push_back(AMDGPUOperand::CreateImm(
7613 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
7614 return ParseStatus::Success;
7615}
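// Sketch of accepted syntax (illustrative): v_interp_p1_f32 v0, v1, attr0.x
// "attr<N>.<chan>" is split above into separate attribute and channel operands.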
7616
7617//===----------------------------------------------------------------------===//
7618// exp
7619//===----------------------------------------------------------------------===//
7620
7621ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
7622 using namespace llvm::AMDGPU::Exp;
7623
7624 StringRef Str;
7625 SMLoc S = getLoc();
7626
7627 if (!parseId(Str))
7628 return ParseStatus::NoMatch;
7629
7630 unsigned Id = getTgtId(Str);
7631 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
7632 return Error(S, (Id == ET_INVALID)
7633 ? "invalid exp target"
7634 : "exp target is not supported on this GPU");
7635
7636 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
7637 AMDGPUOperand::ImmTyExpTgt));
7638 return ParseStatus::Success;
7639}
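// Sketch of accepted syntax (illustrative): exp mrt0 v0, v0, v0, v0 done vm
// Target names (mrt0..mrt7, mrtz, null, pos0..., param0...) are resolved by getTgtId().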
7640
7641//===----------------------------------------------------------------------===//
7642// parser helpers
7643//===----------------------------------------------------------------------===//
7644
7645bool
7646AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
7647 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
7648}
7649
7650bool
7651AMDGPUAsmParser::isId(const StringRef Id) const {
7652 return isId(getToken(), Id);
7653}
7654
7655bool
7656AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
7657 return getTokenKind() == Kind;
7658}
7659
7660StringRef AMDGPUAsmParser::getId() const {
7661 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
7662}
7663
7664bool
7665AMDGPUAsmParser::trySkipId(const StringRef Id) {
7666 if (isId(Id)) {
7667 lex();
7668 return true;
7669 }
7670 return false;
7671}
7672
7673bool
7674AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7675 if (isToken(AsmToken::Identifier)) {
7676 StringRef Tok = getTokenStr();
7677 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
7678 lex();
7679 return true;
7680 }
7681 }
7682 return false;
7683}
7684
7685bool
7686AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7687 if (isId(Id) && peekToken().is(Kind)) {
7688 lex();
7689 lex();
7690 return true;
7691 }
7692 return false;
7693}
7694
7695bool
7696AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7697 if (isToken(Kind)) {
7698 lex();
7699 return true;
7700 }
7701 return false;
7702}
7703
7704bool
7705AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7706 const StringRef ErrMsg) {
7707 if (!trySkipToken(Kind)) {
7708 Error(getLoc(), ErrMsg);
7709 return false;
7710 }
7711 return true;
7712}
7713
7714bool
7715AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7716 SMLoc S = getLoc();
7717
7718 const MCExpr *Expr;
7719 if (Parser.parseExpression(Expr))
7720 return false;
7721
7722 if (Expr->evaluateAsAbsolute(Imm))
7723 return true;
7724
7725 if (Expected.empty()) {
7726 Error(S, "expected absolute expression");
7727 } else {
7728 Error(S, Twine("expected ", Expected) +
7729 Twine(" or an absolute expression"));
7730 }
7731 return false;
7732}
7733
7734bool
7735AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7736 SMLoc S = getLoc();
7737
7738 const MCExpr *Expr;
7739 if (Parser.parseExpression(Expr))
7740 return false;
7741
7742 int64_t IntVal;
7743 if (Expr->evaluateAsAbsolute(IntVal)) {
7744 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7745 } else {
7746 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7747 }
7748 return true;
7749}
7750
7751bool
7752AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7753 if (isToken(AsmToken::String)) {
7754 Val = getToken().getStringContents();
7755 lex();
7756 return true;
7757 } else {
7758 Error(getLoc(), ErrMsg);
7759 return false;
7760 }
7761}
7762
7763bool
7764AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7765 if (isToken(AsmToken::Identifier)) {
7766 Val = getTokenStr();
7767 lex();
7768 return true;
7769 } else {
7770 if (!ErrMsg.empty())
7771 Error(getLoc(), ErrMsg);
7772 return false;
7773 }
7774}
7775
7776 AsmToken
7777 AMDGPUAsmParser::getToken() const {
7778 return Parser.getTok();
7779}
7780
7781AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7782 return isToken(AsmToken::EndOfStatement)
7783 ? getToken()
7784 : getLexer().peekTok(ShouldSkipSpace);
7785}
7786
7787void
7788AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7789 auto TokCount = getLexer().peekTokens(Tokens);
7790
7791 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7792 Tokens[Idx] = AsmToken(AsmToken::Error, "");
7793}
7794
7795 AsmToken::TokenKind
7796 AMDGPUAsmParser::getTokenKind() const {
7797 return getLexer().getKind();
7798}
7799
7800SMLoc
7801AMDGPUAsmParser::getLoc() const {
7802 return getToken().getLoc();
7803}
7804
7805 StringRef
7806 AMDGPUAsmParser::getTokenStr() const {
7807 return getToken().getString();
7808}
7809
7810void
7811AMDGPUAsmParser::lex() {
7812 Parser.Lex();
7813}
7814
7815SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
7816 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7817}
7818
7819SMLoc
7820AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7821 const OperandVector &Operands) const {
7822 for (unsigned i = Operands.size() - 1; i > 0; --i) {
7823 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7824 if (Test(Op))
7825 return Op.getStartLoc();
7826 }
7827 return getInstLoc(Operands);
7828}
7829
7830SMLoc
7831AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7832 const OperandVector &Operands) const {
7833 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7834 return getOperandLoc(Test, Operands);
7835}
7836
7837SMLoc
7838AMDGPUAsmParser::getRegLoc(unsigned Reg,
7839 const OperandVector &Operands) const {
7840 auto Test = [=](const AMDGPUOperand& Op) {
7841 return Op.isRegKind() && Op.getReg() == Reg;
7842 };
7843 return getOperandLoc(Test, Operands);
7844}
7845
7846SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
7847 bool SearchMandatoryLiterals) const {
7848 auto Test = [](const AMDGPUOperand& Op) {
7849 return Op.IsImmKindLiteral() || Op.isExpr();
7850 };
7851 SMLoc Loc = getOperandLoc(Test, Operands);
7852 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
7853 Loc = getMandatoryLitLoc(Operands);
7854 return Loc;
7855}
7856
7857SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
7858 auto Test = [](const AMDGPUOperand &Op) {
7859 return Op.IsImmKindMandatoryLiteral();
7860 };
7861 return getOperandLoc(Test, Operands);
7862}
7863
7864SMLoc
7865AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7866 auto Test = [](const AMDGPUOperand& Op) {
7867 return Op.isImmKindConst();
7868 };
7869 return getOperandLoc(Test, Operands);
7870}
7871
7872 ParseStatus
7873 AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
7874 if (!trySkipToken(AsmToken::LCurly))
7875 return ParseStatus::NoMatch;
7876
7877 bool First = true;
7878 while (!trySkipToken(AsmToken::RCurly)) {
7879 if (!First &&
7880 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
7881 return ParseStatus::Failure;
7882
7883 StringRef Id = getTokenStr();
7884 SMLoc IdLoc = getLoc();
7885 if (!skipToken(AsmToken::Identifier, "field name expected") ||
7886 !skipToken(AsmToken::Colon, "colon expected"))
7887 return ParseStatus::Failure;
7888
7889 auto I =
7890 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
7891 if (I == Fields.end())
7892 return Error(IdLoc, "unknown field");
7893 if ((*I)->IsDefined)
7894 return Error(IdLoc, "duplicate field");
7895
7896 // TODO: Support symbolic values.
7897 (*I)->Loc = getLoc();
7898 if (!parseExpr((*I)->Val))
7899 return ParseStatus::Failure;
7900 (*I)->IsDefined = true;
7901
7902 First = false;
7903 }
7904 return ParseStatus::Success;
7905}
7906
7907bool AMDGPUAsmParser::validateStructuredOpFields(
7908 ArrayRef<const StructuredOpField *> Fields) {
7909 return all_of(Fields, [this](const StructuredOpField *F) {
7910 return F->validate(*this);
7911 });
7912}
7913
7914//===----------------------------------------------------------------------===//
7915// swizzle
7916//===----------------------------------------------------------------------===//
7917
7919static unsigned
7920encodeBitmaskPerm(const unsigned AndMask,
7921 const unsigned OrMask,
7922 const unsigned XorMask) {
7923 using namespace llvm::AMDGPU::Swizzle;
7924
7925 return BITMASK_PERM_ENC |
7926 (AndMask << BITMASK_AND_SHIFT) |
7927 (OrMask << BITMASK_OR_SHIFT) |
7928 (XorMask << BITMASK_XOR_SHIFT);
7929}
7930
7931bool
7932AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7933 const unsigned MinVal,
7934 const unsigned MaxVal,
7935 const StringRef ErrMsg,
7936 SMLoc &Loc) {
7937 if (!skipToken(AsmToken::Comma, "expected a comma")) {
7938 return false;
7939 }
7940 Loc = getLoc();
7941 if (!parseExpr(Op)) {
7942 return false;
7943 }
7944 if (Op < MinVal || Op > MaxVal) {
7945 Error(Loc, ErrMsg);
7946 return false;
7947 }
7948
7949 return true;
7950}
7951
7952bool
7953AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7954 const unsigned MinVal,
7955 const unsigned MaxVal,
7956 const StringRef ErrMsg) {
7957 SMLoc Loc;
7958 for (unsigned i = 0; i < OpNum; ++i) {
7959 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7960 return false;
7961 }
7962
7963 return true;
7964}
7965
7966bool
7967AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7968 using namespace llvm::AMDGPU::Swizzle;
7969
7970 int64_t Lane[LANE_NUM];
7971 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7972 "expected a 2-bit lane id")) {
7973 Imm = QUAD_PERM_ENC;
7974 for (unsigned I = 0; I < LANE_NUM; ++I) {
7975 Imm |= Lane[I] << (LANE_SHIFT * I);
7976 }
7977 return true;
7978 }
7979 return false;
7980}
7981
7982bool
7983AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7984 using namespace llvm::AMDGPU::Swizzle;
7985
7986 SMLoc Loc;
7987 int64_t GroupSize;
7988 int64_t LaneIdx;
7989
7990 if (!parseSwizzleOperand(GroupSize,
7991 2, 32,
7992 "group size must be in the interval [2,32]",
7993 Loc)) {
7994 return false;
7995 }
7996 if (!isPowerOf2_64(GroupSize)) {
7997 Error(Loc, "group size must be a power of two");
7998 return false;
7999 }
8000 if (parseSwizzleOperand(LaneIdx,
8001 0, GroupSize - 1,
8002 "lane id must be in the interval [0,group size - 1]",
8003 Loc)) {
8004 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8005 return true;
8006 }
8007 return false;
8008}
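// Worked example (illustrative): swizzle(BROADCAST, 8, 1) reaches this point
// with GroupSize = 8 and LaneIdx = 1 and encodes
// encodeBitmaskPerm(/*AndMask=*/0x18, /*OrMask=*/1, /*XorMask=*/0), i.e. each
// lane keeps its group bits and reads lane 1 of its 8-lane group.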
8009
8010bool
8011AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8012 using namespace llvm::AMDGPU::Swizzle;
8013
8014 SMLoc Loc;
8015 int64_t GroupSize;
8016
8017 if (!parseSwizzleOperand(GroupSize,
8018 2, 32,
8019 "group size must be in the interval [2,32]",
8020 Loc)) {
8021 return false;
8022 }
8023 if (!isPowerOf2_64(GroupSize)) {
8024 Error(Loc, "group size must be a power of two");
8025 return false;
8026 }
8027
8028 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8029 return true;
8030}
8031
8032bool
8033AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8034 using namespace llvm::AMDGPU::Swizzle;
8035
8036 SMLoc Loc;
8037 int64_t GroupSize;
8038
8039 if (!parseSwizzleOperand(GroupSize,
8040 1, 16,
8041 "group size must be in the interval [1,16]",
8042 Loc)) {
8043 return false;
8044 }
8045 if (!isPowerOf2_64(GroupSize)) {
8046 Error(Loc, "group size must be a power of two");
8047 return false;
8048 }
8049
8050 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8051 return true;
8052}
8053
8054bool
8055AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8056 using namespace llvm::AMDGPU::Swizzle;
8057
8058 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8059 return false;
8060 }
8061
8062 StringRef Ctl;
8063 SMLoc StrLoc = getLoc();
8064 if (!parseString(Ctl)) {
8065 return false;
8066 }
8067 if (Ctl.size() != BITMASK_WIDTH) {
8068 Error(StrLoc, "expected a 5-character mask");
8069 return false;
8070 }
8071
8072 unsigned AndMask = 0;
8073 unsigned OrMask = 0;
8074 unsigned XorMask = 0;
8075
8076 for (size_t i = 0; i < Ctl.size(); ++i) {
8077 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8078 switch(Ctl[i]) {
8079 default:
8080 Error(StrLoc, "invalid mask");
8081 return false;
8082 case '0':
8083 break;
8084 case '1':
8085 OrMask |= Mask;
8086 break;
8087 case 'p':
8088 AndMask |= Mask;
8089 break;
8090 case 'i':
8091 AndMask |= Mask;
8092 XorMask |= Mask;
8093 break;
8094 }
8095 }
8096
8097 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8098 return true;
8099}
8100
8101bool
8102AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8103
8104 SMLoc OffsetLoc = getLoc();
8105
8106 if (!parseExpr(Imm, "a swizzle macro")) {
8107 return false;
8108 }
8109 if (!isUInt<16>(Imm)) {
8110 Error(OffsetLoc, "expected a 16-bit offset");
8111 return false;
8112 }
8113 return true;
8114}
8115
8116bool
8117AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8118 using namespace llvm::AMDGPU::Swizzle;
8119
8120 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
8121
8122 SMLoc ModeLoc = getLoc();
8123 bool Ok = false;
8124
8125 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8126 Ok = parseSwizzleQuadPerm(Imm);
8127 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8128 Ok = parseSwizzleBitmaskPerm(Imm);
8129 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8130 Ok = parseSwizzleBroadcast(Imm);
8131 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8132 Ok = parseSwizzleSwap(Imm);
8133 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8134 Ok = parseSwizzleReverse(Imm);
8135 } else {
8136 Error(ModeLoc, "expected a swizzle mode");
8137 }
8138
8139 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
8140 }
8141
8142 return false;
8143}
8144
8145ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8146 SMLoc S = getLoc();
8147 int64_t Imm = 0;
8148
8149 if (trySkipId("offset")) {
8150
8151 bool Ok = false;
8152 if (skipToken(AsmToken::Colon, "expected a colon")) {
8153 if (trySkipId("swizzle")) {
8154 Ok = parseSwizzleMacro(Imm);
8155 } else {
8156 Ok = parseSwizzleOffset(Imm);
8157 }
8158 }
8159
8160 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8161
8162 return ParseStatus::Success;
8163 }
8164 return ParseStatus::NoMatch;
8165}
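// Sketch of accepted forms (illustrative):
//   ds_swizzle_b32 v5, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v5, v1 offset:swizzle(BITMASK_PERM, "01pip")
//   ds_swizzle_b32 v5, v1 offset:0x8000
// The swizzle(...) macro is expanded by parseSwizzleMacro() into a 16-bit offset.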
8166
8167bool
8168AMDGPUOperand::isSwizzle() const {
8169 return isImmTy(ImmTySwizzle);
8170}
8171
8172//===----------------------------------------------------------------------===//
8173// VGPR Index Mode
8174//===----------------------------------------------------------------------===//
8175
8176int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8177
8178 using namespace llvm::AMDGPU::VGPRIndexMode;
8179
8180 if (trySkipToken(AsmToken::RParen)) {
8181 return OFF;
8182 }
8183
8184 int64_t Imm = 0;
8185
8186 while (true) {
8187 unsigned Mode = 0;
8188 SMLoc S = getLoc();
8189
8190 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8191 if (trySkipId(IdSymbolic[ModeId])) {
8192 Mode = 1 << ModeId;
8193 break;
8194 }
8195 }
8196
8197 if (Mode == 0) {
8198 Error(S, (Imm == 0)?
8199 "expected a VGPR index mode or a closing parenthesis" :
8200 "expected a VGPR index mode");
8201 return UNDEF;
8202 }
8203
8204 if (Imm & Mode) {
8205 Error(S, "duplicate VGPR index mode");
8206 return UNDEF;
8207 }
8208 Imm |= Mode;
8209
8210 if (trySkipToken(AsmToken::RParen))
8211 break;
8212 if (!skipToken(AsmToken::Comma,
8213 "expected a comma or a closing parenthesis"))
8214 return UNDEF;
8215 }
8216
8217 return Imm;
8218}
8219
8220ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8221
8222 using namespace llvm::AMDGPU::VGPRIndexMode;
8223
8224 int64_t Imm = 0;
8225 SMLoc S = getLoc();
8226
8227 if (trySkipId("gpr_idx", AsmToken::LParen)) {
8228 Imm = parseGPRIdxMacro();
8229 if (Imm == UNDEF)
8230 return ParseStatus::Failure;
8231 } else {
8232 if (getParser().parseAbsoluteExpression(Imm))
8233 return ParseStatus::Failure;
8234 if (Imm < 0 || !isUInt<4>(Imm))
8235 return Error(S, "invalid immediate: only 4-bit values are legal");
8236 }
8237
8238 Operands.push_back(
8239 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8240 return ParseStatus::Success;
8241}
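// Sketch of accepted forms (illustrative):
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)
// or a raw 4-bit mode immediate; unknown or duplicate mode names are rejected above.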
8242
8243bool AMDGPUOperand::isGPRIdxMode() const {
8244 return isImmTy(ImmTyGprIdxMode);
8245}
8246
8247//===----------------------------------------------------------------------===//
8248// sopp branch targets
8249//===----------------------------------------------------------------------===//
8250
8251ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
8252
8253 // Make sure we are not parsing something
8254 // that looks like a label or an expression but is not.
8255 // This will improve error messages.
8256 if (isRegister() || isModifier())
8257 return ParseStatus::NoMatch;
8258
8259 if (!parseExpr(Operands))
8260 return ParseStatus::Failure;
8261
8262 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
8263 assert(Opr.isImm() || Opr.isExpr());
8264 SMLoc Loc = Opr.getStartLoc();
8265
8266 // Currently we do not support arbitrary expressions as branch targets.
8267 // Only labels and absolute expressions are accepted.
8268 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8269 Error(Loc, "expected an absolute expression or a label");
8270 } else if (Opr.isImm() && !Opr.isS16Imm()) {
8271 Error(Loc, "expected a 16-bit signed jump offset");
8272 }
8273
8274 return ParseStatus::Success;
8275}
8276
8277//===----------------------------------------------------------------------===//
8278// Boolean holding registers
8279//===----------------------------------------------------------------------===//
8280
8281ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
8282 return parseReg(Operands);
8283}
8284
8285//===----------------------------------------------------------------------===//
8286// mubuf
8287//===----------------------------------------------------------------------===//
8288
8289void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
8290 const OperandVector &Operands,
8291 bool IsAtomic) {
8292 OptionalImmIndexMap OptionalIdx;
8293 unsigned FirstOperandIdx = 1;
8294 bool IsAtomicReturn = false;
8295
8296 if (IsAtomic) {
8297 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
8298 SIInstrFlags::IsAtomicRet;
8299 }
8300
8301 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
8302 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8303
8304 // Add the register arguments
8305 if (Op.isReg()) {
8306 Op.addRegOperands(Inst, 1);
8307 // Insert a tied src for atomic return dst.
8308 // This cannot be postponed as subsequent calls to
8309 // addImmOperands rely on correct number of MC operands.
8310 if (IsAtomicReturn && i == FirstOperandIdx)
8311 Op.addRegOperands(Inst, 1);
8312 continue;
8313 }
8314
8315 // Handle the case where soffset is an immediate
8316 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
8317 Op.addImmOperands(Inst, 1);
8318 continue;
8319 }
8320
8321 // Handle tokens like 'offen' which are sometimes hard-coded into the
8322 // asm string. There are no MCInst operands for these.
8323 if (Op.isToken()) {
8324 continue;
8325 }
8326 assert(Op.isImm());
8327
8328 // Handle optional arguments
8329 OptionalIdx[Op.getImmTy()] = i;
8330 }
8331
8332 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
8333 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
8334}
8335
8336//===----------------------------------------------------------------------===//
8337// smrd
8338//===----------------------------------------------------------------------===//
8339
8340bool AMDGPUOperand::isSMRDOffset8() const {
8341 return isImmLiteral() && isUInt<8>(getImm());
8342}
8343
8344bool AMDGPUOperand::isSMEMOffset() const {
8345 // Offset range is checked later by validator.
8346 return isImmLiteral();
8347}
8348
8349bool AMDGPUOperand::isSMRDLiteralOffset() const {
8350 // 32-bit literals are only supported on CI and we only want to use them
8351 // when the offset is > 8-bits.
8352 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
8353}
8354
8355//===----------------------------------------------------------------------===//
8356// vop3
8357//===----------------------------------------------------------------------===//
8358
8359static bool ConvertOmodMul(int64_t &Mul) {
8360 if (Mul != 1 && Mul != 2 && Mul != 4)
8361 return false;
8362
8363 Mul >>= 1;
8364 return true;
8365}
8366
8367static bool ConvertOmodDiv(int64_t &Div) {
8368 if (Div == 1) {
8369 Div = 0;
8370 return true;
8371 }
8372
8373 if (Div == 2) {
8374 Div = 3;
8375 return true;
8376 }
8377
8378 return false;
8379}
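// Sketch of how the output-modifier syntax maps to the omod field (illustrative):
//   v_add_f32 v0, v1, v2 mul:2   -> omod = 1 (ConvertOmodMul: 2 >> 1)
//   v_add_f32 v0, v1, v2 mul:4   -> omod = 2
//   v_add_f32 v0, v1, v2 div:2   -> omod = 3 (ConvertOmodDiv)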
8380
8381// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
8382// This is intentional and ensures compatibility with sp3.
8383// See bug 35397 for details.
8384bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
8385 if (BoundCtrl == 0 || BoundCtrl == 1) {
8386 if (!isGFX11Plus())
8387 BoundCtrl = 1;
8388 return true;
8389 }
8390 return false;
8391}
8392
8393void AMDGPUAsmParser::onBeginOfFile() {
8394 if (!getParser().getStreamer().getTargetStreamer() ||
8395 getSTI().getTargetTriple().getArch() == Triple::r600)
8396 return;
8397
8398 if (!getTargetStreamer().getTargetID())
8399 getTargetStreamer().initializeTargetID(getSTI(),
8400 getSTI().getFeatureString());
8401
8402 if (isHsaAbi(getSTI()))
8403 getTargetStreamer().EmitDirectiveAMDGCNTarget();
8404}
8405
8406/// Parse AMDGPU specific expressions.
8407///
8408/// expr ::= or(expr, ...) |
8409/// max(expr, ...)
8410///
8411bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
8412 using AGVK = AMDGPUMCExpr::VariantKind;
8413
8414 if (isToken(AsmToken::Identifier)) {
8415 StringRef TokenId = getTokenStr();
8416 AGVK VK = StringSwitch<AGVK>(TokenId)
8417 .Case("max", AGVK::AGVK_Max)
8418 .Case("or", AGVK::AGVK_Or)
8419 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
8420 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
8421 .Case("alignto", AGVK::AGVK_AlignTo)
8422 .Case("occupancy", AGVK::AGVK_Occupancy)
8423 .Default(AGVK::AGVK_None);
8424
8425 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
8426 SmallVector<const MCExpr *, 4> Exprs;
8427 uint64_t CommaCount = 0;
8428 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
8429 lex(); // Eat '('
8430 while (true) {
8431 if (trySkipToken(AsmToken::RParen)) {
8432 if (Exprs.empty()) {
8433 Error(getToken().getLoc(),
8434 "empty " + Twine(TokenId) + " expression");
8435 return true;
8436 }
8437 if (CommaCount + 1 != Exprs.size()) {
8438 Error(getToken().getLoc(),
8439 "mismatch of commas in " + Twine(TokenId) + " expression");
8440 return true;
8441 }
8442 Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
8443 return false;
8444 }
8445 const MCExpr *Expr;
8446 if (getParser().parseExpression(Expr, EndLoc))
8447 return true;
8448 Exprs.push_back(Expr);
8449 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
8450 if (LastTokenWasComma)
8451 CommaCount++;
8452 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
8453 Error(getToken().getLoc(),
8454 "unexpected token in " + Twine(TokenId) + " expression");
8455 return true;
8456 }
8457 }
8458 }
8459 }
8460 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
8461}
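// Sketch of the extended expressions accepted here (illustrative; the symbol
// names are placeholders): .set total_vgprs, max(func_a.num_vgpr, func_b.num_vgpr)
// Commas must separate the arguments, and an empty argument list is rejected.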
8462
8463ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
8464 StringRef Name = getTokenStr();
8465 if (Name == "mul") {
8466 return parseIntWithPrefix("mul", Operands,
8467 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8468 }
8469
8470 if (Name == "div") {
8471 return parseIntWithPrefix("div", Operands,
8472 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8473 }
8474
8475 return ParseStatus::NoMatch;
8476}
8477
8478// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8479// the number of src operands present, then copies that bit into src0_modifiers.
8480static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
8481 int Opc = Inst.getOpcode();
8482 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8483 if (OpSelIdx == -1)
8484 return;
8485
8486 int SrcNum;
8487 const int Ops[] = { AMDGPU::OpName::src0,
8488 AMDGPU::OpName::src1,
8489 AMDGPU::OpName::src2 };
8490 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
8491 ++SrcNum)
8492 ;
8493 assert(SrcNum > 0);
8494
8495 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8496
8497 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
8498 if (DstIdx == -1)
8499 return;
8500
8501 const MCOperand &DstOp = Inst.getOperand(DstIdx);
8502 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8503 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8504 if (DstOp.isReg() &&
8505 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
8506 if (AMDGPU::isHi(DstOp.getReg(), MRI))
8507 ModVal |= SISrcMods::DST_OP_SEL;
8508 } else {
8509 if ((OpSel & (1 << SrcNum)) != 0)
8510 ModVal |= SISrcMods::DST_OP_SEL;
8511 }
8512 Inst.getOperand(ModIdx).setImm(ModVal);
8513}
8514
8515void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
8516 const OperandVector &Operands) {
8517 cvtVOP3P(Inst, Operands);
8518 cvtVOP3DstOpSelOnly(Inst, *getMRI());
8519}
8520
8521void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
8522 OptionalImmIndexMap &OptionalIdx) {
8523 cvtVOP3P(Inst, Operands, OptionalIdx);
8524 cvtVOP3DstOpSelOnly(Inst, *getMRI());
8525}
8526
8527static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8528 return
8529 // 1. This operand is input modifiers
8530 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
8531 // 2. This is not last operand
8532 && Desc.NumOperands > (OpNum + 1)
8533 // 3. Next operand is register class
8534 && Desc.operands()[OpNum + 1].RegClass != -1
8535 // 4. Next register is not tied to any other operand
8536 && Desc.getOperandConstraint(OpNum + 1,
8537 MCOI::OperandConstraint::TIED_TO) == -1;
8538}
8539
8540void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8541{
8542 OptionalImmIndexMap OptionalIdx;
8543 unsigned Opc = Inst.getOpcode();
8544
8545 unsigned I = 1;
8546 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8547 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8548 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8549 }
8550
8551 for (unsigned E = Operands.size(); I != E; ++I) {
8552 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8553 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8554 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8555 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
8556 Op.isInterpAttrChan()) {
8557 Inst.addOperand(MCOperand::createImm(Op.getImm()));
8558 } else if (Op.isImmModifier()) {
8559 OptionalIdx[Op.getImmTy()] = I;
8560 } else {
8561 llvm_unreachable("unhandled operand type");
8562 }
8563 }
8564
8565 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
8566 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8567 AMDGPUOperand::ImmTyHigh);
8568
8569 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8570 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8571 AMDGPUOperand::ImmTyClamp);
8572
8573 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8574 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8575 AMDGPUOperand::ImmTyOModSI);
8576}
8577
8578void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8579{
8580 OptionalImmIndexMap OptionalIdx;
8581 unsigned Opc = Inst.getOpcode();
8582
8583 unsigned I = 1;
8584 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8585 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8586 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8587 }
8588
8589 for (unsigned E = Operands.size(); I != E; ++I) {
8590 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8591 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8592 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8593 } else if (Op.isImmModifier()) {
8594 OptionalIdx[Op.getImmTy()] = I;
8595 } else {
8596 llvm_unreachable("unhandled operand type");
8597 }
8598 }
8599
8600 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
8601
8602 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8603 if (OpSelIdx != -1)
8604 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8605
8606 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8607
8608 if (OpSelIdx == -1)
8609 return;
8610
8611 const int Ops[] = { AMDGPU::OpName::src0,
8612 AMDGPU::OpName::src1,
8613 AMDGPU::OpName::src2 };
8614 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8615 AMDGPU::OpName::src1_modifiers,
8616 AMDGPU::OpName::src2_modifiers };
8617
8618 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8619
8620 for (int J = 0; J < 3; ++J) {
8621 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8622 if (OpIdx == -1)
8623 break;
8624
8625 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8626 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8627
8628 if ((OpSel & (1 << J)) != 0)
8629 ModVal |= SISrcMods::OP_SEL_0;
8630 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8631 (OpSel & (1 << 3)) != 0)
8632 ModVal |= SISrcMods::DST_OP_SEL;
8633
8634 Inst.getOperand(ModIdx).setImm(ModVal);
8635 }
8636}
8637
8638void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8639 OptionalImmIndexMap &OptionalIdx) {
8640 unsigned Opc = Inst.getOpcode();
8641
8642 unsigned I = 1;
8643 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8644 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8645 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8646 }
8647
8648 for (unsigned E = Operands.size(); I != E; ++I) {
8649 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8650 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8651 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8652 } else if (Op.isImmModifier()) {
8653 OptionalIdx[Op.getImmTy()] = I;
8654 } else if (Op.isRegOrImm()) {
8655 Op.addRegOrImmOperands(Inst, 1);
8656 } else {
8657 llvm_unreachable("unhandled operand type");
8658 }
8659 }
8660
8661 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
8662 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
8663 Inst.addOperand(Inst.getOperand(0));
8664 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8665 AMDGPUOperand::ImmTyByteSel);
8666 }
8667
8668 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8669 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8670 AMDGPUOperand::ImmTyClamp);
8671
8672 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8673 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8674 AMDGPUOperand::ImmTyOModSI);
8675
8676 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8677 // they have a src2 register operand that is tied to the dst operand.
8678 // Modifiers are not allowed for this operand in the assembler, so src2_modifiers
8679 // must be 0.
8680 if (isMAC(Opc)) {
8681 auto it = Inst.begin();
8682 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8683 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8684 ++it;
8685 // Copy the operand to ensure it's not invalidated when Inst grows.
8686 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8687 }
8688}
8689
8690void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8691 OptionalImmIndexMap OptionalIdx;
8692 cvtVOP3(Inst, Operands, OptionalIdx);
8693}
8694
8695void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8696 OptionalImmIndexMap &OptIdx) {
8697 const int Opc = Inst.getOpcode();
8698 const MCInstrDesc &Desc = MII.get(Opc);
8699
8700 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8701
8702 if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8703 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
8704 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
8705 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
8706 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
8707 Inst.addOperand(Inst.getOperand(0));
8708 }
8709
8710 // Adding vdst_in operand is already covered for these DPP instructions in
8711 // cvtVOP3DPP.
8712 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
8713 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
8714 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
8715 Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
8716 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12 ||
8717 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
8718 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
8719 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
8720 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) {
8721 assert(!IsPacked);
8722 Inst.addOperand(Inst.getOperand(0));
8723 }
8724
8725 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
8726 // instruction, and then figure out where to actually put the modifiers
8727
8728 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8729 if (OpSelIdx != -1) {
8730 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8731 }
8732
8733 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8734 if (OpSelHiIdx != -1) {
8735 int DefaultVal = IsPacked ? -1 : 0;
8736 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8737 DefaultVal);
8738 }
8739
8740 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8741 if (NegLoIdx != -1)
8742 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8743
8744 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8745 if (NegHiIdx != -1)
8746 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8747
8748 const int Ops[] = { AMDGPU::OpName::src0,
8749 AMDGPU::OpName::src1,
8750 AMDGPU::OpName::src2 };
8751 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8752 AMDGPU::OpName::src1_modifiers,
8753 AMDGPU::OpName::src2_modifiers };
8754
8755 unsigned OpSel = 0;
8756 unsigned OpSelHi = 0;
8757 unsigned NegLo = 0;
8758 unsigned NegHi = 0;
8759
8760 if (OpSelIdx != -1)
8761 OpSel = Inst.getOperand(OpSelIdx).getImm();
8762
8763 if (OpSelHiIdx != -1)
8764 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8765
8766 if (NegLoIdx != -1)
8767 NegLo = Inst.getOperand(NegLoIdx).getImm();
8768
8769 if (NegHiIdx != -1)
8770 NegHi = Inst.getOperand(NegHiIdx).getImm();
8771
8772 for (int J = 0; J < 3; ++J) {
8773 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8774 if (OpIdx == -1)
8775 break;
8776
8777 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8778
8779 if (ModIdx == -1)
8780 continue;
8781
8782 uint32_t ModVal = 0;
8783
8784 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
8785 if (SrcOp.isReg() && getMRI()
8786 ->getRegClass(AMDGPU::VGPR_16RegClassID)
8787 .contains(SrcOp.getReg())) {
8788 bool VGPRSuffixIsHi = AMDGPU::isHi(SrcOp.getReg(), *getMRI());
8789 if (VGPRSuffixIsHi)
8790 ModVal |= SISrcMods::OP_SEL_0;
8791 } else {
8792 if ((OpSel & (1 << J)) != 0)
8793 ModVal |= SISrcMods::OP_SEL_0;
8794 }
8795
8796 if ((OpSelHi & (1 << J)) != 0)
8797 ModVal |= SISrcMods::OP_SEL_1;
8798
8799 if ((NegLo & (1 << J)) != 0)
8800 ModVal |= SISrcMods::NEG;
8801
8802 if ((NegHi & (1 << J)) != 0)
8803 ModVal |= SISrcMods::NEG_HI;
8804
8805 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8806 }
8807}
8808
8809void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8810 OptionalImmIndexMap OptIdx;
8811 cvtVOP3(Inst, Operands, OptIdx);
8812 cvtVOP3P(Inst, Operands, OptIdx);
8813}
8814
8815 static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
8816 unsigned i, unsigned Opc, unsigned OpName) {
8817 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
8818 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
8819 else
8820 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
8821}
8822
8823void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
8824 unsigned Opc = Inst.getOpcode();
8825
8826 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
8827 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
8828 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
8829 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
8830 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
8831
8832 OptionalImmIndexMap OptIdx;
8833 for (unsigned i = 5; i < Operands.size(); ++i) {
8834 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8835 OptIdx[Op.getImmTy()] = i;
8836 }
8837
8838 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
8839 addOptionalImmOperand(Inst, Operands, OptIdx,
8840 AMDGPUOperand::ImmTyIndexKey8bit);
8841
8842 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
8843 addOptionalImmOperand(Inst, Operands, OptIdx,
8844 AMDGPUOperand::ImmTyIndexKey16bit);
8845
8846 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8847 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
8848
8849 cvtVOP3P(Inst, Operands, OptIdx);
8850}
8851
8852//===----------------------------------------------------------------------===//
8853// VOPD
8854//===----------------------------------------------------------------------===//
8855
8856ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
8857 if (!hasVOPD(getSTI()))
8858 return ParseStatus::NoMatch;
8859
8860 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8861 SMLoc S = getLoc();
8862 lex();
8863 lex();
8864 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8865 SMLoc OpYLoc = getLoc();
8866 StringRef OpYName;
8867 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
8868 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
8869 return ParseStatus::Success;
8870 }
8871 return Error(OpYLoc, "expected a VOPDY instruction after ::");
8872 }
8873 return ParseStatus::NoMatch;
8874}
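// Sketch of the VOPD syntax recognized here (illustrative):
//   v_dual_mov_b32 v0, v1 :: v_dual_add_f32 v3, v4, v5
// The "::" token and the OpY mnemonic are pushed as separate parsed operands.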
8875
8876// Create VOPD MCInst operands using parsed assembler operands.
8877void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
8878 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
8879 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
8880 if (Op.isReg()) {
8881 Op.addRegOperands(Inst, 1);
8882 return;
8883 }
8884 if (Op.isImm()) {
8885 Op.addImmOperands(Inst, 1);
8886 return;
8887 }
8888 llvm_unreachable("Unhandled operand type in cvtVOPD");
8889 };
8890
8891 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
8892
8893 // MCInst operands are ordered as follows:
8894 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
8895
8896 for (auto CompIdx : VOPD::COMPONENTS) {
8897 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
8898 }
8899
8900 for (auto CompIdx : VOPD::COMPONENTS) {
8901 const auto &CInfo = InstInfo[CompIdx];
8902 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
8903 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
8904 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
8905 if (CInfo.hasSrc2Acc())
8906 addOp(CInfo.getIndexOfDstInParsedOperands());
8907 }
8908}
8909
8910//===----------------------------------------------------------------------===//
8911// dpp
8912//===----------------------------------------------------------------------===//
8913
8914bool AMDGPUOperand::isDPP8() const {
8915 return isImmTy(ImmTyDPP8);
8916}
8917
8918bool AMDGPUOperand::isDPPCtrl() const {
8919 using namespace AMDGPU::DPP;
8920
8921 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8922 if (result) {
8923 int64_t Imm = getImm();
8924 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8925 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8926 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8927 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8928 (Imm == DppCtrl::WAVE_SHL1) ||
8929 (Imm == DppCtrl::WAVE_ROL1) ||
8930 (Imm == DppCtrl::WAVE_SHR1) ||
8931 (Imm == DppCtrl::WAVE_ROR1) ||
8932 (Imm == DppCtrl::ROW_MIRROR) ||
8933 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8934 (Imm == DppCtrl::BCAST15) ||
8935 (Imm == DppCtrl::BCAST31) ||
8936 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8937 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8938 }
8939 return false;
8940}
8941
8942//===----------------------------------------------------------------------===//
8943// mAI
8944//===----------------------------------------------------------------------===//
8945
8946bool AMDGPUOperand::isBLGP() const {
8947 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8948}
8949
8950bool AMDGPUOperand::isS16Imm() const {
8951 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8952}
8953
8954bool AMDGPUOperand::isU16Imm() const {
8955 return isImmLiteral() && isUInt<16>(getImm());
8956}
8957
8958//===----------------------------------------------------------------------===//
8959// dim
8960//===----------------------------------------------------------------------===//
8961
8962bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8963 // We want to allow "dim:1D" etc.,
8964 // but the initial 1 is tokenized as an integer.
8965 std::string Token;
8966 if (isToken(AsmToken::Integer)) {
8967 SMLoc Loc = getToken().getEndLoc();
8968 Token = std::string(getTokenStr());
8969 lex();
8970 if (getLoc() != Loc)
8971 return false;
8972 }
8973
8974 StringRef Suffix;
8975 if (!parseId(Suffix))
8976 return false;
8977 Token += Suffix;
8978
8979 StringRef DimId = Token;
8980 if (DimId.starts_with("SQ_RSRC_IMG_"))
8981 DimId = DimId.drop_front(12);
8982
8983 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8984 if (!DimInfo)
8985 return false;
8986
8987 Encoding = DimInfo->Encoding;
8988 return true;
8989}
8990
8991ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8992 if (!isGFX10Plus())
8993 return ParseStatus::NoMatch;
8994
8995 SMLoc S = getLoc();
8996
8997 if (!trySkipId("dim", AsmToken::Colon))
8998 return ParseStatus::NoMatch;
8999
9000 unsigned Encoding;
9001 SMLoc Loc = getLoc();
9002 if (!parseDimId(Encoding))
9003 return Error(Loc, "invalid dim value");
9004
9005 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
9006 AMDGPUOperand::ImmTyDim));
9007 return ParseStatus::Success;
9008}
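// Sketch of accepted syntax (illustrative):
//   image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D
// "dim:1D" is also accepted; the SQ_RSRC_IMG_ prefix is stripped before lookup.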
9009
9010//===----------------------------------------------------------------------===//
9011// dpp
9012//===----------------------------------------------------------------------===//
9013
9014ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9015 SMLoc S = getLoc();
9016
9017 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9018 return ParseStatus::NoMatch;
9019
9020 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
9021
9022 int64_t Sels[8];
9023
9024 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9025 return ParseStatus::Failure;
9026
9027 for (size_t i = 0; i < 8; ++i) {
9028 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9029 return ParseStatus::Failure;
9030
9031 SMLoc Loc = getLoc();
9032 if (getParser().parseAbsoluteExpression(Sels[i]))
9033 return ParseStatus::Failure;
9034 if (0 > Sels[i] || 7 < Sels[i])
9035 return Error(Loc, "expected a 3-bit value");
9036 }
9037
9038 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9039 return ParseStatus::Failure;
9040
9041 unsigned DPP8 = 0;
9042 for (size_t i = 0; i < 8; ++i)
9043 DPP8 |= (Sels[i] << (i * 3));
9044
9045 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9046 return ParseStatus::Success;
9047}
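// Sketch of accepted syntax (illustrative):
//   v_mov_b32_dpp v0, v1 dpp8:[7,6,5,4,3,2,1,0]
// Each of the eight 3-bit selects picks the source lane within a group of 8 lanes.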
9048
9049bool
9050AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9051 const OperandVector &Operands) {
9052 if (Ctrl == "row_newbcast")
9053 return isGFX90A();
9054
9055 if (Ctrl == "row_share" ||
9056 Ctrl == "row_xmask")
9057 return isGFX10Plus();
9058
9059 if (Ctrl == "wave_shl" ||
9060 Ctrl == "wave_shr" ||
9061 Ctrl == "wave_rol" ||
9062 Ctrl == "wave_ror" ||
9063 Ctrl == "row_bcast")
9064 return isVI() || isGFX9();
9065
9066 return Ctrl == "row_mirror" ||
9067 Ctrl == "row_half_mirror" ||
9068 Ctrl == "quad_perm" ||
9069 Ctrl == "row_shl" ||
9070 Ctrl == "row_shr" ||
9071 Ctrl == "row_ror";
9072}
9073
9074int64_t
9075AMDGPUAsmParser::parseDPPCtrlPerm() {
9076 // quad_perm:[%d,%d,%d,%d]
9077
9078 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9079 return -1;
9080
9081 int64_t Val = 0;
9082 for (int i = 0; i < 4; ++i) {
9083 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9084 return -1;
9085
9086 int64_t Temp;
9087 SMLoc Loc = getLoc();
9088 if (getParser().parseAbsoluteExpression(Temp))
9089 return -1;
9090 if (Temp < 0 || Temp > 3) {
9091 Error(Loc, "expected a 2-bit value");
9092 return -1;
9093 }
9094
9095 Val += (Temp << i * 2);
9096 }
9097
9098 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9099 return -1;
9100
9101 return Val;
9102}
9103
9104int64_t
9105AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
9106 using namespace AMDGPU::DPP;
9107
9108 // sel:%d
9109
9110 int64_t Val;
9111 SMLoc Loc = getLoc();
9112
9113 if (getParser().parseAbsoluteExpression(Val))
9114 return -1;
9115
9116 struct DppCtrlCheck {
9117 int64_t Ctrl;
9118 int Lo;
9119 int Hi;
9120 };
9121
9122 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
9123 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
9124 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
9125 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
9126 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
9127 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
9128 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
9129 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
9130 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
9131 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
9132 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
9133 .Default({-1, 0, 0});
9134
9135 bool Valid;
9136 if (Check.Ctrl == -1) {
9137 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
9138 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
9139 } else {
9140 Valid = Check.Lo <= Val && Val <= Check.Hi;
9141 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
9142 }
9143
9144 if (!Valid) {
9145 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
9146 return -1;
9147 }
9148
9149 return Val;
9150}
9151
9152ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
9153 using namespace AMDGPU::DPP;
9154
9155 if (!isToken(AsmToken::Identifier) ||
9156 !isSupportedDPPCtrl(getTokenStr(), Operands))
9157 return ParseStatus::NoMatch;
9158
9159 SMLoc S = getLoc();
9160 int64_t Val = -1;
9161 StringRef Ctrl;
9162
9163 parseId(Ctrl);
9164
9165 if (Ctrl == "row_mirror") {
9166 Val = DppCtrl::ROW_MIRROR;
9167 } else if (Ctrl == "row_half_mirror") {
9168 Val = DppCtrl::ROW_HALF_MIRROR;
9169 } else {
9170 if (skipToken(AsmToken::Colon, "expected a colon")) {
9171 if (Ctrl == "quad_perm") {
9172 Val = parseDPPCtrlPerm();
9173 } else {
9174 Val = parseDPPCtrlSel(Ctrl);
9175 }
9176 }
9177 }
9178
9179 if (Val == -1)
9180 return ParseStatus::Failure;
9181
9182 Operands.push_back(
9183 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
9184 return ParseStatus::Success;
9185}
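// Sketch of accepted forms (illustrative):
//   v_mov_b32_dpp v0, v1 quad_perm:[1,0,3,2] row_mask:0xf bank_mask:0xf
//   v_mov_b32_dpp v0, v1 row_shl:1 bound_ctrl:0
// The dpp_ctrl value is range-checked per control name in parseDPPCtrlSel().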
9186
9187void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
9188 bool IsDPP8) {
9189 OptionalImmIndexMap OptionalIdx;
9190 unsigned Opc = Inst.getOpcode();
9191 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9192
9193 // MAC instructions are special because they have 'old'
9194 // operand which is not tied to dst (but assumed to be).
9195 // They also have dummy unused src2_modifiers.
9196 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
9197 int Src2ModIdx =
9198 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
9199 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
9200 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
9201
9202 unsigned I = 1;
9203 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9204 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9205 }
9206
9207 int Fi = 0;
9208 for (unsigned E = Operands.size(); I != E; ++I) {
9209
9210 if (IsMAC) {
9211 int NumOperands = Inst.getNumOperands();
9212 if (OldIdx == NumOperands) {
9213 // Handle old operand
9214 constexpr int DST_IDX = 0;
9215 Inst.addOperand(Inst.getOperand(DST_IDX));
9216 } else if (Src2ModIdx == NumOperands) {
9217 // Add unused dummy src2_modifiers
9218 Inst.addOperand(MCOperand::createImm(0));
9219 }
9220 }
9221
9222 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
9223 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
9224 Inst.addOperand(Inst.getOperand(0));
9225 }
9226
9227 bool IsVOP3CvtSrDpp =
9228 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9229 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9230 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9231 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
9232 if (IsVOP3CvtSrDpp) {
9233 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
9234 Inst.addOperand(MCOperand::createImm(0));
9235 Inst.addOperand(MCOperand::createReg(0));
9236 }
9237 }
9238
9239 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9240 MCOI::TIED_TO);
9241 if (TiedTo != -1) {
9242 assert((unsigned)TiedTo < Inst.getNumOperands());
9243 // handle tied old or src2 for MAC instructions
9244 Inst.addOperand(Inst.getOperand(TiedTo));
9245 }
9246 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9247 // Add the register arguments
9248 if (IsDPP8 && Op.isDppFI()) {
9249 Fi = Op.getImm();
9250 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9251 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9252 } else if (Op.isReg()) {
9253 Op.addRegOperands(Inst, 1);
9254 } else if (Op.isImm() &&
9255 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
9256 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
9257 Op.addImmOperands(Inst, 1);
9258 } else if (Op.isImm()) {
9259 OptionalIdx[Op.getImmTy()] = I;
9260 } else {
9261 llvm_unreachable("unhandled operand type");
9262 }
9263 }
9264
9265 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel))
9266 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9267 AMDGPUOperand::ImmTyByteSel);
9268
9269 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9270 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9271 AMDGPUOperand::ImmTyClamp);
9272
9273 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9274 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
9275
9276 if (Desc.TSFlags & SIInstrFlags::VOP3P)
9277 cvtVOP3P(Inst, Operands, OptionalIdx);
9278 else if (Desc.TSFlags & SIInstrFlags::VOP3)
9279 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
9280 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9281 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9282 }
9283
9284 if (IsDPP8) {
9285 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
9286 using namespace llvm::AMDGPU::DPP;
9287 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9288 } else {
9289 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
9290 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9291 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9292 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9293
9294 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
9295 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9296 AMDGPUOperand::ImmTyDppFI);
9297 }
9298}
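// Defaults applied above when the corresponding operand was not written:
// dpp_ctrl = 0xe4 (identity quad_perm), row_mask = 0xf, bank_mask = 0xf,
// bound_ctrl = 0. For DPP8, the FI bit is emitted from any parsed fi operand.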
9299
9300void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
9301 OptionalImmIndexMap OptionalIdx;
9302
9303 unsigned I = 1;
9304 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9305 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9306 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9307 }
9308
9309 int Fi = 0;
9310 for (unsigned E = Operands.size(); I != E; ++I) {
9311 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9312 MCOI::TIED_TO);
9313 if (TiedTo != -1) {
9314 assert((unsigned)TiedTo < Inst.getNumOperands());
9315 // handle tied old or src2 for MAC instructions
9316 Inst.addOperand(Inst.getOperand(TiedTo));
9317 }
9318 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9319 // Add the register arguments
9320 if (Op.isReg() && validateVccOperand(Op.getReg())) {
9321 // VOP2b (v_add_u32, v_sub_u32, ...) DPP forms use the "vcc" token.
9322 // Skip it.
9323 continue;
9324 }
9325
9326 if (IsDPP8) {
9327 if (Op.isDPP8()) {
9328 Op.addImmOperands(Inst, 1);
9329 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9330 Op.addRegWithFPInputModsOperands(Inst, 2);
9331 } else if (Op.isDppFI()) {
9332 Fi = Op.getImm();
9333 } else if (Op.isReg()) {
9334 Op.addRegOperands(Inst, 1);
9335 } else {
9336 llvm_unreachable("Invalid operand type");
9337 }
9338 } else {
9339 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9340 Op.addRegWithFPInputModsOperands(Inst, 2);
9341 } else if (Op.isReg()) {
9342 Op.addRegOperands(Inst, 1);
9343 } else if (Op.isDPPCtrl()) {
9344 Op.addImmOperands(Inst, 1);
9345 } else if (Op.isImm()) {
9346 // Handle optional arguments
9347 OptionalIdx[Op.getImmTy()] = I;
9348 } else {
9349 llvm_unreachable("Invalid operand type");
9350 }
9351 }
9352 }
9353
9354 if (IsDPP8) {
9355 using namespace llvm::AMDGPU::DPP;
9356 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9357 } else {
9358 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9359 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9360 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9361 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
9362 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9363 AMDGPUOperand::ImmTyDppFI);
9364 }
9365 }
9366}
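// Note on the vcc skip above: for VOP2b opcodes the "vcc" register written in
// the source is purely syntactic in the DPP form, so validateVccOperand()
// detects it and the operand is dropped rather than emitted into the MCInst.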
9367
9368//===----------------------------------------------------------------------===//
9369// sdwa
9370//===----------------------------------------------------------------------===//
9371
9372ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
9373 StringRef Prefix,
9374 AMDGPUOperand::ImmTy Type) {
9375 using namespace llvm::AMDGPU::SDWA;
9376
9377 SMLoc S = getLoc();
9378 StringRef Value;
9379
9380 SMLoc StringLoc;
9381 ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc);
9382 if (!Res.isSuccess())
9383 return Res;
9384
9385 int64_t Int;
9386 Int = StringSwitch<int64_t>(Value)
9387 .Case("BYTE_0", SdwaSel::BYTE_0)
9388 .Case("BYTE_1", SdwaSel::BYTE_1)
9389 .Case("BYTE_2", SdwaSel::BYTE_2)
9390 .Case("BYTE_3", SdwaSel::BYTE_3)
9391 .Case("WORD_0", SdwaSel::WORD_0)
9392 .Case("WORD_1", SdwaSel::WORD_1)
9393 .Case("DWORD", SdwaSel::DWORD)
9394 .Default(0xffffffff);
9395
9396 if (Int == 0xffffffff)
9397 return Error(StringLoc, "invalid " + Twine(Prefix) + " value");
9398
9399 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
9400 return ParseStatus::Success;
9401}
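// Accepts the usual SDWA selector syntax, e.g. dst_sel:WORD_1 or
// src0_sel:BYTE_3; the names map onto the SdwaSel encodings
// (BYTE_0..BYTE_3, WORD_0, WORD_1, DWORD).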
9402
9403ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
9404 using namespace llvm::AMDGPU::SDWA;
9405
9406 SMLoc S = getLoc();
9407 StringRef Value;
9408
9409 SMLoc StringLoc;
9410 ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc);
9411 if (!Res.isSuccess())
9412 return Res;
9413
9414 int64_t Int;
9415 Int = StringSwitch<int64_t>(Value)
9416 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
9417 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
9418 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
9419 .Default(0xffffffff);
9420
9421 if (Int == 0xffffffff)
9422 return Error(StringLoc, "invalid dst_unused value");
9423
9424 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused));
9425 return ParseStatus::Success;
9426}
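// dst_unused controls what happens to destination bits outside dst_sel:
// roughly, UNUSED_PAD zero-fills, UNUSED_SEXT sign-extends, and
// UNUSED_PRESERVE keeps the previous register contents.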
9427
9428void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
9429 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
9430}
9431
9432void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
9433 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
9434}
9435
9436void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
9437 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
9438}
9439
9440void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
9441 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
9442}
9443
9444void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
9445 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
9446}
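// The wrappers above differ only in which vcc operands cvtSDWA() skips:
// cvtSdwaVOP2b skips both the dst and src vcc, cvtSdwaVOP2e skips only the
// src vcc, and cvtSdwaVOPC skips the dst vcc only on VI.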
9447
9448void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
9449 uint64_t BasicInstType,
9450 bool SkipDstVcc,
9451 bool SkipSrcVcc) {
9452 using namespace llvm::AMDGPU::SDWA;
9453
9454 OptionalImmIndexMap OptionalIdx;
9455 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
9456 bool SkippedVcc = false;
9457
9458 unsigned I = 1;
9459 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9460 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9461 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9462 }
9463
9464 for (unsigned E = Operands.size(); I != E; ++I) {
9465 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9466 if (SkipVcc && !SkippedVcc && Op.isReg() &&
9467 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
9468 // VOP2b (v_add_u32, v_sub_u32, ...) SDWA forms use the "vcc" token as dst.
9469 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
9470 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
9471 // Skip VCC only if we didn't skip it on previous iteration.
9472 // Note that src0 and src1 occupy 2 slots each because of modifiers.
9473 if (BasicInstType == SIInstrFlags::VOP2 &&
9474 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
9475 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
9476 SkippedVcc = true;
9477 continue;
9478 } else if (BasicInstType == SIInstrFlags::VOPC &&
9479 Inst.getNumOperands() == 0) {
9480 SkippedVcc = true;
9481 continue;
9482 }
9483 }
9484 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9485 Op.addRegOrImmWithInputModsOperands(Inst, 2);
9486 } else if (Op.isImm()) {
9487 // Handle optional arguments
9488 OptionalIdx[Op.getImmTy()] = I;
9489 } else {
9490 llvm_unreachable("Invalid operand type");
9491 }
9492 SkippedVcc = false;
9493 }
9494
9495 const unsigned Opc = Inst.getOpcode();
9496 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
9497 Opc != AMDGPU::V_NOP_sdwa_vi) {
9498 // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
9499 switch (BasicInstType) {
9500 case SIInstrFlags::VOP1:
9501 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9502 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9503 AMDGPUOperand::ImmTyClamp, 0);
9504
9505 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9506 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9507 AMDGPUOperand::ImmTyOModSI, 0);
9508
9509 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
9510 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9511 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9512
9513 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
9514 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9515 AMDGPUOperand::ImmTySDWADstUnused,
9516 DstUnused::UNUSED_PRESERVE);
9517
9518 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9519 break;
9520
9521 case SIInstrFlags::VOP2:
9522 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9523 AMDGPUOperand::ImmTyClamp, 0);
9524
9525 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
9526 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
9527
9528 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9529 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
9530 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9531 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9532 break;
9533
9534 case SIInstrFlags::VOPC:
9535 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
9536 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9537 AMDGPUOperand::ImmTyClamp, 0);
9538 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9539 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9540 break;
9541
9542 default:
9543 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9544 }
9545 }
9546
9547 // special case v_mac_{f16, f32}:
9548 // it has src2 register operand that is tied to dst operand
9549 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9550 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
9551 auto it = Inst.begin();
9552 std::advance(
9553 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
9554 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
9555 }
9556}
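// Summary of the defaults applied above: the three selectors default to
// DWORD, dst_unused defaults to UNUSED_PRESERVE, and clamp/omod default to 0.
// For v_mac_f16/f32 the destination register is re-inserted as src2 because
// src2 is tied to dst.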
9557
9558/// Force static initialization.
9562}
9563
9564#define GET_REGISTER_MATCHER
9565#define GET_MATCHER_IMPLEMENTATION
9566#define GET_MNEMONIC_SPELL_CHECKER
9567#define GET_MNEMONIC_CHECKER
9568#include "AMDGPUGenAsmMatcher.inc"
9569
9570ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
9571 unsigned MCK) {
9572 switch (MCK) {
9573 case MCK_addr64:
9574 return parseTokenOp("addr64", Operands);
9575 case MCK_done:
9576 return parseTokenOp("done", Operands);
9577 case MCK_idxen:
9578 return parseTokenOp("idxen", Operands);
9579 case MCK_lds:
9580 return parseTokenOp("lds", Operands);
9581 case MCK_offen:
9582 return parseTokenOp("offen", Operands);
9583 case MCK_off:
9584 return parseTokenOp("off", Operands);
9585 case MCK_row_95_en:
9586 return parseTokenOp("row_en", Operands);
9587 case MCK_gds:
9588 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
9589 case MCK_tfe:
9590 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
9591 }
9592 return tryCustomParseOperand(Operands, MCK);
9593}
9594
9595// This function should be defined after auto-generated include so that we have
9596// MatchClassKind enum defined
9597unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
9598 unsigned Kind) {
9599 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
9600 // But MatchInstructionImpl() expects to meet token and fails to validate
9601 // operand. This method checks whether we were given an immediate operand but
9602 // expected the corresponding token.
9603 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9604 switch (Kind) {
9605 case MCK_addr64:
9606 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9607 case MCK_gds:
9608 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9609 case MCK_lds:
9610 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9611 case MCK_idxen:
9612 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9613 case MCK_offen:
9614 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9615 case MCK_tfe:
9616 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
9617 case MCK_SSrc_b32:
9618 // When operands have expression values, they will return true for isToken,
9619 // because it is not possible to distinguish between a token and an
9620 // expression at parse time. MatchInstructionImpl() will always try to
9621 // match an operand as a token, when isToken returns true, and when the
9622 // name of the expression is not a valid token, the match will fail,
9623 // so we need to handle it here.
9624 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
9625 case MCK_SSrc_f32:
9626 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
9627 case MCK_SOPPBrTarget:
9628 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
9629 case MCK_VReg32OrOff:
9630 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9631 case MCK_InterpSlot:
9632 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9633 case MCK_InterpAttr:
9634 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9635 case MCK_InterpAttrChan:
9636 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
9637 case MCK_SReg_64:
9638 case MCK_SReg_64_XEXEC:
9639 // Null is defined as a 32-bit register but
9640 // it should also be enabled with 64-bit operands.
9641 // The following code enables it for SReg_64 operands
9642 // used as source and destination. Remaining source
9643 // operands are handled in isInlinableImm.
9644 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9645 default:
9646 return Match_InvalidOperand;
9647 }
9648}
9649
9650//===----------------------------------------------------------------------===//
9651// endpgm
9652//===----------------------------------------------------------------------===//
9653
9654ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
9655 SMLoc S = getLoc();
9656 int64_t Imm = 0;
9657
9658 if (!parseExpr(Imm)) {
9659 // The operand is optional, if not present default to 0
9660 Imm = 0;
9661 }
9662
9663 if (!isUInt<16>(Imm))
9664 return Error(S, "expected a 16-bit value");
9665
9666 Operands.push_back(
9667 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9668 return ParseStatus::Success;
9669}
9670
9671bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
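// Illustrative use: "s_endpgm" with no operand defaults the immediate to 0,
// while "s_endpgm 3" (on targets that accept the operand) must fit in 16 bits.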
9672
9673//===----------------------------------------------------------------------===//
9674// Split Barrier
9675//===----------------------------------------------------------------------===//
9676
9677bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }