AMDGPUAsmParser.cpp
1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
14#include "SIDefines.h"
15#include "SIInstrInfo.h"
16#include "SIRegisterInfo.h"
21#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/StringSet.h"
24#include "llvm/ADT/Twine.h"
27#include "llvm/MC/MCAsmInfo.h"
28#include "llvm/MC/MCContext.h"
29#include "llvm/MC/MCExpr.h"
30#include "llvm/MC/MCInst.h"
31#include "llvm/MC/MCInstrDesc.h"
36#include "llvm/MC/MCSymbol.h"
43#include <optional>
44
45using namespace llvm;
46using namespace llvm::AMDGPU;
47using namespace llvm::amdhsa;
48
49namespace {
50
51class AMDGPUAsmParser;
52
53enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
54
55//===----------------------------------------------------------------------===//
56// Operand
57//===----------------------------------------------------------------------===//
58
59class AMDGPUOperand : public MCParsedAsmOperand {
60 enum KindTy {
61 Token,
62 Immediate,
63 Register,
64 Expression
65 } Kind;
66
67 SMLoc StartLoc, EndLoc;
68 const AMDGPUAsmParser *AsmParser;
69
70public:
71 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
72 : Kind(Kind_), AsmParser(AsmParser_) {}
73
74 using Ptr = std::unique_ptr<AMDGPUOperand>;
75
76 struct Modifiers {
77 bool Abs = false;
78 bool Neg = false;
79 bool Sext = false;
80 bool Lit = false;
81
82 bool hasFPModifiers() const { return Abs || Neg; }
83 bool hasIntModifiers() const { return Sext; }
84 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
85
86 int64_t getFPModifiersOperand() const {
87 int64_t Operand = 0;
88 Operand |= Abs ? SISrcMods::ABS : 0u;
89 Operand |= Neg ? SISrcMods::NEG : 0u;
90 return Operand;
91 }
92
93 int64_t getIntModifiersOperand() const {
94 int64_t Operand = 0;
95 Operand |= Sext ? SISrcMods::SEXT : 0u;
96 return Operand;
97 }
98
99 int64_t getModifiersOperand() const {
100 assert(!(hasFPModifiers() && hasIntModifiers())
101 && "fp and int modifiers should not be used simultaneously");
102 if (hasFPModifiers())
103 return getFPModifiersOperand();
104 if (hasIntModifiers())
105 return getIntModifiersOperand();
106 return 0;
107 }
108
109 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
110 };
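 // A minimal sketch of how these flags map onto the src_modifiers encoding,
 // assuming the usual modifier syntax: for a source written as "-|v0|" both
 // Abs and Neg are set, so getModifiersOperand() returns
 // SISrcMods::ABS | SISrcMods::NEG; for "sext(v0)" only Sext is set and
 // SISrcMods::SEXT is returned. The assert above guarantees the FP and int
 // modifier groups are never mixed on one operand.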
111
112 enum ImmTy {
113 ImmTyNone,
114 ImmTyGDS,
115 ImmTyLDS,
116 ImmTyOffen,
117 ImmTyIdxen,
118 ImmTyAddr64,
119 ImmTyOffset,
120 ImmTyInstOffset,
121 ImmTyOffset0,
122 ImmTyOffset1,
123 ImmTySMEMOffsetMod,
124 ImmTyCPol,
125 ImmTyTFE,
126 ImmTyD16,
127 ImmTyClamp,
128 ImmTyOModSI,
129 ImmTySDWADstSel,
130 ImmTySDWASrc0Sel,
131 ImmTySDWASrc1Sel,
132 ImmTySDWADstUnused,
133 ImmTyDMask,
134 ImmTyDim,
135 ImmTyUNorm,
136 ImmTyDA,
137 ImmTyR128A16,
138 ImmTyA16,
139 ImmTyLWE,
140 ImmTyExpTgt,
141 ImmTyExpCompr,
142 ImmTyExpVM,
143 ImmTyFORMAT,
144 ImmTyHwreg,
145 ImmTyOff,
146 ImmTySendMsg,
147 ImmTyInterpSlot,
148 ImmTyInterpAttr,
149 ImmTyInterpAttrChan,
150 ImmTyOpSel,
151 ImmTyOpSelHi,
152 ImmTyNegLo,
153 ImmTyNegHi,
154 ImmTyIndexKey8bit,
155 ImmTyIndexKey16bit,
156 ImmTyDPP8,
157 ImmTyDppCtrl,
158 ImmTyDppRowMask,
159 ImmTyDppBankMask,
160 ImmTyDppBoundCtrl,
161 ImmTyDppFI,
162 ImmTySwizzle,
163 ImmTyGprIdxMode,
164 ImmTyHigh,
165 ImmTyBLGP,
166 ImmTyCBSZ,
167 ImmTyABID,
168 ImmTyEndpgm,
169 ImmTyWaitVDST,
170 ImmTyWaitEXP,
171 ImmTyWaitVAVDst,
172 ImmTyWaitVMVSrc,
173 ImmTyByteSel,
174 };
175
176 // Immediate operand kind.
177 // It helps to identify the location of an offending operand after an error.
178 // Note that regular literals and mandatory literals (KImm) must be handled
179 // differently. When looking for an offending operand, we should usually
180 // ignore mandatory literals because they are part of the instruction and
181 // cannot be changed. Report location of mandatory operands only for VOPD,
182 // when both OpX and OpY have a KImm and there are no other literals.
183 enum ImmKindTy {
184 ImmKindTyNone,
185 ImmKindTyLiteral,
186 ImmKindTyMandatoryLiteral,
187 ImmKindTyConst,
188 };
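 // Sketch of the intended use, assuming the rules in the comment above: when a
 // literal-related error is reported, operands marked ImmKindTyLiteral are
 // candidates for the diagnostic location, while ImmKindTyMandatoryLiteral
 // (KImm) operands are skipped unless the VOPD case applies.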
189
190private:
191 struct TokOp {
192 const char *Data;
193 unsigned Length;
194 };
195
196 struct ImmOp {
197 int64_t Val;
198 ImmTy Type;
199 bool IsFPImm;
200 mutable ImmKindTy Kind;
201 Modifiers Mods;
202 };
203
204 struct RegOp {
205 unsigned RegNo;
206 Modifiers Mods;
207 };
208
209 union {
210 TokOp Tok;
211 ImmOp Imm;
212 RegOp Reg;
213 const MCExpr *Expr;
214 };
215
216public:
217 bool isToken() const override { return Kind == Token; }
218
219 bool isSymbolRefExpr() const {
220 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
221 }
222
223 bool isImm() const override {
224 return Kind == Immediate;
225 }
226
227 void setImmKindNone() const {
228 assert(isImm());
229 Imm.Kind = ImmKindTyNone;
230 }
231
232 void setImmKindLiteral() const {
233 assert(isImm());
234 Imm.Kind = ImmKindTyLiteral;
235 }
236
237 void setImmKindMandatoryLiteral() const {
238 assert(isImm());
239 Imm.Kind = ImmKindTyMandatoryLiteral;
240 }
241
242 void setImmKindConst() const {
243 assert(isImm());
244 Imm.Kind = ImmKindTyConst;
245 }
246
247 bool IsImmKindLiteral() const {
248 return isImm() && Imm.Kind == ImmKindTyLiteral;
249 }
250
251 bool IsImmKindMandatoryLiteral() const {
252 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
253 }
254
255 bool isImmKindConst() const {
256 return isImm() && Imm.Kind == ImmKindTyConst;
257 }
258
259 bool isInlinableImm(MVT type) const;
260 bool isLiteralImm(MVT type) const;
261
262 bool isRegKind() const {
263 return Kind == Register;
264 }
265
266 bool isReg() const override {
267 return isRegKind() && !hasModifiers();
268 }
269
270 bool isRegOrInline(unsigned RCID, MVT type) const {
271 return isRegClass(RCID) || isInlinableImm(type);
272 }
273
274 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
275 return isRegOrInline(RCID, type) || isLiteralImm(type);
276 }
277
278 bool isRegOrImmWithInt16InputMods() const {
279 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
280 }
281
282 bool isRegOrImmWithIntT16InputMods() const {
283 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
284 }
285
286 bool isRegOrImmWithInt32InputMods() const {
287 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
288 }
289
290 bool isRegOrInlineImmWithInt16InputMods() const {
291 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
292 }
293
294 bool isRegOrInlineImmWithInt32InputMods() const {
295 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
296 }
297
298 bool isRegOrImmWithInt64InputMods() const {
299 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
300 }
301
302 bool isRegOrImmWithFP16InputMods() const {
303 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
304 }
305
306 bool isRegOrImmWithFPT16InputMods() const {
307 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
308 }
309
310 bool isRegOrImmWithFP32InputMods() const {
311 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
312 }
313
314 bool isRegOrImmWithFP64InputMods() const {
315 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
316 }
317
318 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
319 return isRegOrInline(
320 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
321 }
322
323 bool isRegOrInlineImmWithFP32InputMods() const {
324 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
325 }
326
327 bool isPackedFP16InputMods() const {
328 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
329 }
330
331 bool isVReg() const {
332 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
333 isRegClass(AMDGPU::VReg_64RegClassID) ||
334 isRegClass(AMDGPU::VReg_96RegClassID) ||
335 isRegClass(AMDGPU::VReg_128RegClassID) ||
336 isRegClass(AMDGPU::VReg_160RegClassID) ||
337 isRegClass(AMDGPU::VReg_192RegClassID) ||
338 isRegClass(AMDGPU::VReg_256RegClassID) ||
339 isRegClass(AMDGPU::VReg_512RegClassID) ||
340 isRegClass(AMDGPU::VReg_1024RegClassID);
341 }
342
343 bool isVReg32() const {
344 return isRegClass(AMDGPU::VGPR_32RegClassID);
345 }
346
347 bool isVReg32OrOff() const {
348 return isOff() || isVReg32();
349 }
350
351 bool isNull() const {
352 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
353 }
354
355 bool isVRegWithInputMods() const;
356 template <bool IsFake16> bool isT16VRegWithInputMods() const;
357
358 bool isSDWAOperand(MVT type) const;
359 bool isSDWAFP16Operand() const;
360 bool isSDWAFP32Operand() const;
361 bool isSDWAInt16Operand() const;
362 bool isSDWAInt32Operand() const;
363
364 bool isImmTy(ImmTy ImmT) const {
365 return isImm() && Imm.Type == ImmT;
366 }
367
368 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
369
370 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
371
372 bool isImmModifier() const {
373 return isImm() && Imm.Type != ImmTyNone;
374 }
375
376 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
377 bool isDim() const { return isImmTy(ImmTyDim); }
378 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
379 bool isOff() const { return isImmTy(ImmTyOff); }
380 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
381 bool isOffen() const { return isImmTy(ImmTyOffen); }
382 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
383 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
384 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
385 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
386 bool isGDS() const { return isImmTy(ImmTyGDS); }
387 bool isLDS() const { return isImmTy(ImmTyLDS); }
388 bool isCPol() const { return isImmTy(ImmTyCPol); }
389 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
390 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
391 bool isTFE() const { return isImmTy(ImmTyTFE); }
392 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
393 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
394 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
395 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
396 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
397 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
398 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
399 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
400 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
401 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
402 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
403 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
404 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
405
406 bool isRegOrImm() const {
407 return isReg() || isImm();
408 }
409
410 bool isRegClass(unsigned RCID) const;
411
412 bool isInlineValue() const;
413
414 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
415 return isRegOrInline(RCID, type) && !hasModifiers();
416 }
417
418 bool isSCSrcB16() const {
419 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
420 }
421
422 bool isSCSrcV2B16() const {
423 return isSCSrcB16();
424 }
425
426 bool isSCSrc_b32() const {
427 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
428 }
429
430 bool isSCSrc_b64() const {
431 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
432 }
433
434 bool isBoolReg() const;
435
436 bool isSCSrcF16() const {
437 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
438 }
439
440 bool isSCSrcV2F16() const {
441 return isSCSrcF16();
442 }
443
444 bool isSCSrcF32() const {
445 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
446 }
447
448 bool isSCSrcF64() const {
449 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
450 }
451
452 bool isSSrc_b32() const {
453 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
454 }
455
456 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
457
458 bool isSSrcV2B16() const {
459 llvm_unreachable("cannot happen");
460 return isSSrc_b16();
461 }
462
463 bool isSSrc_b64() const {
464 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
465 // See isVSrc64().
466 return isSCSrc_b64() || isLiteralImm(MVT::i64);
467 }
468
469 bool isSSrc_f32() const {
470 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
471 }
472
473 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
474
475 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
476
477 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
478
479 bool isSSrcV2F16() const {
480 llvm_unreachable("cannot happen");
481 return isSSrc_f16();
482 }
483
484 bool isSSrcV2FP32() const {
485 llvm_unreachable("cannot happen");
486 return isSSrc_f32();
487 }
488
489 bool isSCSrcV2FP32() const {
490 llvm_unreachable("cannot happen");
491 return isSCSrcF32();
492 }
493
494 bool isSSrcV2INT32() const {
495 llvm_unreachable("cannot happen");
496 return isSSrc_b32();
497 }
498
499 bool isSCSrcV2INT32() const {
500 llvm_unreachable("cannot happen");
501 return isSCSrc_b32();
502 }
503
504 bool isSSrcOrLds_b32() const {
505 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
506 isLiteralImm(MVT::i32) || isExpr();
507 }
508
509 bool isVCSrc_b32() const {
510 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
511 }
512
513 bool isVCSrcB64() const {
514 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
515 }
516
517 bool isVCSrcTB16() const {
518 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
519 }
520
521 bool isVCSrcTB16_Lo128() const {
522 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
523 }
524
525 bool isVCSrcFake16B16_Lo128() const {
526 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
527 }
528
529 bool isVCSrc_b16() const {
530 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
531 }
532
533 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
534
535 bool isVCSrc_f32() const {
536 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
537 }
538
539 bool isVCSrcF64() const {
540 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
541 }
542
543 bool isVCSrcTBF16() const {
544 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
545 }
546
547 bool isVCSrcTF16() const {
548 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
549 }
550
551 bool isVCSrcTBF16_Lo128() const {
552 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
553 }
554
555 bool isVCSrcTF16_Lo128() const {
556 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
557 }
558
559 bool isVCSrcFake16BF16_Lo128() const {
560 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
561 }
562
563 bool isVCSrcFake16F16_Lo128() const {
564 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
565 }
566
567 bool isVCSrc_bf16() const {
568 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
569 }
570
571 bool isVCSrc_f16() const {
572 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
573 }
574
575 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
576
577 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
578
579 bool isVSrc_b32() const {
580 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
581 }
582
583 bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
584
585 bool isVSrcT_b16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
586
587 bool isVSrcT_b16_Lo128() const {
588 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
589 }
590
591 bool isVSrcFake16_b16_Lo128() const {
592 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
593 }
594
595 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
596
597 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
598
599 bool isVCSrcV2FP32() const {
600 return isVCSrcF64();
601 }
602
603 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
604
605 bool isVCSrcV2INT32() const {
606 return isVCSrcB64();
607 }
608
609 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
610
611 bool isVSrc_f32() const {
612 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
613 }
614
615 bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }
616
617 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
618
619 bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
620
621 bool isVSrcT_bf16_Lo128() const {
622 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
623 }
624
625 bool isVSrcT_f16_Lo128() const {
626 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
627 }
628
629 bool isVSrcFake16_bf16_Lo128() const {
630 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
631 }
632
633 bool isVSrcFake16_f16_Lo128() const {
634 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
635 }
636
637 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
638
639 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
640
641 bool isVSrc_v2bf16() const {
642 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
643 }
644
645 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
646
647 bool isVISrcB32() const {
648 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
649 }
650
651 bool isVISrcB16() const {
652 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
653 }
654
655 bool isVISrcV2B16() const {
656 return isVISrcB16();
657 }
658
659 bool isVISrcF32() const {
660 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
661 }
662
663 bool isVISrcF16() const {
664 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
665 }
666
667 bool isVISrcV2F16() const {
668 return isVISrcF16() || isVISrcB32();
669 }
670
671 bool isVISrc_64_bf16() const {
672 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
673 }
674
675 bool isVISrc_64_f16() const {
676 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
677 }
678
679 bool isVISrc_64_b32() const {
680 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
681 }
682
683 bool isVISrc_64B64() const {
684 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
685 }
686
687 bool isVISrc_64_f64() const {
688 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
689 }
690
691 bool isVISrc_64V2FP32() const {
692 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
693 }
694
695 bool isVISrc_64V2INT32() const {
696 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
697 }
698
699 bool isVISrc_256_b32() const {
700 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
701 }
702
703 bool isVISrc_256_f32() const {
704 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
705 }
706
707 bool isVISrc_256B64() const {
708 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
709 }
710
711 bool isVISrc_256_f64() const {
712 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
713 }
714
715 bool isVISrc_128B16() const {
716 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
717 }
718
719 bool isVISrc_128V2B16() const {
720 return isVISrc_128B16();
721 }
722
723 bool isVISrc_128_b32() const {
724 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
725 }
726
727 bool isVISrc_128_f32() const {
728 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
729 }
730
731 bool isVISrc_256V2FP32() const {
732 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
733 }
734
735 bool isVISrc_256V2INT32() const {
736 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
737 }
738
739 bool isVISrc_512_b32() const {
740 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
741 }
742
743 bool isVISrc_512B16() const {
744 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
745 }
746
747 bool isVISrc_512V2B16() const {
748 return isVISrc_512B16();
749 }
750
751 bool isVISrc_512_f32() const {
752 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
753 }
754
755 bool isVISrc_512F16() const {
756 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
757 }
758
759 bool isVISrc_512V2F16() const {
760 return isVISrc_512F16() || isVISrc_512_b32();
761 }
762
763 bool isVISrc_1024_b32() const {
764 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
765 }
766
767 bool isVISrc_1024B16() const {
768 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
769 }
770
771 bool isVISrc_1024V2B16() const {
772 return isVISrc_1024B16();
773 }
774
775 bool isVISrc_1024_f32() const {
776 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
777 }
778
779 bool isVISrc_1024F16() const {
780 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
781 }
782
783 bool isVISrc_1024V2F16() const {
784 return isVISrc_1024F16() || isVISrc_1024_b32();
785 }
786
787 bool isAISrcB32() const {
788 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
789 }
790
791 bool isAISrcB16() const {
792 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
793 }
794
795 bool isAISrcV2B16() const {
796 return isAISrcB16();
797 }
798
799 bool isAISrcF32() const {
800 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
801 }
802
803 bool isAISrcF16() const {
804 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
805 }
806
807 bool isAISrcV2F16() const {
808 return isAISrcF16() || isAISrcB32();
809 }
810
811 bool isAISrc_64B64() const {
812 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
813 }
814
815 bool isAISrc_64_f64() const {
816 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
817 }
818
819 bool isAISrc_128_b32() const {
820 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
821 }
822
823 bool isAISrc_128B16() const {
824 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
825 }
826
827 bool isAISrc_128V2B16() const {
828 return isAISrc_128B16();
829 }
830
831 bool isAISrc_128_f32() const {
832 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
833 }
834
835 bool isAISrc_128F16() const {
836 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
837 }
838
839 bool isAISrc_128V2F16() const {
840 return isAISrc_128F16() || isAISrc_128_b32();
841 }
842
843 bool isVISrc_128_bf16() const {
844 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
845 }
846
847 bool isVISrc_128_f16() const {
848 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
849 }
850
851 bool isVISrc_128V2F16() const {
852 return isVISrc_128_f16() || isVISrc_128_b32();
853 }
854
855 bool isAISrc_256B64() const {
856 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
857 }
858
859 bool isAISrc_256_f64() const {
860 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
861 }
862
863 bool isAISrc_512_b32() const {
864 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
865 }
866
867 bool isAISrc_512B16() const {
868 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
869 }
870
871 bool isAISrc_512V2B16() const {
872 return isAISrc_512B16();
873 }
874
875 bool isAISrc_512_f32() const {
876 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
877 }
878
879 bool isAISrc_512F16() const {
880 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
881 }
882
883 bool isAISrc_512V2F16() const {
884 return isAISrc_512F16() || isAISrc_512_b32();
885 }
886
887 bool isAISrc_1024_b32() const {
888 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
889 }
890
891 bool isAISrc_1024B16() const {
892 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
893 }
894
895 bool isAISrc_1024V2B16() const {
896 return isAISrc_1024B16();
897 }
898
899 bool isAISrc_1024_f32() const {
900 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
901 }
902
903 bool isAISrc_1024F16() const {
904 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
905 }
906
907 bool isAISrc_1024V2F16() const {
908 return isAISrc_1024F16() || isAISrc_1024_b32();
909 }
910
911 bool isKImmFP32() const {
912 return isLiteralImm(MVT::f32);
913 }
914
915 bool isKImmFP16() const {
916 return isLiteralImm(MVT::f16);
917 }
918
919 bool isMem() const override {
920 return false;
921 }
922
923 bool isExpr() const {
924 return Kind == Expression;
925 }
926
927 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
928
929 bool isSWaitCnt() const;
930 bool isDepCtr() const;
931 bool isSDelayALU() const;
932 bool isHwreg() const;
933 bool isSendMsg() const;
934 bool isSplitBarrier() const;
935 bool isSwizzle() const;
936 bool isSMRDOffset8() const;
937 bool isSMEMOffset() const;
938 bool isSMRDLiteralOffset() const;
939 bool isDPP8() const;
940 bool isDPPCtrl() const;
941 bool isBLGP() const;
942 bool isGPRIdxMode() const;
943 bool isS16Imm() const;
944 bool isU16Imm() const;
945 bool isEndpgm() const;
946
947 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
948 return [=](){ return P(*this); };
949 }
950
951 StringRef getToken() const {
952 assert(isToken());
953 return StringRef(Tok.Data, Tok.Length);
954 }
955
956 int64_t getImm() const {
957 assert(isImm());
958 return Imm.Val;
959 }
960
961 void setImm(int64_t Val) {
962 assert(isImm());
963 Imm.Val = Val;
964 }
965
966 ImmTy getImmTy() const {
967 assert(isImm());
968 return Imm.Type;
969 }
970
971 MCRegister getReg() const override {
972 assert(isRegKind());
973 return Reg.RegNo;
974 }
975
976 SMLoc getStartLoc() const override {
977 return StartLoc;
978 }
979
980 SMLoc getEndLoc() const override {
981 return EndLoc;
982 }
983
984 SMRange getLocRange() const {
985 return SMRange(StartLoc, EndLoc);
986 }
987
988 Modifiers getModifiers() const {
989 assert(isRegKind() || isImmTy(ImmTyNone));
990 return isRegKind() ? Reg.Mods : Imm.Mods;
991 }
992
993 void setModifiers(Modifiers Mods) {
994 assert(isRegKind() || isImmTy(ImmTyNone));
995 if (isRegKind())
996 Reg.Mods = Mods;
997 else
998 Imm.Mods = Mods;
999 }
1000
1001 bool hasModifiers() const {
1002 return getModifiers().hasModifiers();
1003 }
1004
1005 bool hasFPModifiers() const {
1006 return getModifiers().hasFPModifiers();
1007 }
1008
1009 bool hasIntModifiers() const {
1010 return getModifiers().hasIntModifiers();
1011 }
1012
1013 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1014
1015 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1016
1017 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1018
1019 void addRegOperands(MCInst &Inst, unsigned N) const;
1020
1021 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1022 if (isRegKind())
1023 addRegOperands(Inst, N);
1024 else
1025 addImmOperands(Inst, N);
1026 }
1027
1028 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1029 Modifiers Mods = getModifiers();
1030 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1031 if (isRegKind()) {
1032 addRegOperands(Inst, N);
1033 } else {
1034 addImmOperands(Inst, N, false);
1035 }
1036 }
1037
1038 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1039 assert(!hasIntModifiers());
1040 addRegOrImmWithInputModsOperands(Inst, N);
1041 }
1042
1043 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1044 assert(!hasFPModifiers());
1045 addRegOrImmWithInputModsOperands(Inst, N);
1046 }
1047
1048 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1049 Modifiers Mods = getModifiers();
1050 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1051 assert(isRegKind());
1052 addRegOperands(Inst, N);
1053 }
1054
1055 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1056 assert(!hasIntModifiers());
1057 addRegWithInputModsOperands(Inst, N);
1058 }
1059
1060 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1061 assert(!hasFPModifiers());
1062 addRegWithInputModsOperands(Inst, N);
1063 }
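 // Note, as a minimal sketch of the resulting operand order: the
 // add*InputMods*Operands() helpers above emit the modifiers immediate first
 // and the register or literal second, so the MCInst carries a modifiers value
 // immediately before the corresponding source operand.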
1064
1065 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1066 // clang-format off
1067 switch (Type) {
1068 case ImmTyNone: OS << "None"; break;
1069 case ImmTyGDS: OS << "GDS"; break;
1070 case ImmTyLDS: OS << "LDS"; break;
1071 case ImmTyOffen: OS << "Offen"; break;
1072 case ImmTyIdxen: OS << "Idxen"; break;
1073 case ImmTyAddr64: OS << "Addr64"; break;
1074 case ImmTyOffset: OS << "Offset"; break;
1075 case ImmTyInstOffset: OS << "InstOffset"; break;
1076 case ImmTyOffset0: OS << "Offset0"; break;
1077 case ImmTyOffset1: OS << "Offset1"; break;
1078 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1079 case ImmTyCPol: OS << "CPol"; break;
1080 case ImmTyIndexKey8bit: OS << "index_key"; break;
1081 case ImmTyIndexKey16bit: OS << "index_key"; break;
1082 case ImmTyTFE: OS << "TFE"; break;
1083 case ImmTyD16: OS << "D16"; break;
1084 case ImmTyFORMAT: OS << "FORMAT"; break;
1085 case ImmTyClamp: OS << "Clamp"; break;
1086 case ImmTyOModSI: OS << "OModSI"; break;
1087 case ImmTyDPP8: OS << "DPP8"; break;
1088 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1089 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1090 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1091 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1092 case ImmTyDppFI: OS << "DppFI"; break;
1093 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1094 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1095 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1096 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1097 case ImmTyDMask: OS << "DMask"; break;
1098 case ImmTyDim: OS << "Dim"; break;
1099 case ImmTyUNorm: OS << "UNorm"; break;
1100 case ImmTyDA: OS << "DA"; break;
1101 case ImmTyR128A16: OS << "R128A16"; break;
1102 case ImmTyA16: OS << "A16"; break;
1103 case ImmTyLWE: OS << "LWE"; break;
1104 case ImmTyOff: OS << "Off"; break;
1105 case ImmTyExpTgt: OS << "ExpTgt"; break;
1106 case ImmTyExpCompr: OS << "ExpCompr"; break;
1107 case ImmTyExpVM: OS << "ExpVM"; break;
1108 case ImmTyHwreg: OS << "Hwreg"; break;
1109 case ImmTySendMsg: OS << "SendMsg"; break;
1110 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1111 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1112 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1113 case ImmTyOpSel: OS << "OpSel"; break;
1114 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1115 case ImmTyNegLo: OS << "NegLo"; break;
1116 case ImmTyNegHi: OS << "NegHi"; break;
1117 case ImmTySwizzle: OS << "Swizzle"; break;
1118 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1119 case ImmTyHigh: OS << "High"; break;
1120 case ImmTyBLGP: OS << "BLGP"; break;
1121 case ImmTyCBSZ: OS << "CBSZ"; break;
1122 case ImmTyABID: OS << "ABID"; break;
1123 case ImmTyEndpgm: OS << "Endpgm"; break;
1124 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1125 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1126 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1127 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1128 case ImmTyByteSel: OS << "ByteSel" ; break;
1129 }
1130 // clang-format on
1131 }
1132
1133 void print(raw_ostream &OS) const override {
1134 switch (Kind) {
1135 case Register:
1136 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1137 break;
1138 case Immediate:
1139 OS << '<' << getImm();
1140 if (getImmTy() != ImmTyNone) {
1141 OS << " type: "; printImmTy(OS, getImmTy());
1142 }
1143 OS << " mods: " << Imm.Mods << '>';
1144 break;
1145 case Token:
1146 OS << '\'' << getToken() << '\'';
1147 break;
1148 case Expression:
1149 OS << "<expr " << *Expr << '>';
1150 break;
1151 }
1152 }
1153
1154 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1155 int64_t Val, SMLoc Loc,
1156 ImmTy Type = ImmTyNone,
1157 bool IsFPImm = false) {
1158 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1159 Op->Imm.Val = Val;
1160 Op->Imm.IsFPImm = IsFPImm;
1161 Op->Imm.Kind = ImmKindTyNone;
1162 Op->Imm.Type = Type;
1163 Op->Imm.Mods = Modifiers();
1164 Op->StartLoc = Loc;
1165 Op->EndLoc = Loc;
1166 return Op;
1167 }
1168
1169 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1170 StringRef Str, SMLoc Loc,
1171 bool HasExplicitEncodingSize = true) {
1172 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1173 Res->Tok.Data = Str.data();
1174 Res->Tok.Length = Str.size();
1175 Res->StartLoc = Loc;
1176 Res->EndLoc = Loc;
1177 return Res;
1178 }
1179
1180 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1181 unsigned RegNo, SMLoc S,
1182 SMLoc E) {
1183 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1184 Op->Reg.RegNo = RegNo;
1185 Op->Reg.Mods = Modifiers();
1186 Op->StartLoc = S;
1187 Op->EndLoc = E;
1188 return Op;
1189 }
1190
1191 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1192 const class MCExpr *Expr, SMLoc S) {
1193 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1194 Op->Expr = Expr;
1195 Op->StartLoc = S;
1196 Op->EndLoc = S;
1197 return Op;
1198 }
1199};
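// A minimal usage sketch, assuming a typical parse routine in this file:
//   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyOffset));
//   Operands.push_back(AMDGPUOperand::CreateReg(this, Reg, S, E));
// The matcher later interrogates these objects through the is*() predicates
// and serializes them with the add*Operands() methods above.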
1200
1201raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1202 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1203 return OS;
1204}
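// Example of the resulting debug output, assuming default modifiers: an
// immediate offset operand prints as "<16 type: Offset mods: abs:0 neg: 0 sext:0>"
// and a register operand as "<register N mods: abs:0 neg: 0 sext:0>", where N
// is the MCRegister number.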
1205
1206//===----------------------------------------------------------------------===//
1207// AsmParser
1208//===----------------------------------------------------------------------===//
1209
1210// Holds info related to the current kernel, e.g. the count of SGPRs used.
1211// A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
1212// next .amdgpu_hsa_kernel directive or at EOF.
1213class KernelScopeInfo {
1214 int SgprIndexUnusedMin = -1;
1215 int VgprIndexUnusedMin = -1;
1216 int AgprIndexUnusedMin = -1;
1217 MCContext *Ctx = nullptr;
1218 MCSubtargetInfo const *MSTI = nullptr;
1219
1220 void usesSgprAt(int i) {
1221 if (i >= SgprIndexUnusedMin) {
1222 SgprIndexUnusedMin = ++i;
1223 if (Ctx) {
1224 MCSymbol* const Sym =
1225 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1226 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1227 }
1228 }
1229 }
1230
1231 void usesVgprAt(int i) {
1232 if (i >= VgprIndexUnusedMin) {
1233 VgprIndexUnusedMin = ++i;
1234 if (Ctx) {
1235 MCSymbol* const Sym =
1236 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1237 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1238 VgprIndexUnusedMin);
1239 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1240 }
1241 }
1242 }
1243
1244 void usesAgprAt(int i) {
1245 // The instruction will be rejected in AMDGPUAsmParser::MatchAndEmitInstruction.
1246 if (!hasMAIInsts(*MSTI))
1247 return;
1248
1249 if (i >= AgprIndexUnusedMin) {
1250 AgprIndexUnusedMin = ++i;
1251 if (Ctx) {
1252 MCSymbol* const Sym =
1253 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1254 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1255
1256 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1257 MCSymbol* const vSym =
1258 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1259 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1260 VgprIndexUnusedMin);
1261 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1262 }
1263 }
1264 }
1265
1266public:
1267 KernelScopeInfo() = default;
1268
1269 void initialize(MCContext &Context) {
1270 Ctx = &Context;
1271 MSTI = Ctx->getSubtargetInfo();
1272
1273 usesSgprAt(SgprIndexUnusedMin = -1);
1274 usesVgprAt(VgprIndexUnusedMin = -1);
1275 if (hasMAIInsts(*MSTI)) {
1276 usesAgprAt(AgprIndexUnusedMin = -1);
1277 }
1278 }
1279
1280 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1281 unsigned RegWidth) {
1282 switch (RegKind) {
1283 case IS_SGPR:
1284 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1285 break;
1286 case IS_AGPR:
1287 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1288 break;
1289 case IS_VGPR:
1290 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1291 break;
1292 default:
1293 break;
1294 }
1295 }
1296};
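// A worked example, assuming a kernel scope opened by .amdgpu_hsa_kernel:
// after parsing "v_mov_b32 v7, s3", usesRegister(IS_VGPR, 7, 32) raises
// VgprIndexUnusedMin to 8 and refreshes .kernel.vgpr_count, while
// usesRegister(IS_SGPR, 3, 32) does the same for .kernel.sgpr_count. A 64-bit
// pair such as s[4:5] reports its last dword index (4 + divideCeil(64, 32) - 1 == 5).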
1297
1298class AMDGPUAsmParser : public MCTargetAsmParser {
1299 MCAsmParser &Parser;
1300
1301 unsigned ForcedEncodingSize = 0;
1302 bool ForcedDPP = false;
1303 bool ForcedSDWA = false;
1304 KernelScopeInfo KernelScope;
1305
1306 /// @name Auto-generated Match Functions
1307 /// {
1308
1309#define GET_ASSEMBLER_HEADER
1310#include "AMDGPUGenAsmMatcher.inc"
1311
1312 /// }
1313
1314private:
1315 void createConstantSymbol(StringRef Id, int64_t Val);
1316
1317 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1318 bool OutOfRangeError(SMRange Range);
1319 /// Calculate VGPR/SGPR blocks required for the given target, reserved
1320 /// registers, and user-specified NextFreeXGPR values.
1321 ///
1322 /// \param Features [in] Target features, used for bug corrections.
1323 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1324 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1325 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1326 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1327 /// descriptor field, if valid.
1328 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1329 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1330 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1331 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1332 /// \param VGPRBlocks [out] Result VGPR block count.
1333 /// \param SGPRBlocks [out] Result SGPR block count.
1334 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1335 const MCExpr *FlatScrUsed, bool XNACKUsed,
1336 std::optional<bool> EnableWavefrontSize32,
1337 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1338 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1339 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
1340 bool ParseDirectiveAMDGCNTarget();
1341 bool ParseDirectiveAMDHSACodeObjectVersion();
1342 bool ParseDirectiveAMDHSAKernel();
1343 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1344 bool ParseDirectiveAMDKernelCodeT();
1345 // TODO: Possibly make subtargetHasRegister const.
1346 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1347 bool ParseDirectiveAMDGPUHsaKernel();
1348
1349 bool ParseDirectiveISAVersion();
1350 bool ParseDirectiveHSAMetadata();
1351 bool ParseDirectivePALMetadataBegin();
1352 bool ParseDirectivePALMetadata();
1353 bool ParseDirectiveAMDGPULDS();
1354
1355 /// Common code to parse out a block of text (typically YAML) between start and
1356 /// end directives.
1357 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1358 const char *AssemblerDirectiveEnd,
1359 std::string &CollectString);
1360
1361 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1362 RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1363 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1364 unsigned &RegNum, unsigned &RegWidth,
1365 bool RestoreOnFailure = false);
1366 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1367 unsigned &RegNum, unsigned &RegWidth,
1368 SmallVectorImpl<AsmToken> &Tokens);
1369 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1370 unsigned &RegWidth,
1371 SmallVectorImpl<AsmToken> &Tokens);
1372 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1373 unsigned &RegWidth,
1374 SmallVectorImpl<AsmToken> &Tokens);
1375 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1376 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1377 bool ParseRegRange(unsigned& Num, unsigned& Width);
1378 unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg,
1379 unsigned RegWidth, SMLoc Loc);
1380
1381 bool isRegister();
1382 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1383 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1384 void initializeGprCountSymbol(RegisterKind RegKind);
1385 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1386 unsigned RegWidth);
1387 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1388 bool IsAtomic);
1389
1390public:
1391 enum OperandMode {
1392 OperandMode_Default,
1393 OperandMode_NSA,
1394 };
1395
1396 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1397
1398 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1399 const MCInstrInfo &MII,
1400 const MCTargetOptions &Options)
1401 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1403
1404 if (getFeatureBits().none()) {
1405 // Set default features.
1406 copySTI().ToggleFeature("southern-islands");
1407 }
1408
1409 FeatureBitset FB = getFeatureBits();
1410 if (!FB[AMDGPU::FeatureWavefrontSize64] &&
1411 !FB[AMDGPU::FeatureWavefrontSize32]) {
1412 // If there is no default wave size it must be a generation before gfx10;
1413 // these have FeatureWavefrontSize64 in their definition already. For
1414 // gfx10+, set wave32 as the default.
1415 copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
1416 }
1417
1418 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1419
1420 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1421 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1422 createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1423 createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1424 createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1425 } else {
1426 createConstantSymbol(".option.machine_version_major", ISA.Major);
1427 createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1428 createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1429 }
1430 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1431 initializeGprCountSymbol(IS_VGPR);
1432 initializeGprCountSymbol(IS_SGPR);
1433 } else
1434 KernelScope.initialize(getContext());
1435
1436 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1437 createConstantSymbol(Symbol, Code);
1438
1439 createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1440 createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1441 createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1442 }
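 // Net effect, as an illustrative sketch for one target: on an HSA gfx90a
 // subtarget the constructor defines .amdgcn.gfx_generation_number = 9,
 // .amdgcn.gfx_generation_minor = 0 and .amdgcn.gfx_generation_stepping = 10,
 // initializes the VGPR/SGPR count symbols, and also defines the UC_VERSION_*
 // constants listed above.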
1443
1444 bool hasMIMG_R128() const {
1445 return AMDGPU::hasMIMG_R128(getSTI());
1446 }
1447
1448 bool hasPackedD16() const {
1449 return AMDGPU::hasPackedD16(getSTI());
1450 }
1451
1452 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1453
1454 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1455
1456 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1457
1458 bool isSI() const {
1459 return AMDGPU::isSI(getSTI());
1460 }
1461
1462 bool isCI() const {
1463 return AMDGPU::isCI(getSTI());
1464 }
1465
1466 bool isVI() const {
1467 return AMDGPU::isVI(getSTI());
1468 }
1469
1470 bool isGFX9() const {
1471 return AMDGPU::isGFX9(getSTI());
1472 }
1473
1474 // TODO: isGFX90A is also true for GFX940. This needs to be cleaned up.
1475 bool isGFX90A() const {
1476 return AMDGPU::isGFX90A(getSTI());
1477 }
1478
1479 bool isGFX940() const {
1480 return AMDGPU::isGFX940(getSTI());
1481 }
1482
1483 bool isGFX9Plus() const {
1484 return AMDGPU::isGFX9Plus(getSTI());
1485 }
1486
1487 bool isGFX10() const {
1488 return AMDGPU::isGFX10(getSTI());
1489 }
1490
1491 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1492
1493 bool isGFX11() const {
1494 return AMDGPU::isGFX11(getSTI());
1495 }
1496
1497 bool isGFX11Plus() const {
1498 return AMDGPU::isGFX11Plus(getSTI());
1499 }
1500
1501 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1502
1503 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1504
1505 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1506
1507 bool isGFX10_BEncoding() const {
1508 return AMDGPU::isGFX10_BEncoding(getSTI());
1509 }
1510
1511 bool hasInv2PiInlineImm() const {
1512 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1513 }
1514
1515 bool hasFlatOffsets() const {
1516 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1517 }
1518
1519 bool hasArchitectedFlatScratch() const {
1520 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1521 }
1522
1523 bool hasSGPR102_SGPR103() const {
1524 return !isVI() && !isGFX9();
1525 }
1526
1527 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1528
1529 bool hasIntClamp() const {
1530 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1531 }
1532
1533 bool hasPartialNSAEncoding() const {
1534 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1535 }
1536
1537 unsigned getNSAMaxSize(bool HasSampler = false) const {
1538 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1539 }
1540
1541 unsigned getMaxNumUserSGPRs() const {
1542 return AMDGPU::getMaxNumUserSGPRs(getSTI());
1543 }
1544
1545 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1546
1547 AMDGPUTargetStreamer &getTargetStreamer() {
1548 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1549 return static_cast<AMDGPUTargetStreamer &>(TS);
1550 }
1551
1552 const MCRegisterInfo *getMRI() const {
1553 // We need this const_cast because for some reason getContext() is not const
1554 // in MCAsmParser.
1555 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1556 }
1557
1558 const MCInstrInfo *getMII() const {
1559 return &MII;
1560 }
1561
1562 const FeatureBitset &getFeatureBits() const {
1563 return getSTI().getFeatureBits();
1564 }
1565
1566 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1567 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1568 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1569
1570 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1571 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1572 bool isForcedDPP() const { return ForcedDPP; }
1573 bool isForcedSDWA() const { return ForcedSDWA; }
1574 ArrayRef<unsigned> getMatchedVariants() const;
1575 StringRef getMatchedVariantName() const;
1576
1577 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1578 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1579 bool RestoreOnFailure);
1580 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1581 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1582 SMLoc &EndLoc) override;
1583 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1584 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1585 unsigned Kind) override;
1586 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1587 OperandVector &Operands, MCStreamer &Out,
1588 uint64_t &ErrorInfo,
1589 bool MatchingInlineAsm) override;
1590 bool ParseDirective(AsmToken DirectiveID) override;
1591 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1592 OperandMode Mode = OperandMode_Default);
1593 StringRef parseMnemonicSuffix(StringRef Name);
1594 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1595 SMLoc NameLoc, OperandVector &Operands) override;
1596 //bool ProcessInstruction(MCInst &Inst);
1597
1599
1600 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1601
1602 ParseStatus
1603 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1604 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1605 std::function<bool(int64_t &)> ConvertResult = nullptr);
1606
1607 ParseStatus parseOperandArrayWithPrefix(
1608 const char *Prefix, OperandVector &Operands,
1609 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1610 bool (*ConvertResult)(int64_t &) = nullptr);
1611
1612 ParseStatus
1613 parseNamedBit(StringRef Name, OperandVector &Operands,
1614 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1615 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1617 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1618 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1619 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1620 SMLoc &StringLoc);
1621
1622 bool isModifier();
1623 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1624 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1625 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1626 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1627 bool parseSP3NegModifier();
1628 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1629 bool HasLit = false);
1631 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1632 bool HasLit = false);
1633 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1634 bool AllowImm = true);
1635 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1636 bool AllowImm = true);
1637 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1638 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1639 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1640 ParseStatus tryParseIndexKey(OperandVector &Operands,
1641 AMDGPUOperand::ImmTy ImmTy);
1642 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1643 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1644
1645 ParseStatus parseDfmtNfmt(int64_t &Format);
1646 ParseStatus parseUfmt(int64_t &Format);
1647 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1648 int64_t &Format);
1649 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1650 int64_t &Format);
1651 ParseStatus parseFORMAT(OperandVector &Operands);
1652 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1653 ParseStatus parseNumericFormat(int64_t &Format);
1654 ParseStatus parseFlatOffset(OperandVector &Operands);
1655 ParseStatus parseR128A16(OperandVector &Operands);
1657 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1658 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1659
1660 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1661
1662 bool parseCnt(int64_t &IntVal);
1663 ParseStatus parseSWaitCnt(OperandVector &Operands);
1664
1665 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1666 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1667 ParseStatus parseDepCtr(OperandVector &Operands);
1668
1669 bool parseDelay(int64_t &Delay);
1670 ParseStatus parseSDelayALU(OperandVector &Operands);
1671
1672 ParseStatus parseHwreg(OperandVector &Operands);
1673
1674private:
1675 struct OperandInfoTy {
1676 SMLoc Loc;
1677 int64_t Val;
1678 bool IsSymbolic = false;
1679 bool IsDefined = false;
1680
1681 OperandInfoTy(int64_t Val) : Val(Val) {}
1682 };
1683
1684 struct StructuredOpField : OperandInfoTy {
1685 StringLiteral Id;
1686 StringLiteral Desc;
1687 unsigned Width;
1688 bool IsDefined = false;
1689
1690 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1691 int64_t Default)
1692 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1693 virtual ~StructuredOpField() = default;
1694
1695 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1696 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1697 return false;
1698 }
1699
1700 virtual bool validate(AMDGPUAsmParser &Parser) const {
1701 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1702 return Error(Parser, "not supported on this GPU");
1703 if (!isUIntN(Width, Val))
1704 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1705 return true;
1706 }
1707 };
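 // A minimal sketch, assuming a hypothetical 6-bit field: a field constructed
 // as StructuredOpField("id", "hwreg id", /*Width=*/6, /*Default=*/0) accepts
 // values 0..63; validate() rejects anything wider via isUIntN(6, Val), and
 // Error() reports it as "invalid hwreg id: only 6-bit values are legal".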
1708
1709 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1710 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1711
1712 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1713 bool validateSendMsg(const OperandInfoTy &Msg,
1714 const OperandInfoTy &Op,
1715 const OperandInfoTy &Stream);
1716
1717 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1718 OperandInfoTy &Width);
1719
1720 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1721 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1722 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1723
1724 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1725 const OperandVector &Operands) const;
1726 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1727 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1728 SMLoc getLitLoc(const OperandVector &Operands,
1729 bool SearchMandatoryLiterals = false) const;
1730 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1731 SMLoc getConstLoc(const OperandVector &Operands) const;
1732 SMLoc getInstLoc(const OperandVector &Operands) const;
1733
1734 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1735 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1736 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1737 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1738 bool validateSOPLiteral(const MCInst &Inst) const;
1739 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1740 bool validateVOPDRegBankConstraints(const MCInst &Inst,
1741 const OperandVector &Operands);
1742 bool validateIntClampSupported(const MCInst &Inst);
1743 bool validateMIMGAtomicDMask(const MCInst &Inst);
1744 bool validateMIMGGatherDMask(const MCInst &Inst);
1745 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1746 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1747 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1748 bool validateMIMGD16(const MCInst &Inst);
1749 bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1750 bool validateMIMGMSAA(const MCInst &Inst);
1751 bool validateOpSel(const MCInst &Inst);
1752 bool validateNeg(const MCInst &Inst, int OpName);
1753 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1754 bool validateVccOperand(unsigned Reg) const;
1755 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1756 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1757 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1758 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1759 bool validateAGPRLdSt(const MCInst &Inst) const;
1760 bool validateVGPRAlign(const MCInst &Inst) const;
1761 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1762 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1763 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1764 bool validateDivScale(const MCInst &Inst);
1765 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1766 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1767 const SMLoc &IDLoc);
1768 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1769 const unsigned CPol);
1770 bool validateExeczVcczOperands(const OperandVector &Operands);
1771 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1772 std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1773 unsigned getConstantBusLimit(unsigned Opcode) const;
1774 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1775 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1776 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1777
1778 bool isSupportedMnemo(StringRef Mnemo,
1779 const FeatureBitset &FBS);
1780 bool isSupportedMnemo(StringRef Mnemo,
1781 const FeatureBitset &FBS,
1782 ArrayRef<unsigned> Variants);
1783 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1784
1785 bool isId(const StringRef Id) const;
1786 bool isId(const AsmToken &Token, const StringRef Id) const;
1787 bool isToken(const AsmToken::TokenKind Kind) const;
1788 StringRef getId() const;
1789 bool trySkipId(const StringRef Id);
1790 bool trySkipId(const StringRef Pref, const StringRef Id);
1791 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1792 bool trySkipToken(const AsmToken::TokenKind Kind);
1793 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1794 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1795 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1796
1797 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1798 AsmToken::TokenKind getTokenKind() const;
1799 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1801 StringRef getTokenStr() const;
1802 AsmToken peekToken(bool ShouldSkipSpace = true);
1803 AsmToken getToken() const;
1804 SMLoc getLoc() const;
1805 void lex();
1806
1807public:
1808 void onBeginOfFile() override;
1809 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1810
1811 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1812
1813 ParseStatus parseExpTgt(OperandVector &Operands);
1814 ParseStatus parseSendMsg(OperandVector &Operands);
1815 ParseStatus parseInterpSlot(OperandVector &Operands);
1816 ParseStatus parseInterpAttr(OperandVector &Operands);
1817 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1818 ParseStatus parseBoolReg(OperandVector &Operands);
1819
1820 bool parseSwizzleOperand(int64_t &Op,
1821 const unsigned MinVal,
1822 const unsigned MaxVal,
1823 const StringRef ErrMsg,
1824 SMLoc &Loc);
1825 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1826 const unsigned MinVal,
1827 const unsigned MaxVal,
1828 const StringRef ErrMsg);
1829 ParseStatus parseSwizzle(OperandVector &Operands);
1830 bool parseSwizzleOffset(int64_t &Imm);
1831 bool parseSwizzleMacro(int64_t &Imm);
1832 bool parseSwizzleQuadPerm(int64_t &Imm);
1833 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1834 bool parseSwizzleBroadcast(int64_t &Imm);
1835 bool parseSwizzleSwap(int64_t &Imm);
1836 bool parseSwizzleReverse(int64_t &Imm);
1837
1838 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1839 int64_t parseGPRIdxMacro();
1840
1841 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1842 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1843
1844 ParseStatus parseOModSI(OperandVector &Operands);
1845
1846 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1847 OptionalImmIndexMap &OptionalIdx);
1848 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1849 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1850 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1851 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1852
1853 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1854 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1855 OptionalImmIndexMap &OptionalIdx);
1856 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1857 OptionalImmIndexMap &OptionalIdx);
1858
1859 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1860 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1861
1862 bool parseDimId(unsigned &Encoding);
1864 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1866 ParseStatus parseDPPCtrl(OperandVector &Operands);
1867 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1868 int64_t parseDPPCtrlSel(StringRef Ctrl);
1869 int64_t parseDPPCtrlPerm();
1870 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1871 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1872 cvtDPP(Inst, Operands, true);
1873 }
1874 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1875 bool IsDPP8 = false);
1876 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1877 cvtVOP3DPP(Inst, Operands, true);
1878 }
1879
1880 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1881 AMDGPUOperand::ImmTy Type);
1882 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1883 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1884 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1885 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1886 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1887 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1888 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1889 uint64_t BasicInstType,
1890 bool SkipDstVcc = false,
1891 bool SkipSrcVcc = false);
1892
1893 ParseStatus parseEndpgm(OperandVector &Operands);
1894
1896};
1897
1898} // end anonymous namespace
1899
1900// May be called with an integer type of equivalent bitwidth.
1901static const fltSemantics *getFltSemantics(unsigned Size) {
1902 switch (Size) {
1903 case 4:
1904 return &APFloat::IEEEsingle();
1905 case 8:
1906 return &APFloat::IEEEdouble();
1907 case 2:
1908 return &APFloat::IEEEhalf();
1909 default:
1910 llvm_unreachable("unsupported fp type");
1911 }
1912}
1913
1914static const fltSemantics *getFltSemantics(MVT VT) {
 1915 return getFltSemantics(VT.getSizeInBits() / 8);
1916}
1917
1918static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
 1919 switch (OperandType) {
1920 // When floating-point immediate is used as operand of type i16, the 32-bit
1921 // representation of the constant truncated to the 16 LSBs should be used.
1941 return &APFloat::IEEEsingle();
1947 return &APFloat::IEEEdouble();
1956 return &APFloat::IEEEhalf();
1964 return &APFloat::BFloat();
1965 default:
1966 llvm_unreachable("unsupported fp type");
1967 }
1968}
1969
1970//===----------------------------------------------------------------------===//
1971// Operand
1972//===----------------------------------------------------------------------===//
1973
1974static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1975 bool Lost;
1976
 1977 // Convert the literal to the operand's floating-point type.
 1978 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
 1979 APFloat::rmNearestTiesToEven,
 1980 &Lost);
 1981 // We allow precision loss but not overflow or underflow.
1982 if (Status != APFloat::opOK &&
1983 Lost &&
1984 ((Status & APFloat::opOverflow) != 0 ||
1985 (Status & APFloat::opUnderflow) != 0)) {
1986 return false;
1987 }
1988
1989 return true;
1990}
1991
1992static bool isSafeTruncation(int64_t Val, unsigned Size) {
1993 return isUIntN(Size, Val) || isIntN(Size, Val);
1994}
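// Illustration (not from the upstream source): isSafeTruncation() accepts a
// value that fits in Size bits as either an unsigned or a signed quantity,
// for example:
//   isSafeTruncation(0xFFFF, 16)   -> true  (fits as unsigned 16-bit)
//   isSafeTruncation(-1, 16)       -> true  (fits as signed 16-bit)
//   isSafeTruncation(0x1FFFF, 16)  -> false (needs 17 bits)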
1995
1996static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1997 if (VT.getScalarType() == MVT::i16)
1998 return isInlinableLiteral32(Val, HasInv2Pi);
1999
2000 if (VT.getScalarType() == MVT::f16)
2001 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2002
2003 assert(VT.getScalarType() == MVT::bf16);
2004
2005 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2006}
2007
2008bool AMDGPUOperand::isInlinableImm(MVT type) const {
2009
2010 // This is a hack to enable named inline values like
2011 // shared_base with both 32-bit and 64-bit operands.
2012 // Note that these values are defined as
2013 // 32-bit operands only.
2014 if (isInlineValue()) {
2015 return true;
2016 }
2017
2018 if (!isImmTy(ImmTyNone)) {
2019 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2020 return false;
2021 }
2022 // TODO: We should avoid using host float here. It would be better to
2023 // check the float bit values which is what a few other places do.
2024 // We've had bot failures before due to weird NaN support on mips hosts.
2025
2026 APInt Literal(64, Imm.Val);
2027
2028 if (Imm.IsFPImm) { // We got fp literal token
2029 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
 2030 return AMDGPU::isInlinableLiteral64(Imm.Val,
 2031 AsmParser->hasInv2PiInlineImm());
2032 }
2033
2034 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2035 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2036 return false;
2037
2038 if (type.getScalarSizeInBits() == 16) {
2039 bool Lost = false;
2040 switch (type.getScalarType().SimpleTy) {
2041 default:
2042 llvm_unreachable("unknown 16-bit type");
2043 case MVT::bf16:
2044 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2045 &Lost);
2046 break;
2047 case MVT::f16:
2048 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2049 &Lost);
2050 break;
2051 case MVT::i16:
2052 FPLiteral.convert(APFloatBase::IEEEsingle(),
2053 APFloat::rmNearestTiesToEven, &Lost);
2054 break;
2055 }
 2056 // We need to use the 32-bit representation here because when a
 2057 // floating-point inline constant is used as an i16 operand, its 32-bit
 2058 // representation will be used. We will need the 32-bit value to check
 2059 // whether it is an FP inline constant.
2060 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2061 return isInlineableLiteralOp16(ImmVal, type,
2062 AsmParser->hasInv2PiInlineImm());
2063 }
2064
2065 // Check if single precision literal is inlinable
 2066 return AMDGPU::isInlinableLiteral32(
 2067 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2068 AsmParser->hasInv2PiInlineImm());
2069 }
2070
2071 // We got int literal token.
2072 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
 2073 return AMDGPU::isInlinableLiteral64(Imm.Val,
 2074 AsmParser->hasInv2PiInlineImm());
2075 }
2076
2077 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2078 return false;
2079 }
2080
2081 if (type.getScalarSizeInBits() == 16) {
 2082 return isInlineableLiteralOp16(
 2083 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2084 type, AsmParser->hasInv2PiInlineImm());
2085 }
2086
 2087 return AMDGPU::isInlinableLiteral32(
 2088 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2089 AsmParser->hasInv2PiInlineImm());
2090}
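// Illustration (not from the upstream source, hedged summary): the inline
// constants accepted above are the integers -16..64 and the fp values 0.0,
// +-0.5, +-1.0, +-2.0, +-4.0, plus 1/(2*pi) on subtargets where
// hasInv2PiInlineImm() is true; anything else must be emitted as a literal.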
2091
2092bool AMDGPUOperand::isLiteralImm(MVT type) const {
2093 // Check that this immediate can be added as literal
2094 if (!isImmTy(ImmTyNone)) {
2095 return false;
2096 }
2097
2098 if (!Imm.IsFPImm) {
2099 // We got int literal token.
2100
2101 if (type == MVT::f64 && hasFPModifiers()) {
2102 // Cannot apply fp modifiers to int literals preserving the same semantics
2103 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
2104 // disable these cases.
2105 return false;
2106 }
2107
2108 unsigned Size = type.getSizeInBits();
2109 if (Size == 64)
2110 Size = 32;
2111
2112 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2113 // types.
2114 return isSafeTruncation(Imm.Val, Size);
2115 }
2116
2117 // We got fp literal token
2118 if (type == MVT::f64) { // Expected 64-bit fp operand
 2119 // We would set the low 32 bits of the literal to zeroes, but we accept such literals anyway.
2120 return true;
2121 }
2122
2123 if (type == MVT::i64) { // Expected 64-bit int operand
2124 // We don't allow fp literals in 64-bit integer instructions. It is
2125 // unclear how we should encode them.
2126 return false;
2127 }
2128
2129 // We allow fp literals with f16x2 operands assuming that the specified
2130 // literal goes into the lower half and the upper half is zero. We also
2131 // require that the literal may be losslessly converted to f16.
2132 //
2133 // For i16x2 operands, we assume that the specified literal is encoded as a
2134 // single-precision float. This is pretty odd, but it matches SP3 and what
2135 // happens in hardware.
2136 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2137 : (type == MVT::v2i16) ? MVT::f32
2138 : (type == MVT::v2f32) ? MVT::f32
2139 : type;
2140
2141 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2142 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2143}
2144
2145bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2146 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2147}
2148
2149bool AMDGPUOperand::isVRegWithInputMods() const {
2150 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2151 // GFX90A allows DPP on 64-bit operands.
2152 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2153 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2154}
2155
2156template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2157 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2158 : AMDGPU::VGPR_16_Lo128RegClassID);
2159}
2160
2161bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2162 if (AsmParser->isVI())
2163 return isVReg32();
2164 if (AsmParser->isGFX9Plus())
2165 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2166 return false;
2167}
2168
2169bool AMDGPUOperand::isSDWAFP16Operand() const {
2170 return isSDWAOperand(MVT::f16);
2171}
2172
2173bool AMDGPUOperand::isSDWAFP32Operand() const {
2174 return isSDWAOperand(MVT::f32);
2175}
2176
2177bool AMDGPUOperand::isSDWAInt16Operand() const {
2178 return isSDWAOperand(MVT::i16);
2179}
2180
2181bool AMDGPUOperand::isSDWAInt32Operand() const {
2182 return isSDWAOperand(MVT::i32);
2183}
2184
2185bool AMDGPUOperand::isBoolReg() const {
2186 auto FB = AsmParser->getFeatureBits();
2187 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2188 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2189}
2190
2191uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2192{
2193 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2194 assert(Size == 2 || Size == 4 || Size == 8);
2195
2196 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2197
2198 if (Imm.Mods.Abs) {
2199 Val &= ~FpSignMask;
2200 }
2201 if (Imm.Mods.Neg) {
2202 Val ^= FpSignMask;
2203 }
2204
2205 return Val;
2206}
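// Illustration (not from the upstream source): for Size == 4 the sign mask
// is 0x80000000, so 'abs' clears that bit and 'neg' flips it. For example,
// applying neg to 0x3F800000 (1.0f) yields 0xBF800000 (-1.0f), and applying
// abs to 0xBF800000 yields 0x3F800000.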
2207
2208void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2209 if (isExpr()) {
 2210 Inst.addOperand(MCOperand::createExpr(Expr));
 2211 return;
2212 }
2213
2214 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2215 Inst.getNumOperands())) {
2216 addLiteralImmOperand(Inst, Imm.Val,
2217 ApplyModifiers &
2218 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2219 } else {
2220 assert(!isImmTy(ImmTyNone) || !hasModifiers());
 2221 Inst.addOperand(MCOperand::createImm(Imm.Val));
 2222 setImmKindNone();
2223 }
2224}
2225
2226void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2227 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2228 auto OpNum = Inst.getNumOperands();
2229 // Check that this operand accepts literals
2230 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2231
2232 if (ApplyModifiers) {
2233 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2234 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2235 Val = applyInputFPModifiers(Val, Size);
2236 }
2237
2238 APInt Literal(64, Val);
2239 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2240
2241 if (Imm.IsFPImm) { // We got fp literal token
2242 switch (OpTy) {
2248 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2249 AsmParser->hasInv2PiInlineImm())) {
2250 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2251 setImmKindConst();
2252 return;
2253 }
2254
2255 // Non-inlineable
2256 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2257 // For fp operands we check if low 32 bits are zeros
2258 if (Literal.getLoBits(32) != 0) {
2259 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2260 "Can't encode literal as exact 64-bit floating-point operand. "
2261 "Low 32-bits will be set to zero");
2262 Val &= 0xffffffff00000000u;
2263 }
2264
 2265 Inst.addOperand(MCOperand::createImm(Val));
 2266 setImmKindLiteral();
2267 return;
2268 }
2269
2270 // We don't allow fp literals in 64-bit integer instructions. It is
2271 // unclear how we should encode them. This case should be checked earlier
2272 // in predicate methods (isLiteralImm())
2273 llvm_unreachable("fp literal in 64-bit integer instruction.");
2274
2282 if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
 2283 // This is 1/(2*pi), which would be truncated to bf16 with a loss of
 2284 // precision. The constant represents the idiomatic fp32 value of
 2285 // 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16
 2286 // bits cleared. Prevent the rounding below.
2287 Inst.addOperand(MCOperand::createImm(0x3e22));
2288 setImmKindLiteral();
2289 return;
2290 }
2291 [[fallthrough]];
2292
2320 bool lost;
2321 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
 2322 // Convert the literal to the operand's floating-point type.
2323 FPLiteral.convert(*getOpFltSemantics(OpTy),
2324 APFloat::rmNearestTiesToEven, &lost);
 2325 // We allow precision loss but not overflow or underflow. This should
 2326 // have been checked earlier in isLiteralImm().
2327
2328 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2329 Inst.addOperand(MCOperand::createImm(ImmVal));
2330 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2331 setImmKindMandatoryLiteral();
2332 } else {
2333 setImmKindLiteral();
2334 }
2335 return;
2336 }
2337 default:
2338 llvm_unreachable("invalid operand size");
2339 }
2340
2341 return;
2342 }
2343
2344 // We got int literal token.
2345 // Only sign extend inline immediates.
2346 switch (OpTy) {
2362 if (isSafeTruncation(Val, 32) &&
2363 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2364 AsmParser->hasInv2PiInlineImm())) {
2366 setImmKindConst();
2367 return;
2368 }
2369
2370 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2371 setImmKindLiteral();
2372 return;
2373
2379 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2381 setImmKindConst();
2382 return;
2383 }
2384
2385 Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
2386 : Lo_32(Val);
2387
2389 setImmKindLiteral();
2390 return;
2391
2395 if (isSafeTruncation(Val, 16) &&
2396 AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2397 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2398 setImmKindConst();
2399 return;
2400 }
2401
2402 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2403 setImmKindLiteral();
2404 return;
2405
2410 if (isSafeTruncation(Val, 16) &&
2411 AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2412 AsmParser->hasInv2PiInlineImm())) {
2414 setImmKindConst();
2415 return;
2416 }
2417
2418 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2419 setImmKindLiteral();
2420 return;
2421
2426 if (isSafeTruncation(Val, 16) &&
2427 AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2428 AsmParser->hasInv2PiInlineImm())) {
2430 setImmKindConst();
2431 return;
2432 }
2433
2434 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2435 setImmKindLiteral();
2436 return;
2437
2440 assert(isSafeTruncation(Val, 16));
2441 assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2443 return;
2444 }
2447 assert(isSafeTruncation(Val, 16));
2448 assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2449 AsmParser->hasInv2PiInlineImm()));
2450
2452 return;
2453 }
2454
2457 assert(isSafeTruncation(Val, 16));
2458 assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2459 AsmParser->hasInv2PiInlineImm()));
2460
2462 return;
2463 }
2464
 2465 case AMDGPU::OPERAND_KIMM32:
 2466 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2467 setImmKindMandatoryLiteral();
2468 return;
 2469 case AMDGPU::OPERAND_KIMM16:
 2470 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2471 setImmKindMandatoryLiteral();
2472 return;
2473 default:
2474 llvm_unreachable("invalid operand size");
2475 }
2476}
2477
2478void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2479 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2480}
2481
2482bool AMDGPUOperand::isInlineValue() const {
2483 return isRegKind() && ::isInlineValue(getReg());
2484}
2485
2486//===----------------------------------------------------------------------===//
2487// AsmParser
2488//===----------------------------------------------------------------------===//
2489
2490void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
 2491 // TODO: make these pre-defined variables read-only.
 2492 // Currently there is no suitable machinery in core llvm-mc for this.
 2493 // MCSymbol::isRedefinable is intended for another purpose, and
 2494 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
2495 MCContext &Ctx = getContext();
2496 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2497 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2498}
2499
2500static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2501 if (Is == IS_VGPR) {
2502 switch (RegWidth) {
2503 default: return -1;
2504 case 32:
2505 return AMDGPU::VGPR_32RegClassID;
2506 case 64:
2507 return AMDGPU::VReg_64RegClassID;
2508 case 96:
2509 return AMDGPU::VReg_96RegClassID;
2510 case 128:
2511 return AMDGPU::VReg_128RegClassID;
2512 case 160:
2513 return AMDGPU::VReg_160RegClassID;
2514 case 192:
2515 return AMDGPU::VReg_192RegClassID;
2516 case 224:
2517 return AMDGPU::VReg_224RegClassID;
2518 case 256:
2519 return AMDGPU::VReg_256RegClassID;
2520 case 288:
2521 return AMDGPU::VReg_288RegClassID;
2522 case 320:
2523 return AMDGPU::VReg_320RegClassID;
2524 case 352:
2525 return AMDGPU::VReg_352RegClassID;
2526 case 384:
2527 return AMDGPU::VReg_384RegClassID;
2528 case 512:
2529 return AMDGPU::VReg_512RegClassID;
2530 case 1024:
2531 return AMDGPU::VReg_1024RegClassID;
2532 }
2533 } else if (Is == IS_TTMP) {
2534 switch (RegWidth) {
2535 default: return -1;
2536 case 32:
2537 return AMDGPU::TTMP_32RegClassID;
2538 case 64:
2539 return AMDGPU::TTMP_64RegClassID;
2540 case 128:
2541 return AMDGPU::TTMP_128RegClassID;
2542 case 256:
2543 return AMDGPU::TTMP_256RegClassID;
2544 case 512:
2545 return AMDGPU::TTMP_512RegClassID;
2546 }
2547 } else if (Is == IS_SGPR) {
2548 switch (RegWidth) {
2549 default: return -1;
2550 case 32:
2551 return AMDGPU::SGPR_32RegClassID;
2552 case 64:
2553 return AMDGPU::SGPR_64RegClassID;
2554 case 96:
2555 return AMDGPU::SGPR_96RegClassID;
2556 case 128:
2557 return AMDGPU::SGPR_128RegClassID;
2558 case 160:
2559 return AMDGPU::SGPR_160RegClassID;
2560 case 192:
2561 return AMDGPU::SGPR_192RegClassID;
2562 case 224:
2563 return AMDGPU::SGPR_224RegClassID;
2564 case 256:
2565 return AMDGPU::SGPR_256RegClassID;
2566 case 288:
2567 return AMDGPU::SGPR_288RegClassID;
2568 case 320:
2569 return AMDGPU::SGPR_320RegClassID;
2570 case 352:
2571 return AMDGPU::SGPR_352RegClassID;
2572 case 384:
2573 return AMDGPU::SGPR_384RegClassID;
2574 case 512:
2575 return AMDGPU::SGPR_512RegClassID;
2576 }
2577 } else if (Is == IS_AGPR) {
2578 switch (RegWidth) {
2579 default: return -1;
2580 case 32:
2581 return AMDGPU::AGPR_32RegClassID;
2582 case 64:
2583 return AMDGPU::AReg_64RegClassID;
2584 case 96:
2585 return AMDGPU::AReg_96RegClassID;
2586 case 128:
2587 return AMDGPU::AReg_128RegClassID;
2588 case 160:
2589 return AMDGPU::AReg_160RegClassID;
2590 case 192:
2591 return AMDGPU::AReg_192RegClassID;
2592 case 224:
2593 return AMDGPU::AReg_224RegClassID;
2594 case 256:
2595 return AMDGPU::AReg_256RegClassID;
2596 case 288:
2597 return AMDGPU::AReg_288RegClassID;
2598 case 320:
2599 return AMDGPU::AReg_320RegClassID;
2600 case 352:
2601 return AMDGPU::AReg_352RegClassID;
2602 case 384:
2603 return AMDGPU::AReg_384RegClassID;
2604 case 512:
2605 return AMDGPU::AReg_512RegClassID;
2606 case 1024:
2607 return AMDGPU::AReg_1024RegClassID;
2608 }
2609 }
2610 return -1;
2611}
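// Illustration (not from the upstream source): getRegClass(IS_VGPR, 96)
// yields VReg_96RegClassID, while an unsupported width such as
// getRegClass(IS_TTMP, 96) hits the default case and returns -1, which the
// callers report as "invalid or unsupported register size".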
2612
2615 .Case("exec", AMDGPU::EXEC)
2616 .Case("vcc", AMDGPU::VCC)
2617 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2618 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2619 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2620 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2621 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2622 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2623 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2624 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2625 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2626 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2627 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2628 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2629 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2630 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2631 .Case("m0", AMDGPU::M0)
2632 .Case("vccz", AMDGPU::SRC_VCCZ)
2633 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2634 .Case("execz", AMDGPU::SRC_EXECZ)
2635 .Case("src_execz", AMDGPU::SRC_EXECZ)
2636 .Case("scc", AMDGPU::SRC_SCC)
2637 .Case("src_scc", AMDGPU::SRC_SCC)
2638 .Case("tba", AMDGPU::TBA)
2639 .Case("tma", AMDGPU::TMA)
2640 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2641 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2642 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2643 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2644 .Case("vcc_lo", AMDGPU::VCC_LO)
2645 .Case("vcc_hi", AMDGPU::VCC_HI)
2646 .Case("exec_lo", AMDGPU::EXEC_LO)
2647 .Case("exec_hi", AMDGPU::EXEC_HI)
2648 .Case("tma_lo", AMDGPU::TMA_LO)
2649 .Case("tma_hi", AMDGPU::TMA_HI)
2650 .Case("tba_lo", AMDGPU::TBA_LO)
2651 .Case("tba_hi", AMDGPU::TBA_HI)
2652 .Case("pc", AMDGPU::PC_REG)
2653 .Case("null", AMDGPU::SGPR_NULL)
2654 .Default(AMDGPU::NoRegister);
2655}
2656
2657bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2658 SMLoc &EndLoc, bool RestoreOnFailure) {
2659 auto R = parseRegister();
2660 if (!R) return true;
2661 assert(R->isReg());
2662 RegNo = R->getReg();
2663 StartLoc = R->getStartLoc();
2664 EndLoc = R->getEndLoc();
2665 return false;
2666}
2667
2668bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2669 SMLoc &EndLoc) {
2670 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2671}
2672
2673ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2674 SMLoc &EndLoc) {
2675 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2676 bool PendingErrors = getParser().hasPendingError();
2677 getParser().clearPendingErrors();
2678 if (PendingErrors)
2679 return ParseStatus::Failure;
2680 if (Result)
2681 return ParseStatus::NoMatch;
2682 return ParseStatus::Success;
2683}
2684
2685bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2686 RegisterKind RegKind, unsigned Reg1,
2687 SMLoc Loc) {
2688 switch (RegKind) {
2689 case IS_SPECIAL:
2690 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2691 Reg = AMDGPU::EXEC;
2692 RegWidth = 64;
2693 return true;
2694 }
2695 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2696 Reg = AMDGPU::FLAT_SCR;
2697 RegWidth = 64;
2698 return true;
2699 }
2700 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2701 Reg = AMDGPU::XNACK_MASK;
2702 RegWidth = 64;
2703 return true;
2704 }
2705 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2706 Reg = AMDGPU::VCC;
2707 RegWidth = 64;
2708 return true;
2709 }
2710 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2711 Reg = AMDGPU::TBA;
2712 RegWidth = 64;
2713 return true;
2714 }
2715 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2716 Reg = AMDGPU::TMA;
2717 RegWidth = 64;
2718 return true;
2719 }
2720 Error(Loc, "register does not fit in the list");
2721 return false;
2722 case IS_VGPR:
2723 case IS_SGPR:
2724 case IS_AGPR:
2725 case IS_TTMP:
2726 if (Reg1 != Reg + RegWidth / 32) {
2727 Error(Loc, "registers in a list must have consecutive indices");
2728 return false;
2729 }
2730 RegWidth += 32;
2731 return true;
2732 default:
2733 llvm_unreachable("unexpected register kind");
2734 }
2735}
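// Illustration (not from the upstream source): while parsing the list
// [s0,s1,s2,s3] this helper is called for s1, s2 and s3, each time checking
// that the index is consecutive and growing RegWidth by 32
// (32 -> 64 -> 96 -> 128). For special registers, [exec_lo,exec_hi] folds
// into EXEC with RegWidth 64.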
2736
2737struct RegInfo {
 2738 StringLiteral Name;
 2739 RegisterKind Kind;
2740};
2741
2742static constexpr RegInfo RegularRegisters[] = {
2743 {{"v"}, IS_VGPR},
2744 {{"s"}, IS_SGPR},
2745 {{"ttmp"}, IS_TTMP},
2746 {{"acc"}, IS_AGPR},
2747 {{"a"}, IS_AGPR},
2748};
2749
2750static bool isRegularReg(RegisterKind Kind) {
2751 return Kind == IS_VGPR ||
2752 Kind == IS_SGPR ||
2753 Kind == IS_TTMP ||
2754 Kind == IS_AGPR;
2755}
2756
2758 for (const RegInfo &Reg : RegularRegisters)
2759 if (Str.starts_with(Reg.Name))
2760 return &Reg;
2761 return nullptr;
2762}
2763
2764static bool getRegNum(StringRef Str, unsigned& Num) {
2765 return !Str.getAsInteger(10, Num);
2766}
2767
2768bool
2769AMDGPUAsmParser::isRegister(const AsmToken &Token,
2770 const AsmToken &NextToken) const {
2771
2772 // A list of consecutive registers: [s0,s1,s2,s3]
2773 if (Token.is(AsmToken::LBrac))
2774 return true;
2775
2776 if (!Token.is(AsmToken::Identifier))
2777 return false;
2778
2779 // A single register like s0 or a range of registers like s[0:1]
2780
2781 StringRef Str = Token.getString();
2782 const RegInfo *Reg = getRegularRegInfo(Str);
2783 if (Reg) {
2784 StringRef RegName = Reg->Name;
2785 StringRef RegSuffix = Str.substr(RegName.size());
2786 if (!RegSuffix.empty()) {
2787 RegSuffix.consume_back(".l");
2788 RegSuffix.consume_back(".h");
2789 unsigned Num;
2790 // A single register with an index: rXX
2791 if (getRegNum(RegSuffix, Num))
2792 return true;
2793 } else {
2794 // A range of registers: r[XX:YY].
2795 if (NextToken.is(AsmToken::LBrac))
2796 return true;
2797 }
2798 }
2799
2800 return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2801}
2802
2803bool
2804AMDGPUAsmParser::isRegister()
2805{
2806 return isRegister(getToken(), peekToken());
2807}
2808
2809unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2810 unsigned SubReg, unsigned RegWidth,
2811 SMLoc Loc) {
2812 assert(isRegularReg(RegKind));
2813
2814 unsigned AlignSize = 1;
2815 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2816 // SGPR and TTMP registers must be aligned.
2817 // Max required alignment is 4 dwords.
2818 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2819 }
2820
2821 if (RegNum % AlignSize != 0) {
2822 Error(Loc, "invalid register alignment");
2823 return AMDGPU::NoRegister;
2824 }
2825
2826 unsigned RegIdx = RegNum / AlignSize;
2827 int RCID = getRegClass(RegKind, RegWidth);
2828 if (RCID == -1) {
2829 Error(Loc, "invalid or unsupported register size");
2830 return AMDGPU::NoRegister;
2831 }
2832
2833 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2834 const MCRegisterClass RC = TRI->getRegClass(RCID);
2835 if (RegIdx >= RC.getNumRegs()) {
2836 Error(Loc, "register index is out of range");
2837 return AMDGPU::NoRegister;
2838 }
2839
2840 unsigned Reg = RC.getRegister(RegIdx);
2841
2842 if (SubReg) {
2843 Reg = TRI->getSubReg(Reg, SubReg);
2844
2845 // Currently all regular registers have their .l and .h subregisters, so
2846 // we should never need to generate an error here.
2847 assert(Reg && "Invalid subregister!");
2848 }
2849
2850 return Reg;
2851}
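// Illustration (not from the upstream source): SGPR and TTMP groups must be
// naturally aligned up to 4 dwords, so s[4:7] (RegWidth 128, AlignSize 4) is
// accepted while s[2:5] is rejected with "invalid register alignment".
// VGPRs and AGPRs keep AlignSize 1 and need no alignment.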
2852
2853bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2854 int64_t RegLo, RegHi;
2855 if (!skipToken(AsmToken::LBrac, "missing register index"))
2856 return false;
2857
2858 SMLoc FirstIdxLoc = getLoc();
2859 SMLoc SecondIdxLoc;
2860
2861 if (!parseExpr(RegLo))
2862 return false;
2863
2864 if (trySkipToken(AsmToken::Colon)) {
2865 SecondIdxLoc = getLoc();
2866 if (!parseExpr(RegHi))
2867 return false;
2868 } else {
2869 RegHi = RegLo;
2870 }
2871
2872 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2873 return false;
2874
2875 if (!isUInt<32>(RegLo)) {
2876 Error(FirstIdxLoc, "invalid register index");
2877 return false;
2878 }
2879
2880 if (!isUInt<32>(RegHi)) {
2881 Error(SecondIdxLoc, "invalid register index");
2882 return false;
2883 }
2884
2885 if (RegLo > RegHi) {
2886 Error(FirstIdxLoc, "first register index should not exceed second index");
2887 return false;
2888 }
2889
2890 Num = static_cast<unsigned>(RegLo);
2891 RegWidth = 32 * ((RegHi - RegLo) + 1);
2892 return true;
2893}
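// Illustration (not from the upstream source): for the bracketed suffix
// "[0:3]" this returns Num = 0 and RegWidth = 128; for "[5]" (no colon) it
// returns Num = 5 and RegWidth = 32. A descending range such as "[3:0]" is
// rejected because the first index may not exceed the second.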
2894
2895unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2896 unsigned &RegNum, unsigned &RegWidth,
2897 SmallVectorImpl<AsmToken> &Tokens) {
2898 assert(isToken(AsmToken::Identifier));
2899 unsigned Reg = getSpecialRegForName(getTokenStr());
2900 if (Reg) {
2901 RegNum = 0;
2902 RegWidth = 32;
2903 RegKind = IS_SPECIAL;
2904 Tokens.push_back(getToken());
2905 lex(); // skip register name
2906 }
2907 return Reg;
2908}
2909
2910unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2911 unsigned &RegNum, unsigned &RegWidth,
2912 SmallVectorImpl<AsmToken> &Tokens) {
2913 assert(isToken(AsmToken::Identifier));
2914 StringRef RegName = getTokenStr();
2915 auto Loc = getLoc();
2916
2917 const RegInfo *RI = getRegularRegInfo(RegName);
2918 if (!RI) {
2919 Error(Loc, "invalid register name");
2920 return AMDGPU::NoRegister;
2921 }
2922
2923 Tokens.push_back(getToken());
2924 lex(); // skip register name
2925
2926 RegKind = RI->Kind;
2927 StringRef RegSuffix = RegName.substr(RI->Name.size());
2928 unsigned SubReg = NoSubRegister;
2929 if (!RegSuffix.empty()) {
2930 // We don't know the opcode till we are done parsing, so we don't know if
2931 // registers should be 16 or 32 bit. It is therefore mandatory to put .l or
2932 // .h to correctly specify 16 bit registers. We also can't determine class
2933 // VGPR_16_Lo128 or VGPR_16, so always parse them as VGPR_16.
2934 if (RegSuffix.consume_back(".l"))
2935 SubReg = AMDGPU::lo16;
2936 else if (RegSuffix.consume_back(".h"))
2937 SubReg = AMDGPU::hi16;
2938
2939 // Single 32-bit register: vXX.
2940 if (!getRegNum(RegSuffix, RegNum)) {
2941 Error(Loc, "invalid register index");
2942 return AMDGPU::NoRegister;
2943 }
2944 RegWidth = 32;
2945 } else {
2946 // Range of registers: v[XX:YY]. ":YY" is optional.
2947 if (!ParseRegRange(RegNum, RegWidth))
2948 return AMDGPU::NoRegister;
2949 }
2950
2951 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
2952}
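// Illustration (not from the upstream source): "v5" parses as a single
// 32-bit VGPR (RegNum 5, RegWidth 32), "v[8:11]" as a 128-bit group, and
// "v7.h" as the hi16 subregister of VGPR 7; the .l/.h suffix is how a
// 16-bit half is named explicitly.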
2953
2954unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2955 unsigned &RegWidth,
2956 SmallVectorImpl<AsmToken> &Tokens) {
2957 unsigned Reg = AMDGPU::NoRegister;
2958 auto ListLoc = getLoc();
2959
2960 if (!skipToken(AsmToken::LBrac,
2961 "expected a register or a list of registers")) {
2962 return AMDGPU::NoRegister;
2963 }
2964
2965 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2966
2967 auto Loc = getLoc();
2968 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2969 return AMDGPU::NoRegister;
2970 if (RegWidth != 32) {
2971 Error(Loc, "expected a single 32-bit register");
2972 return AMDGPU::NoRegister;
2973 }
2974
2975 for (; trySkipToken(AsmToken::Comma); ) {
2976 RegisterKind NextRegKind;
2977 unsigned NextReg, NextRegNum, NextRegWidth;
2978 Loc = getLoc();
2979
2980 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2981 NextRegNum, NextRegWidth,
2982 Tokens)) {
2983 return AMDGPU::NoRegister;
2984 }
2985 if (NextRegWidth != 32) {
2986 Error(Loc, "expected a single 32-bit register");
2987 return AMDGPU::NoRegister;
2988 }
2989 if (NextRegKind != RegKind) {
2990 Error(Loc, "registers in a list must be of the same kind");
2991 return AMDGPU::NoRegister;
2992 }
2993 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2994 return AMDGPU::NoRegister;
2995 }
2996
2997 if (!skipToken(AsmToken::RBrac,
2998 "expected a comma or a closing square bracket")) {
2999 return AMDGPU::NoRegister;
3000 }
3001
3002 if (isRegularReg(RegKind))
3003 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3004
3005 return Reg;
3006}
3007
3008bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3009 unsigned &RegNum, unsigned &RegWidth,
3010 SmallVectorImpl<AsmToken> &Tokens) {
3011 auto Loc = getLoc();
3012 Reg = AMDGPU::NoRegister;
3013
3014 if (isToken(AsmToken::Identifier)) {
3015 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3016 if (Reg == AMDGPU::NoRegister)
3017 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3018 } else {
3019 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3020 }
3021
3022 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3023 if (Reg == AMDGPU::NoRegister) {
3024 assert(Parser.hasPendingError());
3025 return false;
3026 }
3027
3028 if (!subtargetHasRegister(*TRI, Reg)) {
3029 if (Reg == AMDGPU::SGPR_NULL) {
3030 Error(Loc, "'null' operand is not supported on this GPU");
3031 } else {
3032 Error(Loc, "register not available on this GPU");
3033 }
3034 return false;
3035 }
3036
3037 return true;
3038}
3039
3040bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3041 unsigned &RegNum, unsigned &RegWidth,
3042 bool RestoreOnFailure /*=false*/) {
3043 Reg = AMDGPU::NoRegister;
3044
 3045 SmallVector<AsmToken, 1> Tokens;
 3046 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3047 if (RestoreOnFailure) {
3048 while (!Tokens.empty()) {
3049 getLexer().UnLex(Tokens.pop_back_val());
3050 }
3051 }
3052 return true;
3053 }
3054 return false;
3055}
3056
3057std::optional<StringRef>
3058AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3059 switch (RegKind) {
3060 case IS_VGPR:
3061 return StringRef(".amdgcn.next_free_vgpr");
3062 case IS_SGPR:
3063 return StringRef(".amdgcn.next_free_sgpr");
3064 default:
3065 return std::nullopt;
3066 }
3067}
3068
3069void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3070 auto SymbolName = getGprCountSymbolName(RegKind);
3071 assert(SymbolName && "initializing invalid register kind");
3072 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3073 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
3074}
3075
3076bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3077 unsigned DwordRegIndex,
3078 unsigned RegWidth) {
3079 // Symbols are only defined for GCN targets
3080 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3081 return true;
3082
3083 auto SymbolName = getGprCountSymbolName(RegKind);
3084 if (!SymbolName)
3085 return true;
3086 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3087
3088 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3089 int64_t OldCount;
3090
3091 if (!Sym->isVariable())
3092 return !Error(getLoc(),
3093 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3094 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
3095 return !Error(
3096 getLoc(),
3097 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3098
3099 if (OldCount <= NewMax)
3100 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
3101
3102 return true;
3103}
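// Illustration (not from the upstream source): after parsing v[8:11],
// DwordRegIndex is 8 and RegWidth is 128, so NewMax is 11 and
// .amdgcn.next_free_vgpr is raised to at least 12; the symbol is never
// lowered.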
3104
3105std::unique_ptr<AMDGPUOperand>
3106AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3107 const auto &Tok = getToken();
3108 SMLoc StartLoc = Tok.getLoc();
3109 SMLoc EndLoc = Tok.getEndLoc();
3110 RegisterKind RegKind;
3111 unsigned Reg, RegNum, RegWidth;
3112
3113 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3114 return nullptr;
3115 }
3116 if (isHsaAbi(getSTI())) {
3117 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3118 return nullptr;
3119 } else
3120 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3121 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3122}
3123
3124ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3125 bool HasSP3AbsModifier, bool HasLit) {
3126 // TODO: add syntactic sugar for 1/(2*PI)
3127
3128 if (isRegister())
3129 return ParseStatus::NoMatch;
3130 assert(!isModifier());
3131
3132 if (!HasLit) {
3133 HasLit = trySkipId("lit");
3134 if (HasLit) {
3135 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3136 return ParseStatus::Failure;
3137 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
3138 if (S.isSuccess() &&
3139 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3140 return ParseStatus::Failure;
3141 return S;
3142 }
3143 }
3144
3145 const auto& Tok = getToken();
3146 const auto& NextTok = peekToken();
3147 bool IsReal = Tok.is(AsmToken::Real);
3148 SMLoc S = getLoc();
3149 bool Negate = false;
3150
3151 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3152 lex();
3153 IsReal = true;
3154 Negate = true;
3155 }
3156
3157 AMDGPUOperand::Modifiers Mods;
3158 Mods.Lit = HasLit;
3159
3160 if (IsReal) {
3161 // Floating-point expressions are not supported.
3162 // Can only allow floating-point literals with an
3163 // optional sign.
3164
3165 StringRef Num = getTokenStr();
3166 lex();
3167
3168 APFloat RealVal(APFloat::IEEEdouble());
3169 auto roundMode = APFloat::rmNearestTiesToEven;
3170 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3171 return ParseStatus::Failure;
3172 if (Negate)
3173 RealVal.changeSign();
3174
3175 Operands.push_back(
3176 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3177 AMDGPUOperand::ImmTyNone, true));
3178 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3179 Op.setModifiers(Mods);
3180
3181 return ParseStatus::Success;
3182
3183 } else {
3184 int64_t IntVal;
3185 const MCExpr *Expr;
3186 SMLoc S = getLoc();
3187
3188 if (HasSP3AbsModifier) {
3189 // This is a workaround for handling expressions
3190 // as arguments of SP3 'abs' modifier, for example:
3191 // |1.0|
3192 // |-1|
3193 // |1+x|
3194 // This syntax is not compatible with syntax of standard
3195 // MC expressions (due to the trailing '|').
3196 SMLoc EndLoc;
3197 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3198 return ParseStatus::Failure;
3199 } else {
3200 if (Parser.parseExpression(Expr))
3201 return ParseStatus::Failure;
3202 }
3203
3204 if (Expr->evaluateAsAbsolute(IntVal)) {
3205 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3206 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3207 Op.setModifiers(Mods);
3208 } else {
3209 if (HasLit)
3210 return ParseStatus::NoMatch;
3211 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3212 }
3213
3214 return ParseStatus::Success;
3215 }
3216
3217 return ParseStatus::NoMatch;
3218}
3219
3220ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3221 if (!isRegister())
3222 return ParseStatus::NoMatch;
3223
3224 if (auto R = parseRegister()) {
3225 assert(R->isReg());
3226 Operands.push_back(std::move(R));
3227 return ParseStatus::Success;
3228 }
3229 return ParseStatus::Failure;
3230}
3231
3232ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3233 bool HasSP3AbsMod, bool HasLit) {
3234 ParseStatus Res = parseReg(Operands);
3235 if (!Res.isNoMatch())
3236 return Res;
3237 if (isModifier())
3238 return ParseStatus::NoMatch;
3239 return parseImm(Operands, HasSP3AbsMod, HasLit);
3240}
3241
3242bool
3243AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3244 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3245 const auto &str = Token.getString();
3246 return str == "abs" || str == "neg" || str == "sext";
3247 }
3248 return false;
3249}
3250
3251bool
3252AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3253 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3254}
3255
3256bool
3257AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3258 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3259}
3260
3261bool
3262AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3263 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3264}
3265
3266// Check if this is an operand modifier or an opcode modifier
3267// which may look like an expression but it is not. We should
3268// avoid parsing these modifiers as expressions. Currently
3269// recognized sequences are:
3270// |...|
3271// abs(...)
3272// neg(...)
3273// sext(...)
3274// -reg
3275// -|...|
3276// -abs(...)
3277// name:...
3278//
3279bool
3280AMDGPUAsmParser::isModifier() {
3281
3282 AsmToken Tok = getToken();
3283 AsmToken NextToken[2];
3284 peekTokens(NextToken);
3285
3286 return isOperandModifier(Tok, NextToken[0]) ||
3287 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3288 isOpcodeModifierWithVal(Tok, NextToken[0]);
3289}
3290
3291// Check if the current token is an SP3 'neg' modifier.
3292// Currently this modifier is allowed in the following context:
3293//
3294// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3295// 2. Before an 'abs' modifier: -abs(...)
3296// 3. Before an SP3 'abs' modifier: -|...|
3297//
3298// In all other cases "-" is handled as a part
3299// of an expression that follows the sign.
3300//
3301// Note: When "-" is followed by an integer literal N,
3302// this is interpreted as integer negation rather
3303// than a floating-point NEG modifier applied to N.
3304// Besides being counter-intuitive, such use of the floating-point
3305// NEG modifier would have resulted in a different meaning
3306// of integer literals used with VOP1/2/C and VOP3,
3307// for example:
3308// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3309// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3310// Negative fp literals with a preceding "-" are
3311// handled likewise for uniformity.
3312//
3313bool
3314AMDGPUAsmParser::parseSP3NegModifier() {
3315
3316 AsmToken NextToken[2];
3317 peekTokens(NextToken);
3318
3319 if (isToken(AsmToken::Minus) &&
3320 (isRegister(NextToken[0], NextToken[1]) ||
3321 NextToken[0].is(AsmToken::Pipe) ||
3322 isId(NextToken[0], "abs"))) {
3323 lex();
3324 return true;
3325 }
3326
3327 return false;
3328}
3329
3330ParseStatus
3331AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3332 bool AllowImm) {
3333 bool Neg, SP3Neg;
3334 bool Abs, SP3Abs;
3335 bool Lit;
3336 SMLoc Loc;
3337
3338 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3339 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3340 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3341
3342 SP3Neg = parseSP3NegModifier();
3343
3344 Loc = getLoc();
3345 Neg = trySkipId("neg");
3346 if (Neg && SP3Neg)
3347 return Error(Loc, "expected register or immediate");
3348 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3349 return ParseStatus::Failure;
3350
3351 Abs = trySkipId("abs");
3352 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3353 return ParseStatus::Failure;
3354
3355 Lit = trySkipId("lit");
3356 if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3357 return ParseStatus::Failure;
3358
3359 Loc = getLoc();
3360 SP3Abs = trySkipToken(AsmToken::Pipe);
3361 if (Abs && SP3Abs)
3362 return Error(Loc, "expected register or immediate");
3363
3364 ParseStatus Res;
3365 if (AllowImm) {
3366 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3367 } else {
3368 Res = parseReg(Operands);
3369 }
3370 if (!Res.isSuccess())
3371 return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
3372
3373 if (Lit && !Operands.back()->isImm())
3374 Error(Loc, "expected immediate with lit modifier");
3375
3376 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3377 return ParseStatus::Failure;
3378 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3379 return ParseStatus::Failure;
3380 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3381 return ParseStatus::Failure;
3382 if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3383 return ParseStatus::Failure;
3384
3385 AMDGPUOperand::Modifiers Mods;
3386 Mods.Abs = Abs || SP3Abs;
3387 Mods.Neg = Neg || SP3Neg;
3388 Mods.Lit = Lit;
3389
3390 if (Mods.hasFPModifiers() || Lit) {
3391 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3392 if (Op.isExpr())
3393 return Error(Op.getStartLoc(), "expected an absolute expression");
3394 Op.setModifiers(Mods);
3395 }
3396 return ParseStatus::Success;
3397}
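// Illustration (not from the upstream source): accepted operand spellings
// include "abs(v1)", "|v1|", "neg(v2)", "-v2", "-|v3|" and "lit(1.0)".
// Mixing the named and SP3 forms of the same modifier, e.g. "abs(|v1|)",
// is rejected with "expected register or immediate".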
3398
3399ParseStatus
3400AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3401 bool AllowImm) {
3402 bool Sext = trySkipId("sext");
3403 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3404 return ParseStatus::Failure;
3405
3406 ParseStatus Res;
3407 if (AllowImm) {
3408 Res = parseRegOrImm(Operands);
3409 } else {
3410 Res = parseReg(Operands);
3411 }
3412 if (!Res.isSuccess())
3413 return Sext ? ParseStatus::Failure : Res;
3414
3415 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3416 return ParseStatus::Failure;
3417
3418 AMDGPUOperand::Modifiers Mods;
3419 Mods.Sext = Sext;
3420
3421 if (Mods.hasIntModifiers()) {
3422 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3423 if (Op.isExpr())
3424 return Error(Op.getStartLoc(), "expected an absolute expression");
3425 Op.setModifiers(Mods);
3426 }
3427
3428 return ParseStatus::Success;
3429}
3430
3431ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3432 return parseRegOrImmWithFPInputMods(Operands, false);
3433}
3434
3435ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3436 return parseRegOrImmWithIntInputMods(Operands, false);
3437}
3438
3439ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3440 auto Loc = getLoc();
3441 if (trySkipId("off")) {
3442 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3443 AMDGPUOperand::ImmTyOff, false));
3444 return ParseStatus::Success;
3445 }
3446
3447 if (!isRegister())
3448 return ParseStatus::NoMatch;
3449
3450 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3451 if (Reg) {
3452 Operands.push_back(std::move(Reg));
3453 return ParseStatus::Success;
3454 }
3455
3456 return ParseStatus::Failure;
3457}
3458
3459unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3460 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3461
3462 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3463 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3464 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3465 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3466 return Match_InvalidOperand;
3467
3468 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3469 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3470 // v_mac_f32/16 allow only dst_sel == DWORD;
3471 auto OpNum =
3472 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3473 const auto &Op = Inst.getOperand(OpNum);
3474 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3475 return Match_InvalidOperand;
3476 }
3477 }
3478
3479 return Match_Success;
3480}
3481
3482static ArrayRef<unsigned> getAllVariants() {
 3483 static const unsigned Variants[] = {
3487 };
3488
3489 return ArrayRef(Variants);
3490}
3491
3492// What asm variants we should check
3493ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3494 if (isForcedDPP() && isForcedVOP3()) {
3495 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3496 return ArrayRef(Variants);
3497 }
3498 if (getForcedEncodingSize() == 32) {
3499 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3500 return ArrayRef(Variants);
3501 }
3502
3503 if (isForcedVOP3()) {
3504 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3505 return ArrayRef(Variants);
3506 }
3507
3508 if (isForcedSDWA()) {
3509 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
 3510 AMDGPUAsmVariants::SDWA9};
 3511 return ArrayRef(Variants);
3512 }
3513
3514 if (isForcedDPP()) {
3515 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3516 return ArrayRef(Variants);
3517 }
3518
3519 return getAllVariants();
3520}
3521
3522StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3523 if (isForcedDPP() && isForcedVOP3())
3524 return "e64_dpp";
3525
3526 if (getForcedEncodingSize() == 32)
3527 return "e32";
3528
3529 if (isForcedVOP3())
3530 return "e64";
3531
3532 if (isForcedSDWA())
3533 return "sdwa";
3534
3535 if (isForcedDPP())
3536 return "dpp";
3537
3538 return "";
3539}
3540
3541unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3542 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3543 for (MCPhysReg Reg : Desc.implicit_uses()) {
3544 switch (Reg) {
3545 case AMDGPU::FLAT_SCR:
3546 case AMDGPU::VCC:
3547 case AMDGPU::VCC_LO:
3548 case AMDGPU::VCC_HI:
3549 case AMDGPU::M0:
3550 return Reg;
3551 default:
3552 break;
3553 }
3554 }
3555 return AMDGPU::NoRegister;
3556}
3557
3558// NB: This code is correct only when used to check constant
3559// bus limitations because GFX7 supports no f16 inline constants.
3560// Note that there are no cases when a GFX7 opcode violates
3561// constant bus limitations due to the use of an f16 constant.
3562bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3563 unsigned OpIdx) const {
3564 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3565
3566 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3567 AMDGPU::isKImmOperand(Desc, OpIdx)) {
3568 return false;
3569 }
3570
3571 const MCOperand &MO = Inst.getOperand(OpIdx);
3572
3573 int64_t Val = MO.getImm();
3574 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3575
3576 switch (OpSize) { // expected operand size
3577 case 8:
3578 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3579 case 4:
3580 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3581 case 2: {
3582 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3586 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3587
3592
3597
3602
3607 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3608
3613 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3614
3615 llvm_unreachable("invalid operand type");
3616 }
3617 default:
3618 llvm_unreachable("invalid operand size");
3619 }
3620}
3621
3622unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3623 if (!isGFX10Plus())
3624 return 1;
3625
3626 switch (Opcode) {
3627 // 64-bit shift instructions can use only one scalar value input
3628 case AMDGPU::V_LSHLREV_B64_e64:
3629 case AMDGPU::V_LSHLREV_B64_gfx10:
3630 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3631 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3632 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3633 case AMDGPU::V_LSHRREV_B64_e64:
3634 case AMDGPU::V_LSHRREV_B64_gfx10:
3635 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3636 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3637 case AMDGPU::V_ASHRREV_I64_e64:
3638 case AMDGPU::V_ASHRREV_I64_gfx10:
3639 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3640 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3641 case AMDGPU::V_LSHL_B64_e64:
3642 case AMDGPU::V_LSHR_B64_e64:
3643 case AMDGPU::V_ASHR_I64_e64:
3644 return 1;
3645 default:
3646 return 2;
3647 }
3648}
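// Illustration (not from the upstream source): before GFX10 the limit is
// always 1, so a VALU instruction may read at most one SGPR or literal.
// On GFX10+ most opcodes may read two scalar values, but the 64-bit shifts
// listed above are still limited to one.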
3649
3650constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3651using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
 3652
3653// Get regular operand indices in the same order as specified
3654// in the instruction (but append mandatory literals to the end).
3655static OperandIndices getSrcOperandIndices(unsigned Opcode,
 3656 bool AddMandatoryLiterals = false) {
3657
3658 int16_t ImmIdx =
3659 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3660
3661 if (isVOPD(Opcode)) {
3662 int16_t ImmDeferredIdx =
3663 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3664 : -1;
3665
3666 return {getNamedOperandIdx(Opcode, OpName::src0X),
3667 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3668 getNamedOperandIdx(Opcode, OpName::src0Y),
3669 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3670 ImmDeferredIdx,
3671 ImmIdx};
3672 }
3673
3674 return {getNamedOperandIdx(Opcode, OpName::src0),
3675 getNamedOperandIdx(Opcode, OpName::src1),
3676 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3677}
3678
3679bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3680 const MCOperand &MO = Inst.getOperand(OpIdx);
3681 if (MO.isImm())
3682 return !isInlineConstant(Inst, OpIdx);
3683 if (MO.isReg()) {
3684 auto Reg = MO.getReg();
3685 if (!Reg)
3686 return false;
3687 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3688 auto PReg = mc2PseudoReg(Reg);
3689 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3690 }
3691 return true;
3692}
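// Illustration (not from the upstream source): an SGPR source (other than
// null) or a non-inlineable literal occupies the constant bus, while a VGPR
// or an inline constant does not. In "v_add_f32_e64 v0, s1, 0x12345678"
// both s1 and the literal count against getConstantBusLimit().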
3693
3694// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3695// Writelane is special in that it can use SGPR and M0 (which would normally
3696// count as using the constant bus twice - but in this case it is allowed since
3697// the lane selector doesn't count as a use of the constant bus). However, it is
3698// still required to abide by the 1 SGPR rule.
3699static bool checkWriteLane(const MCInst &Inst) {
3700 const unsigned Opcode = Inst.getOpcode();
3701 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3702 return false;
3703 const MCOperand &LaneSelOp = Inst.getOperand(2);
3704 if (!LaneSelOp.isReg())
3705 return false;
3706 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3707 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3708}
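// Illustration (not from the upstream source): "v_writelane_b32 v1, s2, m0"
// reads both s2 and m0, but the check above lets it bypass the constant bus
// validation because the m0 lane selector is not treated as a constant bus
// use; the example still uses only one real SGPR source, abiding by the
// 1 SGPR rule mentioned above.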
3709
3710bool AMDGPUAsmParser::validateConstantBusLimitations(
3711 const MCInst &Inst, const OperandVector &Operands) {
3712 const unsigned Opcode = Inst.getOpcode();
3713 const MCInstrDesc &Desc = MII.get(Opcode);
3714 unsigned LastSGPR = AMDGPU::NoRegister;
3715 unsigned ConstantBusUseCount = 0;
3716 unsigned NumLiterals = 0;
3717 unsigned LiteralSize;
3718
3719 if (!(Desc.TSFlags &
3722 !isVOPD(Opcode))
3723 return true;
3724
3725 if (checkWriteLane(Inst))
3726 return true;
3727
3728 // Check special imm operands (used by madmk, etc)
3729 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3730 ++NumLiterals;
3731 LiteralSize = 4;
3732 }
3733
3734 SmallDenseSet<unsigned> SGPRsUsed;
3735 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3736 if (SGPRUsed != AMDGPU::NoRegister) {
3737 SGPRsUsed.insert(SGPRUsed);
3738 ++ConstantBusUseCount;
3739 }
3740
3741 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3742
3743 for (int OpIdx : OpIndices) {
3744 if (OpIdx == -1)
3745 continue;
3746
3747 const MCOperand &MO = Inst.getOperand(OpIdx);
3748 if (usesConstantBus(Inst, OpIdx)) {
3749 if (MO.isReg()) {
3750 LastSGPR = mc2PseudoReg(MO.getReg());
3751 // Pairs of registers with a partial intersection, like these:
3752 // s0, s[0:1]
3753 // flat_scratch_lo, flat_scratch
3754 // flat_scratch_lo, flat_scratch_hi
3755 // are theoretically valid but they are disabled anyway.
3756 // Note that this code mimics SIInstrInfo::verifyInstruction
3757 if (SGPRsUsed.insert(LastSGPR).second) {
3758 ++ConstantBusUseCount;
3759 }
3760 } else { // Expression or a literal
3761
3762 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3763 continue; // special operand like VINTERP attr_chan
3764
3765 // An instruction may use only one literal.
3766 // This has been validated on the previous step.
3767 // See validateVOPLiteral.
3768 // This literal may be used as more than one operand.
3769 // If all these operands are of the same size,
3770 // this literal counts as one scalar value.
3771 // Otherwise it counts as 2 scalar values.
3772 // See "GFX10 Shader Programming", section 3.6.2.3.
3773
3774 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3775 if (Size < 4)
3776 Size = 4;
3777
3778 if (NumLiterals == 0) {
3779 NumLiterals = 1;
3780 LiteralSize = Size;
3781 } else if (LiteralSize != Size) {
3782 NumLiterals = 2;
3783 }
3784 }
3785 }
3786 }
3787 ConstantBusUseCount += NumLiterals;
3788
3789 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3790 return true;
3791
3792 SMLoc LitLoc = getLitLoc(Operands);
3793 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3794 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3795 Error(Loc, "invalid operand (violates constant bus restrictions)");
3796 return false;
3797}
3798
3799bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3800 const MCInst &Inst, const OperandVector &Operands) {
3801
3802 const unsigned Opcode = Inst.getOpcode();
3803 if (!isVOPD(Opcode))
3804 return true;
3805
3806 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3807
3808 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3809 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3810 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3811 ? Opr.getReg()
3812 : MCRegister::NoRegister;
3813 };
3814
3815 // On GFX12, if both OpX and OpY are V_MOV_B32, then OpY uses the SRC2 source-cache.
3816 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3817
3818 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3819 auto InvalidCompOprIdx =
3820 InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3821 if (!InvalidCompOprIdx)
3822 return true;
3823
3824 auto CompOprIdx = *InvalidCompOprIdx;
3825 auto ParsedIdx =
3826 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3827 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3828 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3829
3830 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3831 if (CompOprIdx == VOPD::Component::DST) {
3832 Error(Loc, "one dst register must be even and the other odd");
3833 } else {
3834 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3835 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3836 " operands must use different VGPR banks");
3837 }
3838
3839 return false;
3840}
3841
3842bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3843
3844 const unsigned Opc = Inst.getOpcode();
3845 const MCInstrDesc &Desc = MII.get(Opc);
3846
3847 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3848 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3849 assert(ClampIdx != -1);
3850 return Inst.getOperand(ClampIdx).getImm() == 0;
3851 }
3852
3853 return true;
3854}
3855
3856static constexpr uint64_t MIMGFlags =
3857 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
3858
3859bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3860 const SMLoc &IDLoc) {
3861
3862 const unsigned Opc = Inst.getOpcode();
3863 const MCInstrDesc &Desc = MII.get(Opc);
3864
3865 if ((Desc.TSFlags & MIMGFlags) == 0)
3866 return true;
3867
3868 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3869 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3870 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3871
3872 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
3873 return true;
3874
3875 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3876 return true;
3877
3878 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3879 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3880 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3881 if (DMask == 0)
3882 DMask = 1;
3883
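  // Expected vdata size: one dword per enabled dmask bit (always 4 for
  // gather4), halved and rounded up for packed D16, plus one dword if tfe is set.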
3884 bool IsPackedD16 = false;
3885 unsigned DataSize =
3886 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3887 if (hasPackedD16()) {
3888 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3889 IsPackedD16 = D16Idx >= 0;
3890 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3891 DataSize = (DataSize + 1) / 2;
3892 }
3893
3894 if ((VDataSize / 4) == DataSize + TFESize)
3895 return true;
3896
3897 StringRef Modifiers;
3898 if (isGFX90A())
3899 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3900 else
3901 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3902
3903 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3904 return false;
3905}
3906
3907bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3908 const SMLoc &IDLoc) {
3909 const unsigned Opc = Inst.getOpcode();
3910 const MCInstrDesc &Desc = MII.get(Opc);
3911
3912 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
3913 return true;
3914
3915 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3916
3917 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3918 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3919 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3920 int RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc
3921 : AMDGPU::OpName::rsrc;
3922 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
3923 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3924 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3925
3926 assert(VAddr0Idx != -1);
3927 assert(SrsrcIdx != -1);
3928 assert(SrsrcIdx > VAddr0Idx);
3929
3930 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3931 if (BaseOpcode->BVH) {
3932 if (IsA16 == BaseOpcode->A16)
3933 return true;
3934 Error(IDLoc, "image address size does not match a16");
3935 return false;
3936 }
3937
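  // With an NSA encoding each address component is a separate operand, so the
  // operand count between vaddr0 and srsrc gives the address size; otherwise
  // the size of the single vaddr register tuple is used.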
3938 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3939 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3940 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3941 unsigned ActualAddrSize =
3942 IsNSA ? SrsrcIdx - VAddr0Idx
3943 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3944
3945 unsigned ExpectedAddrSize =
3946 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3947
3948 if (IsNSA) {
3949 if (hasPartialNSAEncoding() &&
3950 ExpectedAddrSize >
3951 getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
3952 int VAddrLastIdx = SrsrcIdx - 1;
3953 unsigned VAddrLastSize =
3954 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3955
3956 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3957 }
3958 } else {
3959 if (ExpectedAddrSize > 12)
3960 ExpectedAddrSize = 16;
3961
3962 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3963 // This provides backward compatibility for assembly created
3964 // before 160b/192b/224b types were directly supported.
3965 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3966 return true;
3967 }
3968
3969 if (ActualAddrSize == ExpectedAddrSize)
3970 return true;
3971
3972 Error(IDLoc, "image address size does not match dim and a16");
3973 return false;
3974}
3975
3976bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3977
3978 const unsigned Opc = Inst.getOpcode();
3979 const MCInstrDesc &Desc = MII.get(Opc);
3980
3981 if ((Desc.TSFlags & MIMGFlags) == 0)
3982 return true;
3983 if (!Desc.mayLoad() || !Desc.mayStore())
3984 return true; // Not atomic
3985
3986 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3987 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3988
3989 // This is an incomplete check because image_atomic_cmpswap
3990 // may only use 0x3 and 0xf while other atomic operations
3991 // may use 0x1 and 0x3. However these limitations are
3992 // verified when we check that dmask matches dst size.
3993 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3994}
3995
3996bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3997
3998 const unsigned Opc = Inst.getOpcode();
3999 const MCInstrDesc &Desc = MII.get(Opc);
4000
4001 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4002 return true;
4003
4004 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4005 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4006
4007 // GATHER4 instructions use dmask in a different fashion compared to
4008 // other MIMG instructions. The only useful DMASK values are
4009 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4010 // (red,red,red,red) etc.) The ISA document doesn't mention
4011 // this.
4012 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4013}
4014
4015bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4016 const OperandVector &Operands) {
4017 if (!isGFX10Plus())
4018 return true;
4019
4020 const unsigned Opc = Inst.getOpcode();
4021 const MCInstrDesc &Desc = MII.get(Opc);
4022
4023 if ((Desc.TSFlags & MIMGFlags) == 0)
4024 return true;
4025
4026 // image_bvh_intersect_ray instructions do not have dim
4027 if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH)
4028 return true;
4029
4030 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4031 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4032 if (Op.isDim())
4033 return true;
4034 }
4035 return false;
4036}
4037
4038bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4039 const unsigned Opc = Inst.getOpcode();
4040 const MCInstrDesc &Desc = MII.get(Opc);
4041
4042 if ((Desc.TSFlags & MIMGFlags) == 0)
4043 return true;
4044
4045 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4046 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4047 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4048
4049 if (!BaseOpcode->MSAA)
4050 return true;
4051
4052 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4053 assert(DimIdx != -1);
4054
4055 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4056 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4057
4058 return DimInfo->MSAA;
4059}
4060
4061static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4062{
4063 switch (Opcode) {
4064 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4065 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4066 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4067 return true;
4068 default:
4069 return false;
4070 }
4071}
4072
4073// movrels* opcodes should only allow VGPRS as src0.
4074// This is specified in .td description for vop1/vop3,
4075// but sdwa is handled differently. See isSDWAOperand.
4076bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4077 const OperandVector &Operands) {
4078
4079 const unsigned Opc = Inst.getOpcode();
4080 const MCInstrDesc &Desc = MII.get(Opc);
4081
4082 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4083 return true;
4084
4085 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4086 assert(Src0Idx != -1);
4087
4088 SMLoc ErrLoc;
4089 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4090 if (Src0.isReg()) {
4091 auto Reg = mc2PseudoReg(Src0.getReg());
4092 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4093 if (!isSGPR(Reg, TRI))
4094 return true;
4095 ErrLoc = getRegLoc(Reg, Operands);
4096 } else {
4097 ErrLoc = getConstLoc(Operands);
4098 }
4099
4100 Error(ErrLoc, "source operand must be a VGPR");
4101 return false;
4102}
4103
4104bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4105 const OperandVector &Operands) {
4106
4107 const unsigned Opc = Inst.getOpcode();
4108
4109 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4110 return true;
4111
4112 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4113 assert(Src0Idx != -1);
4114
4115 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4116 if (!Src0.isReg())
4117 return true;
4118
4119 auto Reg = mc2PseudoReg(Src0.getReg());
4120 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4121 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4122 Error(getRegLoc(Reg, Operands),
4123 "source operand must be either a VGPR or an inline constant");
4124 return false;
4125 }
4126
4127 return true;
4128}
4129
4130bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4131 const OperandVector &Operands) {
4132 unsigned Opcode = Inst.getOpcode();
4133 const MCInstrDesc &Desc = MII.get(Opcode);
4134
4135 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4136 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4137 return true;
4138
4139 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4140 if (Src2Idx == -1)
4141 return true;
4142
4143 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4144 Error(getConstLoc(Operands),
4145 "inline constants are not allowed for this operand");
4146 return false;
4147 }
4148
4149 return true;
4150}
4151
4152bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4153 const OperandVector &Operands) {
4154 const unsigned Opc = Inst.getOpcode();
4155 const MCInstrDesc &Desc = MII.get(Opc);
4156
4157 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4158 return true;
4159
4160 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4161 if (Src2Idx == -1)
4162 return true;
4163
4164 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4165 if (!Src2.isReg())
4166 return true;
4167
4168 MCRegister Src2Reg = Src2.getReg();
4169 MCRegister DstReg = Inst.getOperand(0).getReg();
4170 if (Src2Reg == DstReg)
4171 return true;
4172
4173 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4174 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4175 return true;
4176
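  // For results wider than 128 bits, src2 must either be exactly equal to dst
  // (already accepted above) or not overlap it at all.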
4177 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4178 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
4179 "source 2 operand must not partially overlap with dst");
4180 return false;
4181 }
4182
4183 return true;
4184}
4185
4186bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4187 switch (Inst.getOpcode()) {
4188 default:
4189 return true;
4190 case V_DIV_SCALE_F32_gfx6_gfx7:
4191 case V_DIV_SCALE_F32_vi:
4192 case V_DIV_SCALE_F32_gfx10:
4193 case V_DIV_SCALE_F64_gfx6_gfx7:
4194 case V_DIV_SCALE_F64_vi:
4195 case V_DIV_SCALE_F64_gfx10:
4196 break;
4197 }
4198
4199 // TODO: Check that src0 = src1 or src2.
4200
4201 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4202 AMDGPU::OpName::src2_modifiers,
4203 AMDGPU::OpName::src2_modifiers}) {
4204 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4205 .getImm() &
4206 SISrcMods::ABS) {
4207 return false;
4208 }
4209 }
4210
4211 return true;
4212}
4213
4214bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4215
4216 const unsigned Opc = Inst.getOpcode();
4217 const MCInstrDesc &Desc = MII.get(Opc);
4218
4219 if ((Desc.TSFlags & MIMGFlags) == 0)
4220 return true;
4221
4222 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4223 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4224 if (isCI() || isSI())
4225 return false;
4226 }
4227
4228 return true;
4229}
4230
4231static bool IsRevOpcode(const unsigned Opcode)
4232{
4233 switch (Opcode) {
4234 case AMDGPU::V_SUBREV_F32_e32:
4235 case AMDGPU::V_SUBREV_F32_e64:
4236 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4237 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4238 case AMDGPU::V_SUBREV_F32_e32_vi:
4239 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4240 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4241 case AMDGPU::V_SUBREV_F32_e64_vi:
4242
4243 case AMDGPU::V_SUBREV_CO_U32_e32:
4244 case AMDGPU::V_SUBREV_CO_U32_e64:
4245 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4246 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4247
4248 case AMDGPU::V_SUBBREV_U32_e32:
4249 case AMDGPU::V_SUBBREV_U32_e64:
4250 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4251 case AMDGPU::V_SUBBREV_U32_e32_vi:
4252 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4253 case AMDGPU::V_SUBBREV_U32_e64_vi:
4254
4255 case AMDGPU::V_SUBREV_U32_e32:
4256 case AMDGPU::V_SUBREV_U32_e64:
4257 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4258 case AMDGPU::V_SUBREV_U32_e32_vi:
4259 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4260 case AMDGPU::V_SUBREV_U32_e64_vi:
4261
4262 case AMDGPU::V_SUBREV_F16_e32:
4263 case AMDGPU::V_SUBREV_F16_e64:
4264 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4265 case AMDGPU::V_SUBREV_F16_e32_vi:
4266 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4267 case AMDGPU::V_SUBREV_F16_e64_vi:
4268
4269 case AMDGPU::V_SUBREV_U16_e32:
4270 case AMDGPU::V_SUBREV_U16_e64:
4271 case AMDGPU::V_SUBREV_U16_e32_vi:
4272 case AMDGPU::V_SUBREV_U16_e64_vi:
4273
4274 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4275 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4276 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4277
4278 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4279 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4280
4281 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4282 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4283
4284 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4285 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4286
4287 case AMDGPU::V_LSHRREV_B32_e32:
4288 case AMDGPU::V_LSHRREV_B32_e64:
4289 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4290 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4291 case AMDGPU::V_LSHRREV_B32_e32_vi:
4292 case AMDGPU::V_LSHRREV_B32_e64_vi:
4293 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4294 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4295
4296 case AMDGPU::V_ASHRREV_I32_e32:
4297 case AMDGPU::V_ASHRREV_I32_e64:
4298 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4299 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4300 case AMDGPU::V_ASHRREV_I32_e32_vi:
4301 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4302 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4303 case AMDGPU::V_ASHRREV_I32_e64_vi:
4304
4305 case AMDGPU::V_LSHLREV_B32_e32:
4306 case AMDGPU::V_LSHLREV_B32_e64:
4307 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4308 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4309 case AMDGPU::V_LSHLREV_B32_e32_vi:
4310 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4311 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4312 case AMDGPU::V_LSHLREV_B32_e64_vi:
4313
4314 case AMDGPU::V_LSHLREV_B16_e32:
4315 case AMDGPU::V_LSHLREV_B16_e64:
4316 case AMDGPU::V_LSHLREV_B16_e32_vi:
4317 case AMDGPU::V_LSHLREV_B16_e64_vi:
4318 case AMDGPU::V_LSHLREV_B16_gfx10:
4319
4320 case AMDGPU::V_LSHRREV_B16_e32:
4321 case AMDGPU::V_LSHRREV_B16_e64:
4322 case AMDGPU::V_LSHRREV_B16_e32_vi:
4323 case AMDGPU::V_LSHRREV_B16_e64_vi:
4324 case AMDGPU::V_LSHRREV_B16_gfx10:
4325
4326 case AMDGPU::V_ASHRREV_I16_e32:
4327 case AMDGPU::V_ASHRREV_I16_e64:
4328 case AMDGPU::V_ASHRREV_I16_e32_vi:
4329 case AMDGPU::V_ASHRREV_I16_e64_vi:
4330 case AMDGPU::V_ASHRREV_I16_gfx10:
4331
4332 case AMDGPU::V_LSHLREV_B64_e64:
4333 case AMDGPU::V_LSHLREV_B64_gfx10:
4334 case AMDGPU::V_LSHLREV_B64_vi:
4335
4336 case AMDGPU::V_LSHRREV_B64_e64:
4337 case AMDGPU::V_LSHRREV_B64_gfx10:
4338 case AMDGPU::V_LSHRREV_B64_vi:
4339
4340 case AMDGPU::V_ASHRREV_I64_e64:
4341 case AMDGPU::V_ASHRREV_I64_gfx10:
4342 case AMDGPU::V_ASHRREV_I64_vi:
4343
4344 case AMDGPU::V_PK_LSHLREV_B16:
4345 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4346 case AMDGPU::V_PK_LSHLREV_B16_vi:
4347
4348 case AMDGPU::V_PK_LSHRREV_B16:
4349 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4350 case AMDGPU::V_PK_LSHRREV_B16_vi:
4351 case AMDGPU::V_PK_ASHRREV_I16:
4352 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4353 case AMDGPU::V_PK_ASHRREV_I16_vi:
4354 return true;
4355 default:
4356 return false;
4357 }
4358}
4359
4360std::optional<StringRef>
4361AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4362
4363 using namespace SIInstrFlags;
4364 const unsigned Opcode = Inst.getOpcode();
4365 const MCInstrDesc &Desc = MII.get(Opcode);
4366
4367 // lds_direct register is defined so that it can be used
4368 // with 9-bit operands only. Ignore encodings which do not accept these.
4369 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4370 if ((Desc.TSFlags & Enc) == 0)
4371 return std::nullopt;
4372
4373 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4374 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4375 if (SrcIdx == -1)
4376 break;
4377 const auto &Src = Inst.getOperand(SrcIdx);
4378 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4379
4380 if (isGFX90A() || isGFX11Plus())
4381 return StringRef("lds_direct is not supported on this GPU");
4382
4383 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4384 return StringRef("lds_direct cannot be used with this instruction");
4385
4386 if (SrcName != OpName::src0)
4387 return StringRef("lds_direct may be used as src0 only");
4388 }
4389 }
4390
4391 return std::nullopt;
4392}
4393
4394SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4395 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4396 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4397 if (Op.isFlatOffset())
4398 return Op.getStartLoc();
4399 }
4400 return getLoc();
4401}
4402
4403bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4404 const OperandVector &Operands) {
4405 auto Opcode = Inst.getOpcode();
4406 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4407 if (OpNum == -1)
4408 return true;
4409
4410 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4411 if ((TSFlags & SIInstrFlags::FLAT))
4412 return validateFlatOffset(Inst, Operands);
4413
4414 if ((TSFlags & SIInstrFlags::SMRD))
4415 return validateSMEMOffset(Inst, Operands);
4416
4417 const auto &Op = Inst.getOperand(OpNum);
4418 if (isGFX12Plus() &&
4419 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4420 const unsigned OffsetSize = 24;
4421 if (!isIntN(OffsetSize, Op.getImm())) {
4422 Error(getFlatOffsetLoc(Operands),
4423 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4424 return false;
4425 }
4426 } else {
4427 const unsigned OffsetSize = 16;
4428 if (!isUIntN(OffsetSize, Op.getImm())) {
4429 Error(getFlatOffsetLoc(Operands),
4430 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4431 return false;
4432 }
4433 }
4434 return true;
4435}
4436
4437bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4438 const OperandVector &Operands) {
4439 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4440 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4441 return true;
4442
4443 auto Opcode = Inst.getOpcode();
4444 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4445 assert(OpNum != -1);
4446
4447 const auto &Op = Inst.getOperand(OpNum);
4448 if (!hasFlatOffsets() && Op.getImm() != 0) {
4449 Error(getFlatOffsetLoc(Operands),
4450 "flat offset modifier is not supported on this GPU");
4451 return false;
4452 }
4453
4454 // For pre-GFX12 FLAT instructions the offset must be positive;
4455 // MSB is ignored and forced to zero.
4456 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4457 bool AllowNegative =
4458 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4459 isGFX12Plus();
4460 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4461 Error(getFlatOffsetLoc(Operands),
4462 Twine("expected a ") +
4463 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4464 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4465 return false;
4466 }
4467
4468 return true;
4469}
4470
4471SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4472 // Start with second operand because SMEM Offset cannot be dst or src0.
4473 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4474 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4475 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4476 return Op.getStartLoc();
4477 }
4478 return getLoc();
4479}
4480
4481bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4482 const OperandVector &Operands) {
4483 if (isCI() || isSI())
4484 return true;
4485
4486 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4487 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4488 return true;
4489
4490 auto Opcode = Inst.getOpcode();
4491 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4492 if (OpNum == -1)
4493 return true;
4494
4495 const auto &Op = Inst.getOperand(OpNum);
4496 if (!Op.isImm())
4497 return true;
4498
4499 uint64_t Offset = Op.getImm();
4500 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4501 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4502 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4503 return true;
4504
4505 Error(getSMEMOffsetLoc(Operands),
4506 isGFX12Plus() ? "expected a 24-bit signed offset"
4507 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4508 : "expected a 21-bit signed offset");
4509
4510 return false;
4511}
4512
4513bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4514 unsigned Opcode = Inst.getOpcode();
4515 const MCInstrDesc &Desc = MII.get(Opcode);
4516 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4517 return true;
4518
4519 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4520 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4521
4522 const int OpIndices[] = { Src0Idx, Src1Idx };
4523
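  // SOP2/SOPC instructions can encode at most one 32-bit literal, which may be
  // shared by src0 and src1; count distinct literal values and expressions.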
4524 unsigned NumExprs = 0;
4525 unsigned NumLiterals = 0;
4526 uint32_t LiteralValue;
4527
4528 for (int OpIdx : OpIndices) {
4529 if (OpIdx == -1) break;
4530
4531 const MCOperand &MO = Inst.getOperand(OpIdx);
4532 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4533 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4534 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4535 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4536 if (NumLiterals == 0 || LiteralValue != Value) {
4537 LiteralValue = Value;
4538 ++NumLiterals;
4539 }
4540 } else if (MO.isExpr()) {
4541 ++NumExprs;
4542 }
4543 }
4544 }
4545
4546 return NumLiterals + NumExprs <= 1;
4547}
4548
4549bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4550 const unsigned Opc = Inst.getOpcode();
4551 if (isPermlane16(Opc)) {
4552 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4553 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4554
4555 if (OpSel & ~3)
4556 return false;
4557 }
4558
4559 uint64_t TSFlags = MII.get(Opc).TSFlags;
4560
4561 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4562 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4563 if (OpSelIdx != -1) {
4564 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4565 return false;
4566 }
4567 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4568 if (OpSelHiIdx != -1) {
4569 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4570 return false;
4571 }
4572 }
4573
4574 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4575 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4576 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4577 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4578 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4579 if (OpSel & 3)
4580 return false;
4581 }
4582
4583 return true;
4584}
4585
4586bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) {
4587 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4588
4589 const unsigned Opc = Inst.getOpcode();
4590 uint64_t TSFlags = MII.get(Opc).TSFlags;
4591
4592 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4593 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4594 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4595 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4596 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4597 !(TSFlags & SIInstrFlags::IsSWMMAC))
4598 return true;
4599
4600 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4601 if (NegIdx == -1)
4602 return true;
4603
4604 unsigned Neg = Inst.getOperand(NegIdx).getImm();
4605
4606 // Some instructions have a neg_lo or neg_hi operand, but the neg modifier
4607 // is allowed only on some of their src operands.
4608 // It is convenient that such instructions don't have src_modifiers operand
4609 // for src operands that don't allow neg because they also don't allow opsel.
4610
4611 int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4612 AMDGPU::OpName::src1_modifiers,
4613 AMDGPU::OpName::src2_modifiers};
4614
4615 for (unsigned i = 0; i < 3; ++i) {
4616 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4617 if (Neg & (1 << i))
4618 return false;
4619 }
4620 }
4621
4622 return true;
4623}
4624
4625bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4626 const OperandVector &Operands) {
4627 const unsigned Opc = Inst.getOpcode();
4628 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4629 if (DppCtrlIdx >= 0) {
4630 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4631
4632 if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
4633 AMDGPU::isDPALU_DPP(MII.get(Opc))) {
4634 // DP ALU DPP is supported for row_newbcast only on GFX9*
4635 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4636 Error(S, "DP ALU dpp only supports row_newbcast");
4637 return false;
4638 }
4639 }
4640
4641 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
4642 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4643
4644 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
4645 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4646 if (Src1Idx >= 0) {
4647 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4648 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4649 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
4650 auto Reg = mc2PseudoReg(Inst.getOperand(Src1Idx).getReg());
4651 SMLoc S = getRegLoc(Reg, Operands);
4652 Error(S, "invalid operand for instruction");
4653 return false;
4654 }
4655 if (Src1.isImm()) {
4656 Error(getInstLoc(Operands),
4657 "src1 immediate operand invalid for instruction");
4658 return false;
4659 }
4660 }
4661 }
4662
4663 return true;
4664}
4665
4666// Check if VCC register matches wavefront size
4667bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4668 auto FB = getFeatureBits();
4669 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4670 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4671}
4672
4673// One unique literal can be used. VOP3 literal is only allowed in GFX10+
4674bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4675 const OperandVector &Operands) {
4676 unsigned Opcode = Inst.getOpcode();
4677 const MCInstrDesc &Desc = MII.get(Opcode);
4678 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4679 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4680 !HasMandatoryLiteral && !isVOPD(Opcode))
4681 return true;
4682
4683 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4684
4685 unsigned NumExprs = 0;
4686 unsigned NumLiterals = 0;
4687 uint64_t LiteralValue;
4688
4689 for (int OpIdx : OpIndices) {
4690 if (OpIdx == -1)
4691 continue;
4692
4693 const MCOperand &MO = Inst.getOperand(OpIdx);
4694 if (!MO.isImm() && !MO.isExpr())
4695 continue;
4696 if (!isSISrcOperand(Desc, OpIdx))
4697 continue;
4698
4699 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4700 uint64_t Value = static_cast<uint64_t>(MO.getImm());
4701 bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
4702 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
4703 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
4704
4705 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
4706 Error(getLitLoc(Operands), "invalid operand for instruction");
4707 return false;
4708 }
4709
4710 if (IsFP64 && IsValid32Op)
4711 Value = Hi_32(Value);
4712
4713 if (NumLiterals == 0 || LiteralValue != Value) {
4714 LiteralValue = Value;
4715 ++NumLiterals;
4716 }
4717 } else if (MO.isExpr()) {
4718 ++NumExprs;
4719 }
4720 }
4721 NumLiterals += NumExprs;
4722
4723 if (!NumLiterals)
4724 return true;
4725
4726 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4727 Error(getLitLoc(Operands), "literal operands are not supported");
4728 return false;
4729 }
4730
4731 if (NumLiterals > 1) {
4732 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4733 return false;
4734 }
4735
4736 return true;
4737}
4738
4739// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4740static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4741 const MCRegisterInfo *MRI) {
4742 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4743 if (OpIdx < 0)
4744 return -1;
4745
4746 const MCOperand &Op = Inst.getOperand(OpIdx);
4747 if (!Op.isReg())
4748 return -1;
4749
4750 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4751 auto Reg = Sub ? Sub : Op.getReg();
4752 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4753 return AGPR32.contains(Reg) ? 1 : 0;
4754}
4755
4756bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4757 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4758 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4759 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4760 SIInstrFlags::DS)) == 0)
4761 return true;
4762
4763 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4764 : AMDGPU::OpName::vdata;
4765
4766 const MCRegisterInfo *MRI = getMRI();
4767 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4768 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4769
4770 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4771 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4772 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4773 return false;
4774 }
4775
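  // On gfx90a, dst and data must be either all VGPRs or all AGPRs; on targets
  // without gfx90a insts, AGPR data/dst operands are not allowed at all.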
4776 auto FB = getFeatureBits();
4777 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4778 if (DataAreg < 0 || DstAreg < 0)
4779 return true;
4780 return DstAreg == DataAreg;
4781 }
4782
4783 return DstAreg < 1 && DataAreg < 1;
4784}
4785
4786bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4787 auto FB = getFeatureBits();
4788 if (!FB[AMDGPU::FeatureGFX90AInsts])
4789 return true;
4790
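  // gfx90a requires VGPR/AGPR register tuples to start at an even register.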
4791 const MCRegisterInfo *MRI = getMRI();
4792 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4793 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4794 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4795 const MCOperand &Op = Inst.getOperand(I);
4796 if (!Op.isReg())
4797 continue;
4798
4799 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4800 if (!Sub)
4801 continue;
4802
4803 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4804 return false;
4805 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4806 return false;
4807 }
4808
4809 return true;
4810}
4811
4812SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4813 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4814 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4815 if (Op.isBLGP())
4816 return Op.getStartLoc();
4817 }
4818 return SMLoc();
4819}
4820
4821bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4822 const OperandVector &Operands) {
4823 unsigned Opc = Inst.getOpcode();
4824 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4825 if (BlgpIdx == -1)
4826 return true;
4827 SMLoc BLGPLoc = getBLGPLoc(Operands);
4828 if (!BLGPLoc.isValid())
4829 return true;
4830 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
4831 auto FB = getFeatureBits();
4832 bool UsesNeg = false;
4833 if (FB[AMDGPU::FeatureGFX940Insts]) {
4834 switch (Opc) {
4835 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4836 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4837 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4838 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4839 UsesNeg = true;
4840 }
4841 }
4842
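  // The gfx940 DGEMM MFMA opcodes above repurpose the blgp field as per-operand
  // negate bits, which must be written as 'neg:'; other opcodes use 'blgp:'.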
4843 if (IsNeg == UsesNeg)
4844 return true;
4845
4846 Error(BLGPLoc,
4847 UsesNeg ? "invalid modifier: blgp is not supported"
4848 : "invalid modifier: neg is not supported");
4849
4850 return false;
4851}
4852
4853bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4854 const OperandVector &Operands) {
4855 if (!isGFX11Plus())
4856 return true;
4857
4858 unsigned Opc = Inst.getOpcode();
4859 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4860 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4861 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4862 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4863 return true;
4864
4865 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4866 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4867 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4868 if (Reg == AMDGPU::SGPR_NULL)
4869 return true;
4870
4871 SMLoc RegLoc = getRegLoc(Reg, Operands);
4872 Error(RegLoc, "src0 must be null");
4873 return false;
4874}
4875
4876bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
4877 const OperandVector &Operands) {
4878 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4879 if ((TSFlags & SIInstrFlags::DS) == 0)
4880 return true;
4881 if (TSFlags & SIInstrFlags::GWS)
4882 return validateGWS(Inst, Operands);
4883 // Only validate GDS for non-GWS instructions.
4884 if (hasGDS())
4885 return true;
4886 int GDSIdx =
4887 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
4888 if (GDSIdx < 0)
4889 return true;
4890 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
4891 if (GDS) {
4892 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
4893 Error(S, "gds modifier is not supported on this GPU");
4894 return false;
4895 }
4896 return true;
4897}
4898
4899// gfx90a has an undocumented limitation:
4900// DS_GWS opcodes must use even aligned registers.
4901bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4902 const OperandVector &Operands) {
4903 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4904 return true;
4905
4906 int Opc = Inst.getOpcode();
4907 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4908 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4909 return true;
4910
4911 const MCRegisterInfo *MRI = getMRI();
4912 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4913 int Data0Pos =
4914 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4915 assert(Data0Pos != -1);
4916 auto Reg = Inst.getOperand(Data0Pos).getReg();
4917 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4918 if (RegIdx & 1) {
4919 SMLoc RegLoc = getRegLoc(Reg, Operands);
4920 Error(RegLoc, "vgpr must be even aligned");
4921 return false;
4922 }
4923
4924 return true;
4925}
4926
4927bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4928 const OperandVector &Operands,
4929 const SMLoc &IDLoc) {
4930 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4931 AMDGPU::OpName::cpol);
4932 if (CPolPos == -1)
4933 return true;
4934
4935 unsigned CPol = Inst.getOperand(CPolPos).getImm();
4936
4937 if (isGFX12Plus())
4938 return validateTHAndScopeBits(Inst, Operands, CPol);
4939
4940 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4941 if (TSFlags & SIInstrFlags::SMRD) {
4942 if (CPol && (isSI() || isCI())) {
4943 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4944 Error(S, "cache policy is not supported for SMRD instructions");
4945 return false;
4946 }
4947 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4948 Error(IDLoc, "invalid cache policy for SMEM instruction");
4949 return false;
4950 }
4951 }
4952
4953 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4954 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
4955 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4956 SIInstrFlags::FLAT;
4957 if (!(TSFlags & AllowSCCModifier)) {
4958 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4959 StringRef CStr(S.getPointer());
4960 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4961 Error(S,
4962 "scc modifier is not supported for this instruction on this GPU");
4963 return false;
4964 }
4965 }
4966
4967 if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
4968 return true;
4969
4970 if (TSFlags & SIInstrFlags::IsAtomicRet) {
4971 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4972 Error(IDLoc, isGFX940() ? "instruction must use sc0"
4973 : "instruction must use glc");
4974 return false;
4975 }
4976 } else {
4977 if (CPol & CPol::GLC) {
4978 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4979 StringRef CStr(S.getPointer());
4980 S = SMLoc::getFromPointer(
4981 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4982 Error(S, isGFX940() ? "instruction must not use sc0"
4983 : "instruction must not use glc");
4984 return false;
4985 }
4986 }
4987
4988 return true;
4989}
4990
4991bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
4992 const OperandVector &Operands,
4993 const unsigned CPol) {
4994 const unsigned TH = CPol & AMDGPU::CPol::TH;
4995 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
4996
4997 const unsigned Opcode = Inst.getOpcode();
4998 const MCInstrDesc &TID = MII.get(Opcode);
4999
5000 auto PrintError = [&](StringRef Msg) {
5001 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5002 Error(S, Msg);
5003 return false;
5004 };
5005
5006 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5009 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5010
5011 if (TH == 0)
5012 return true;
5013
5014 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5015 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5016 (TH == AMDGPU::CPol::TH_NT_HT)))
5017 return PrintError("invalid th value for SMEM instruction");
5018
5019 if (TH == AMDGPU::CPol::TH_BYPASS) {
5020 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5021 CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
5022 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5023 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
5024 return PrintError("scope and th combination is not valid");
5025 }
5026
5027 bool IsStore = TID.mayStore();
5028 bool IsAtomic =
5029 TID.TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet);
5030
5031 if (IsAtomic) {
5032 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5033 return PrintError("invalid th value for atomic instructions");
5034 } else if (IsStore) {
5035 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5036 return PrintError("invalid th value for store instructions");
5037 } else {
5038 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5039 return PrintError("invalid th value for load instructions");
5040 }
5041
5042 return true;
5043}
5044
5045bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
5046 if (!isGFX11Plus())
5047 return true;
5048 for (auto &Operand : Operands) {
5049 if (!Operand->isReg())
5050 continue;
5051 unsigned Reg = Operand->getReg();
5052 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
5053 Error(getRegLoc(Reg, Operands),
5054 "execz and vccz are not supported on this GPU");
5055 return false;
5056 }
5057 }
5058 return true;
5059}
5060
5061bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5062 const OperandVector &Operands) {
5063 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5064 if (Desc.mayStore() &&
5065 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5066 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5067 if (Loc != getInstLoc(Operands)) {
5068 Error(Loc, "TFE modifier has no meaning for store instructions");
5069 return false;
5070 }
5071 }
5072
5073 return true;
5074}
5075
5076bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
5077 const SMLoc &IDLoc,
5078 const OperandVector &Operands) {
5079 if (auto ErrMsg = validateLdsDirect(Inst)) {
5080 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
5081 return false;
5082 }
5083 if (!validateSOPLiteral(Inst)) {
5084 Error(getLitLoc(Operands),
5085 "only one unique literal operand is allowed");
5086 return false;
5087 }
5088 if (!validateVOPLiteral(Inst, Operands)) {
5089 return false;
5090 }
5091 if (!validateConstantBusLimitations(Inst, Operands)) {
5092 return false;
5093 }
5094 if (!validateVOPDRegBankConstraints(Inst, Operands)) {
5095 return false;
5096 }
5097 if (!validateIntClampSupported(Inst)) {
5098 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5099 "integer clamping is not supported on this GPU");
5100 return false;
5101 }
5102 if (!validateOpSel(Inst)) {
5103 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5104 "invalid op_sel operand");
5105 return false;
5106 }
5107 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5108 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5109 "invalid neg_lo operand");
5110 return false;
5111 }
5112 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5113 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5114 "invalid neg_hi operand");
5115 return false;
5116 }
5117 if (!validateDPP(Inst, Operands)) {
5118 return false;
5119 }
5120 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5121 if (!validateMIMGD16(Inst)) {
5122 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5123 "d16 modifier is not supported on this GPU");
5124 return false;
5125 }
5126 if (!validateMIMGDim(Inst, Operands)) {
5127 Error(IDLoc, "missing dim operand");
5128 return false;
5129 }
5130 if (!validateMIMGMSAA(Inst)) {
5131 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5132 "invalid dim; must be MSAA type");
5133 return false;
5134 }
5135 if (!validateMIMGDataSize(Inst, IDLoc)) {
5136 return false;
5137 }
5138 if (!validateMIMGAddrSize(Inst, IDLoc))
5139 return false;
5140 if (!validateMIMGAtomicDMask(Inst)) {
5141 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5142 "invalid atomic image dmask");
5143 return false;
5144 }
5145 if (!validateMIMGGatherDMask(Inst)) {
5146 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5147 "invalid image_gather dmask: only one bit must be set");
5148 return false;
5149 }
5150 if (!validateMovrels(Inst, Operands)) {
5151 return false;
5152 }
5153 if (!validateOffset(Inst, Operands)) {
5154 return false;
5155 }
5156 if (!validateMAIAccWrite(Inst, Operands)) {
5157 return false;
5158 }
5159 if (!validateMAISrc2(Inst, Operands)) {
5160 return false;
5161 }
5162 if (!validateMFMA(Inst, Operands)) {
5163 return false;
5164 }
5165 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5166 return false;
5167 }
5168
5169 if (!validateAGPRLdSt(Inst)) {
5170 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5171 ? "invalid register class: data and dst should be all VGPR or AGPR"
5172 : "invalid register class: agpr loads and stores not supported on this GPU"
5173 );
5174 return false;
5175 }
5176 if (!validateVGPRAlign(Inst)) {
5177 Error(IDLoc,
5178 "invalid register class: vgpr tuples must be 64 bit aligned");
5179 return false;
5180 }
5181 if (!validateDS(Inst, Operands)) {
5182 return false;
5183 }
5184
5185 if (!validateBLGP(Inst, Operands)) {
5186 return false;
5187 }
5188
5189 if (!validateDivScale(Inst)) {
5190 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5191 return false;
5192 }
5193 if (!validateWaitCnt(Inst, Operands)) {
5194 return false;
5195 }
5196 if (!validateExeczVcczOperands(Operands)) {
5197 return false;
5198 }
5199 if (!validateTFE(Inst, Operands)) {
5200 return false;
5201 }
5202
5203 return true;
5204}
5205
5206static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5207 const FeatureBitset &FBS,
5208 unsigned VariantID = 0);
5209
5210static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5211 const FeatureBitset &AvailableFeatures,
5212 unsigned VariantID);
5213
5214bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5215 const FeatureBitset &FBS) {
5216 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5217}
5218
5219bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5220 const FeatureBitset &FBS,
5221 ArrayRef<unsigned> Variants) {
5222 for (auto Variant : Variants) {
5223 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5224 return true;
5225 }
5226
5227 return false;
5228}
5229
5230bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5231 const SMLoc &IDLoc) {
5232 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5233
5234 // Check if requested instruction variant is supported.
5235 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5236 return false;
5237
5238 // This instruction is not supported.
5239 // Clear any other pending errors because they are no longer relevant.
5240 getParser().clearPendingErrors();
5241
5242 // Requested instruction variant is not supported.
5243 // Check if any other variants are supported.
5244 StringRef VariantName = getMatchedVariantName();
5245 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5246 return Error(IDLoc,
5247 Twine(VariantName,
5248 " variant of this instruction is not supported"));
5249 }
5250
5251 // Check if this instruction may be used with a different wavesize.
5252 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5253 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5254
5255 FeatureBitset FeaturesWS32 = getFeatureBits();
5256 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5257 .flip(AMDGPU::FeatureWavefrontSize32);
5258 FeatureBitset AvailableFeaturesWS32 =
5259 ComputeAvailableFeatures(FeaturesWS32);
5260
5261 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5262 return Error(IDLoc, "instruction requires wavesize=32");
5263 }
5264
5265 // Finally check if this instruction is supported on any other GPU.
5266 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5267 return Error(IDLoc, "instruction not supported on this GPU");
5268 }
5269
5270 // Instruction not supported on any GPU. Probably a typo.
5271 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5272 return Error(IDLoc, "invalid instruction" + Suggestion);
5273}
5274
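// Returns true when the reported invalid operand appears to start the VOPD Y
// component, i.e. it immediately follows the "::" separator token.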
5275static bool isInvalidVOPDY(const OperandVector &Operands,
5276 uint64_t InvalidOprIdx) {
5277 assert(InvalidOprIdx < Operands.size());
5278 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5279 if (Op.isToken() && InvalidOprIdx > 1) {
5280 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5281 return PrevOp.isToken() && PrevOp.getToken() == "::";
5282 }
5283 return false;
5284}
5285
5286bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5287 OperandVector &Operands,
5288 MCStreamer &Out,
5289 uint64_t &ErrorInfo,
5290 bool MatchingInlineAsm) {
5291 MCInst Inst;
5292 unsigned Result = Match_Success;
5293 for (auto Variant : getMatchedVariants()) {
5294 uint64_t EI;
5295 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5296 Variant);
5297 // We order match statuses from least to most specific. We use the most
5298 // specific status as the result:
5299 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5300 if (R == Match_Success || R == Match_MissingFeature ||
5301 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5302 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5303 Result != Match_MissingFeature)) {
5304 Result = R;
5305 ErrorInfo = EI;
5306 }
5307 if (R == Match_Success)
5308 break;
5309 }
5310
5311 if (Result == Match_Success) {
5312 if (!validateInstruction(Inst, IDLoc, Operands)) {
5313 return true;
5314 }
5315 Inst.setLoc(IDLoc);
5316 Out.emitInstruction(Inst, getSTI());
5317 return false;
5318 }
5319
5320 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5321 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5322 return true;
5323 }
5324
5325 switch (Result) {
5326 default: break;
5327 case Match_MissingFeature:
5328 // It has been verified that the specified instruction
5329 // mnemonic is valid. A match was found but it requires
5330 // features which are not supported on this GPU.
5331 return Error(IDLoc, "operands are not valid for this GPU or mode");
5332
5333 case Match_InvalidOperand: {
5334 SMLoc ErrorLoc = IDLoc;
5335 if (ErrorInfo != ~0ULL) {
5336 if (ErrorInfo >= Operands.size()) {
5337 return Error(IDLoc, "too few operands for instruction");
5338 }
5339 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5340 if (ErrorLoc == SMLoc())
5341 ErrorLoc = IDLoc;
5342
5343 if (isInvalidVOPDY(Operands, ErrorInfo))
5344 return Error(ErrorLoc, "invalid VOPDY instruction");
5345 }
5346 return Error(ErrorLoc, "invalid operand for instruction");
5347 }
5348
5349 case Match_MnemonicFail:
5350 llvm_unreachable("Invalid instructions should have been handled already");
5351 }
5352 llvm_unreachable("Implement any new match types added!");
5353}
5354
5355bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5356 int64_t Tmp = -1;
5357 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5358 return true;
5359 }
5360 if (getParser().parseAbsoluteExpression(Tmp)) {
5361 return true;
5362 }
5363 Ret = static_cast<uint32_t>(Tmp);
5364 return false;
5365}
5366
5367bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5368 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5369 return TokError("directive only supported for amdgcn architecture");
5370
5371 std::string TargetIDDirective;
5372 SMLoc TargetStart = getTok().getLoc();
5373 if (getParser().parseEscapedString(TargetIDDirective))
5374 return true;
5375
5376 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5377 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5378 return getParser().Error(TargetRange.Start,
5379 (Twine(".amdgcn_target directive's target id ") +
5380 Twine(TargetIDDirective) +
5381 Twine(" does not match the specified target id ") +
5382 Twine(getTargetStreamer().getTargetID()->toString())).str());
5383
5384 return false;
5385}
5386
5387bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5388 return Error(Range.Start, "value out of range", Range);
5389}
5390
5391bool AMDGPUAsmParser::calculateGPRBlocks(
5392 const FeatureBitset &Features, const MCExpr *VCCUsed,
5393 const MCExpr *FlatScrUsed, bool XNACKUsed,
5394 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
5395 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5396 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
5397 // TODO(scott.linder): These calculations are duplicated from
5398 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5399 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5400 MCContext &Ctx = getContext();
5401
5402 const MCExpr *NumSGPRs = NextFreeSGPR;
5403 int64_t EvaluatedSGPRs;
5404
5405 if (Version.Major >= 10)
5406 NumSGPRs = MCConstantExpr::create(0, Ctx);
5407 else {
5408 unsigned MaxAddressableNumSGPRs =
5409 IsaInfo::getAddressableNumSGPRs(&getSTI());
5410
5411 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
5412 !Features.test(FeatureSGPRInitBug) &&
5413 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5414 return OutOfRangeError(SGPRRange);
5415
5416 const MCExpr *ExtraSGPRs =
5417 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
5418 NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
5419
5420 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5421 (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5422 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5423 return OutOfRangeError(SGPRRange);
5424
5425 if (Features.test(FeatureSGPRInitBug))
5426 NumSGPRs =
5427 MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
5428 }
5429
5430 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
5431 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
5432 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
5433 unsigned Granule) -> const MCExpr * {
5434 const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
5435 const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
5436 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
5437 const MCExpr *AlignToGPR =
5438 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
5439 const MCExpr *DivGPR =
5440 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
5441 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
5442 return SubGPR;
5443 };
5444
5445 VGPRBlocks = GetNumGPRBlocks(
5446 NextFreeVGPR,
5447 IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
5448 SGPRBlocks =
5449 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));
5450
5451 return false;
5452}
5453
5454bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5455 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5456 return TokError("directive only supported for amdgcn architecture");
5457
5458 if (!isHsaAbi(getSTI()))
5459 return TokError("directive only supported for amdhsa OS");
5460
5461 StringRef KernelName;
5462 if (getParser().parseIdentifier(KernelName))
5463 return true;
5464
5465 AMDGPU::MCKernelDescriptor KD =
5466 AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
5467 &getSTI(), getContext());
5468
5469 StringSet<> Seen;
5470
5471 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5472
5473 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
5474 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
5475
5476 SMRange VGPRRange;
5477 const MCExpr *NextFreeVGPR = ZeroExpr;
5478 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
5479 uint64_t SharedVGPRCount = 0;
5480 uint64_t PreloadLength = 0;
5481 uint64_t PreloadOffset = 0;
5482 SMRange SGPRRange;
5483 const MCExpr *NextFreeSGPR = ZeroExpr;
5484
5485 // Count the number of user SGPRs implied from the enabled feature bits.
5486 unsigned ImpliedUserSGPRCount = 0;
5487
5488 // Track if the asm explicitly contains the directive for the user SGPR
5489 // count.
5490 std::optional<unsigned> ExplicitUserSGPRCount;
5491 const MCExpr *ReserveVCC = OneExpr;
5492 const MCExpr *ReserveFlatScr = OneExpr;
5493 std::optional<bool> EnableWavefrontSize32;
5494
5495 while (true) {
5496 while (trySkipToken(AsmToken::EndOfStatement));
5497
5498 StringRef ID;
5499 SMRange IDRange = getTok().getLocRange();
5500 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5501 return true;
5502
5503 if (ID == ".end_amdhsa_kernel")
5504 break;
5505
5506 if (!Seen.insert(ID).second)
5507 return TokError(".amdhsa_ directives cannot be repeated");
5508
5509 SMLoc ValStart = getLoc();
5510 const MCExpr *ExprVal;
5511 if (getParser().parseExpression(ExprVal))
5512 return true;
5513 SMLoc ValEnd = getLoc();
5514 SMRange ValRange = SMRange(ValStart, ValEnd);
5515
5516 int64_t IVal = 0;
5517 uint64_t Val = IVal;
5518 bool EvaluatableExpr;
5519 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
5520 if (IVal < 0)
5521 return OutOfRangeError(ValRange);
5522 Val = IVal;
5523 }
5524
5525#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5526 if (!isUInt<ENTRY##_WIDTH>(Val)) \
5527 return OutOfRangeError(RANGE); \
5528 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
5529 getContext());
5530
5531// Some fields use the parsed value immediately which requires the expression to
5532// be resolvable.
5533#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
5534 if (!(RESOLVED)) \
5535 return Error(IDRange.Start, "directive should have resolvable expression", \
5536 IDRange);
5537
5538 if (ID == ".amdhsa_group_segment_fixed_size") {
5539 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
5540 CHAR_BIT>(Val))
5541 return OutOfRangeError(ValRange);
5542 KD.group_segment_fixed_size = ExprVal;
5543 } else if (ID == ".amdhsa_private_segment_fixed_size") {
5544 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
5545 CHAR_BIT>(Val))
5546 return OutOfRangeError(ValRange);
5547 KD.private_segment_fixed_size = ExprVal;
5548 } else if (ID == ".amdhsa_kernarg_size") {
5549 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
5550 return OutOfRangeError(ValRange);
5551 KD.kernarg_size = ExprVal;
5552 } else if (ID == ".amdhsa_user_sgpr_count") {
5553 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5554 ExplicitUserSGPRCount = Val;
5555 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5556 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5557 if (hasArchitectedFlatScratch())
5558 return Error(IDRange.Start,
5559 "directive is not supported with architected flat scratch",
5560 IDRange);
5561 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5562 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5563 ExprVal, ValRange);
5564 if (Val)
5565 ImpliedUserSGPRCount += 4;
5566 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
5567 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5568 if (!hasKernargPreload())
5569 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5570
5571 if (Val > getMaxNumUserSGPRs())
5572 return OutOfRangeError(ValRange);
5573 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
5574 ValRange);
5575 if (Val) {
5576 ImpliedUserSGPRCount += Val;
5577 PreloadLength = Val;
5578 }
5579 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
5580 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5581 if (!hasKernargPreload())
5582 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5583
5584 if (Val >= 1024)
5585 return OutOfRangeError(ValRange);
5586 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
5587 ValRange);
5588 if (Val)
5589 PreloadOffset = Val;
5590 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5591 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5592 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5593 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
5594 ValRange);
5595 if (Val)
5596 ImpliedUserSGPRCount += 2;
5597 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5598 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5599 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5600 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
5601 ValRange);
5602 if (Val)
5603 ImpliedUserSGPRCount += 2;
5604 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5605 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5606 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5607 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5608 ExprVal, ValRange);
5609 if (Val)
5610 ImpliedUserSGPRCount += 2;
5611 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5612 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5613 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5614 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
5615 ValRange);
5616 if (Val)
5617 ImpliedUserSGPRCount += 2;
5618 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5619 if (hasArchitectedFlatScratch())
5620 return Error(IDRange.Start,
5621 "directive is not supported with architected flat scratch",
5622 IDRange);
5623 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5624 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5625 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
5626 ExprVal, ValRange);
5627 if (Val)
5628 ImpliedUserSGPRCount += 2;
5629 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5630 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5631 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5632 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5633 ExprVal, ValRange);
5634 if (Val)
5635 ImpliedUserSGPRCount += 1;
5636 } else if (ID == ".amdhsa_wavefront_size32") {
5637 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5638 if (IVersion.Major < 10)
5639 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5640 EnableWavefrontSize32 = Val;
5641 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5642 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
5643 ValRange);
5644 } else if (ID == ".amdhsa_uses_dynamic_stack") {
5645 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5646 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
5647 ValRange);
5648 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5649 if (hasArchitectedFlatScratch())
5650 return Error(IDRange.Start,
5651 "directive is not supported with architected flat scratch",
5652 IDRange);
5653 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5654 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5655 ValRange);
5656 } else if (ID == ".amdhsa_enable_private_segment") {
5657 if (!hasArchitectedFlatScratch())
5658 return Error(
5659 IDRange.Start,
5660 "directive is not supported without architected flat scratch",
5661 IDRange);
5662 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5663 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5664 ValRange);
5665 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5666 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5667 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
5668 ValRange);
5669 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5670 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5671 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
5672 ValRange);
5673 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5674 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5675 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
5676 ValRange);
5677 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5678 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5679 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
5680 ValRange);
5681 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5682 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5683 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
5684 ValRange);
5685 } else if (ID == ".amdhsa_next_free_vgpr") {
5686 VGPRRange = ValRange;
5687 NextFreeVGPR = ExprVal;
5688 } else if (ID == ".amdhsa_next_free_sgpr") {
5689 SGPRRange = ValRange;
5690 NextFreeSGPR = ExprVal;
5691 } else if (ID == ".amdhsa_accum_offset") {
5692 if (!isGFX90A())
5693 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5694 AccumOffset = ExprVal;
5695 } else if (ID == ".amdhsa_reserve_vcc") {
5696 if (EvaluatableExpr && !isUInt<1>(Val))
5697 return OutOfRangeError(ValRange);
5698 ReserveVCC = ExprVal;
5699 } else if (ID == ".amdhsa_reserve_flat_scratch") {
5700 if (IVersion.Major < 7)
5701 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5702 if (hasArchitectedFlatScratch())
5703 return Error(IDRange.Start,
5704 "directive is not supported with architected flat scratch",
5705 IDRange);
5706 if (EvaluatableExpr && !isUInt<1>(Val))
5707 return OutOfRangeError(ValRange);
5708 ReserveFlatScr = ExprVal;
5709 } else if (ID == ".amdhsa_reserve_xnack_mask") {
5710 if (IVersion.Major < 8)
5711 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5712 if (!isUInt<1>(Val))
5713 return OutOfRangeError(ValRange);
5714 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5715 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5716 IDRange);
5717 } else if (ID == ".amdhsa_float_round_mode_32") {
5718 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5719 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
5720 ValRange);
5721 } else if (ID == ".amdhsa_float_round_mode_16_64") {
5722 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5723 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
5724 ValRange);
5725 } else if (ID == ".amdhsa_float_denorm_mode_32") {
5726 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5727 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
5728 ValRange);
5729 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5730 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5731 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
5732 ValRange);
5733 } else if (ID == ".amdhsa_dx10_clamp") {
5734 if (IVersion.Major >= 12)
5735 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5736 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5737 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
5738 ValRange);
5739 } else if (ID == ".amdhsa_ieee_mode") {
5740 if (IVersion.Major >= 12)
5741 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5742 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5743 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
5744 ValRange);
5745 } else if (ID == ".amdhsa_fp16_overflow") {
5746 if (IVersion.Major < 9)
5747 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5748 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5749 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
5750 ValRange);
5751 } else if (ID == ".amdhsa_tg_split") {
5752 if (!isGFX90A())
5753 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5754 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
5755 ExprVal, ValRange);
5756 } else if (ID == ".amdhsa_workgroup_processor_mode") {
5757 if (IVersion.Major < 10)
5758 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5759 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5760 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
5761 ValRange);
5762 } else if (ID == ".amdhsa_memory_ordered") {
5763 if (IVersion.Major < 10)
5764 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5765 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5766 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
5767 ValRange);
5768 } else if (ID == ".amdhsa_forward_progress") {
5769 if (IVersion.Major < 10)
5770 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5771 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5772 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
5773 ValRange);
5774 } else if (ID == ".amdhsa_shared_vgpr_count") {
5775 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5776 if (IVersion.Major < 10 || IVersion.Major >= 12)
5777 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
5778 IDRange);
5779 SharedVGPRCount = Val;
5780 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5781 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
5782 ValRange);
5783 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5784 PARSE_BITS_ENTRY(
5785 KD.compute_pgm_rsrc2,
5786 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
5787 ExprVal, ValRange);
5788 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5789 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5790 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5791 ExprVal, ValRange);
5792 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5793 PARSE_BITS_ENTRY(
5794 KD.compute_pgm_rsrc2,
5795 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
5796 ExprVal, ValRange);
5797 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5798 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5799 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5800 ExprVal, ValRange);
5801 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5802 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5803 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5804 ExprVal, ValRange);
5805 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5806 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5807 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5808 ExprVal, ValRange);
5809 } else if (ID == ".amdhsa_exception_int_div_zero") {
5810 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5811 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5812 ExprVal, ValRange);
5813 } else if (ID == ".amdhsa_round_robin_scheduling") {
5814 if (IVersion.Major < 12)
5815 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
5816 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5817 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
5818 ValRange);
5819 } else {
5820 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5821 }
5822
5823#undef PARSE_BITS_ENTRY
5824 }
5825
5826 if (!Seen.contains(".amdhsa_next_free_vgpr"))
5827 return TokError(".amdhsa_next_free_vgpr directive is required");
5828
5829 if (!Seen.contains(".amdhsa_next_free_sgpr"))
5830 return TokError(".amdhsa_next_free_sgpr directive is required");
5831
5832 const MCExpr *VGPRBlocks;
5833 const MCExpr *SGPRBlocks;
5834 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5835 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5836 EnableWavefrontSize32, NextFreeVGPR,
5837 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5838 SGPRBlocks))
5839 return true;
5840
5841 int64_t EvaluatedVGPRBlocks;
5842 bool VGPRBlocksEvaluatable =
5843 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
5844 if (VGPRBlocksEvaluatable &&
5845 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5846 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
5847 return OutOfRangeError(VGPRRange);
5848 }
5849 AMDGPU::MCKernelDescriptor::bits_set(
5850 KD.compute_pgm_rsrc1, VGPRBlocks,
5851 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
5852 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
5853
5854 int64_t EvaluatedSGPRBlocks;
5855 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
5856 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5857 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
5858 return OutOfRangeError(SGPRRange);
5859 AMDGPU::MCKernelDescriptor::bits_set(
5860 KD.compute_pgm_rsrc1, SGPRBlocks,
5861 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
5862 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
5863
5864 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5865 return TokError("amdgpu_user_sgpr_count smaller than implied by "
5866 "enabled user SGPRs");
5867
5868 unsigned UserSGPRCount =
5869 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5870
5871 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5872 return TokError("too many user SGPRs enabled");
5873 AMDGPU::MCKernelDescriptor::bits_set(
5874 KD.compute_pgm_rsrc2, MCConstantExpr::create(UserSGPRCount, getContext()),
5875 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
5876 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
5877
5878 int64_t IVal = 0;
5879 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
5880 return TokError("Kernarg size should be resolvable");
5881 uint64_t kernarg_size = IVal;
5882 if (PreloadLength && kernarg_size &&
5883 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
5884 return TokError("Kernarg preload length + offset is larger than the "
5885 "kernarg segment size");
5886
5887 if (isGFX90A()) {
5888 if (!Seen.contains(".amdhsa_accum_offset"))
5889 return TokError(".amdhsa_accum_offset directive is required");
5890 int64_t EvaluatedAccum;
5891 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
5892 uint64_t UEvaluatedAccum = EvaluatedAccum;
5893 if (AccumEvaluatable &&
5894 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
5895 return TokError("accum_offset should be in range [4..256] in "
5896 "increments of 4");
5897
5898 int64_t EvaluatedNumVGPR;
5899 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
5900 AccumEvaluatable &&
5901 UEvaluatedAccum >
5902 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
5903 return TokError("accum_offset exceeds total VGPR allocation");
5904 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
5905 MCBinaryExpr::createDiv(
5906 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
5907 MCConstantExpr::create(1, getContext()), getContext());
5908 MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, AdjustedAccum,
5909 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
5910 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5911 getContext());
5912 }
5913
5914 if (IVersion.Major >= 10 && IVersion.Major < 12) {
5915 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5916 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5917 return TokError("shared_vgpr_count directive not valid on "
5918 "wavefront size 32");
5919 }
5920
5921 if (VGPRBlocksEvaluatable &&
5922 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
5923 63)) {
5924 return TokError("shared_vgpr_count*2 + "
5925 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5926 "exceed 63\n");
5927 }
5928 }
5929
5930 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
5931 NextFreeVGPR, NextFreeSGPR,
5932 ReserveVCC, ReserveFlatScr);
5933 return false;
5934}
5935
5936bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
5937 int64_t Version;
5938 if (ParseAsAbsoluteExpression(Version))
5939 return true;
5940
5941 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
5942 return false;
5943}
5944
5945 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5946 AMDGPUMCKernelCodeT &C) {
5947 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5948 // assembly for backwards compatibility.
5949 if (ID == "max_scratch_backing_memory_byte_size") {
5950 Parser.eatToEndOfStatement();
5951 return false;
5952 }
5953
5954 SmallString<40> ErrStr;
5955 raw_svector_ostream Err(ErrStr);
5956 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
5957 return TokError(Err.str());
5958 }
5959 Lex();
5960
5961 if (ID == "enable_wavefront_size32") {
5962 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5963 if (!isGFX10Plus())
5964 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5965 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5966 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5967 } else {
5968 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5969 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5970 }
5971 }
5972
5973 if (ID == "wavefront_size") {
5974 if (C.wavefront_size == 5) {
5975 if (!isGFX10Plus())
5976 return TokError("wavefront_size=5 is only allowed on GFX10+");
5977 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5978 return TokError("wavefront_size=5 requires +WavefrontSize32");
5979 } else if (C.wavefront_size == 6) {
5980 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5981 return TokError("wavefront_size=6 requires +WavefrontSize64");
5982 }
5983 }
5984
5985 return false;
5986}
5987
5988bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5989 AMDGPUMCKernelCodeT KernelCode;
5990 KernelCode.initDefault(&getSTI(), getContext());
5991
5992 while (true) {
5993 // Lex EndOfStatement. This is in a while loop, because lexing a comment
5994 // will set the current token to EndOfStatement.
5995 while(trySkipToken(AsmToken::EndOfStatement));
5996
5997 StringRef ID;
5998 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5999 return true;
6000
6001 if (ID == ".end_amd_kernel_code_t")
6002 break;
6003
6004 if (ParseAMDKernelCodeTValue(ID, KernelCode))
6005 return true;
6006 }
6007
6008 KernelCode.validate(&getSTI(), getContext());
6009 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6010
6011 return false;
6012}
6013
6014bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6015 StringRef KernelName;
6016 if (!parseId(KernelName, "expected symbol name"))
6017 return true;
6018
6019 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6020 ELF::STT_AMDGPU_HSA_KERNEL);
6021
6022 KernelScope.initialize(getContext());
6023 return false;
6024}
6025
6026bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6027 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
6028 return Error(getLoc(),
6029 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6030 "architectures");
6031 }
6032
6033 auto TargetIDDirective = getLexer().getTok().getStringContents();
6034 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6035 return Error(getParser().getTok().getLoc(), "target id must match options");
6036
6037 getTargetStreamer().EmitISAVersion();
6038 Lex();
6039
6040 return false;
6041}
6042
6043bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6044 assert(isHsaAbi(getSTI()));
6045
6046 std::string HSAMetadataString;
6047 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6048 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6049 return true;
6050
6051 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6052 return Error(getLoc(), "invalid HSA metadata");
6053
6054 return false;
6055}
6056
6057/// Common code to parse out a block of text (typically YAML) between start and
6058/// end directives.
6059bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6060 const char *AssemblerDirectiveEnd,
6061 std::string &CollectString) {
6062
6063 raw_string_ostream CollectStream(CollectString);
6064
6065 getLexer().setSkipSpace(false);
6066
6067 bool FoundEnd = false;
6068 while (!isToken(AsmToken::Eof)) {
6069 while (isToken(AsmToken::Space)) {
6070 CollectStream << getTokenStr();
6071 Lex();
6072 }
6073
6074 if (trySkipId(AssemblerDirectiveEnd)) {
6075 FoundEnd = true;
6076 break;
6077 }
6078
6079 CollectStream << Parser.parseStringToEndOfStatement()
6080 << getContext().getAsmInfo()->getSeparatorString();
6081
6082 Parser.eatToEndOfStatement();
6083 }
6084
6085 getLexer().setSkipSpace(true);
6086
6087 if (isToken(AsmToken::Eof) && !FoundEnd) {
6088 return TokError(Twine("expected directive ") +
6089 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6090 }
6091
6092 CollectStream.flush();
6093 return false;
6094}
6095
6096/// Parse the assembler directive for new MsgPack-format PAL metadata.
6097bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6098 std::string String;
6099 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6100 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
6101 return true;
6102
6103 auto PALMetadata = getTargetStreamer().getPALMetadata();
6104 if (!PALMetadata->setFromString(String))
6105 return Error(getLoc(), "invalid PAL metadata");
6106 return false;
6107}
6108
6109/// Parse the assembler directive for old linear-format PAL metadata.
6110bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6111 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6112 return Error(getLoc(),
6113 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6114 "not available on non-amdpal OSes")).str());
6115 }
6116
6117 auto PALMetadata = getTargetStreamer().getPALMetadata();
6118 PALMetadata->setLegacy();
6119 for (;;) {
6120 uint32_t Key, Value;
6121 if (ParseAsAbsoluteExpression(Key)) {
6122 return TokError(Twine("invalid value in ") +
6123 Twine(PALMD::AssemblerDirective));
6124 }
6125 if (!trySkipToken(AsmToken::Comma)) {
6126 return TokError(Twine("expected an even number of values in ") +
6127 Twine(PALMD::AssemblerDirective));
6128 }
6129 if (ParseAsAbsoluteExpression(Value)) {
6130 return TokError(Twine("invalid value in ") +
6131 Twine(PALMD::AssemblerDirective));
6132 }
6133 PALMetadata->setRegister(Key, Value);
6134 if (!trySkipToken(AsmToken::Comma))
6135 break;
6136 }
6137 return false;
6138}
6139
6140/// ParseDirectiveAMDGPULDS
6141/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
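/// A hypothetical use that fits this grammar (operand values are illustrative):
///   .amdgpu_lds my_lds_buffer, 4096, 16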
6142bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6143 if (getParser().checkForValidSection())
6144 return true;
6145
6146 StringRef Name;
6147 SMLoc NameLoc = getLoc();
6148 if (getParser().parseIdentifier(Name))
6149 return TokError("expected identifier in directive");
6150
6151 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6152 if (getParser().parseComma())
6153 return true;
6154
6155 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6156
6157 int64_t Size;
6158 SMLoc SizeLoc = getLoc();
6159 if (getParser().parseAbsoluteExpression(Size))
6160 return true;
6161 if (Size < 0)
6162 return Error(SizeLoc, "size must be non-negative");
6163 if (Size > LocalMemorySize)
6164 return Error(SizeLoc, "size is too large");
6165
6166 int64_t Alignment = 4;
6167 if (trySkipToken(AsmToken::Comma)) {
6168 SMLoc AlignLoc = getLoc();
6169 if (getParser().parseAbsoluteExpression(Alignment))
6170 return true;
6171 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6172 return Error(AlignLoc, "alignment must be a power of two");
6173
6174 // Alignment larger than the size of LDS is possible in theory, as long
6175 // as the linker manages to place the symbol at address 0, but we do want
6176 // to make sure the alignment fits nicely into a 32-bit integer.
6177 if (Alignment >= 1u << 31)
6178 return Error(AlignLoc, "alignment is too large");
6179 }
6180
6181 if (parseEOL())
6182 return true;
6183
6184 Symbol->redefineIfPossible();
6185 if (!Symbol->isUndefined())
6186 return Error(NameLoc, "invalid symbol redefinition");
6187
6188 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6189 return false;
6190}
6191
6192bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6193 StringRef IDVal = DirectiveID.getString();
6194
6195 if (isHsaAbi(getSTI())) {
6196 if (IDVal == ".amdhsa_kernel")
6197 return ParseDirectiveAMDHSAKernel();
6198
6199 if (IDVal == ".amdhsa_code_object_version")
6200 return ParseDirectiveAMDHSACodeObjectVersion();
6201
6202 // TODO: Restructure/combine with PAL metadata directive.
6203 if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
6204 return ParseDirectiveHSAMetadata();
6205 } else {
6206 if (IDVal == ".amd_kernel_code_t")
6207 return ParseDirectiveAMDKernelCodeT();
6208
6209 if (IDVal == ".amdgpu_hsa_kernel")
6210 return ParseDirectiveAMDGPUHsaKernel();
6211
6212 if (IDVal == ".amd_amdgpu_isa")
6213 return ParseDirectiveISAVersion();
6214
6215 if (IDVal == HSAMD::AssemblerDirectiveBegin)
6216 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6217 Twine(" directive is "
6218 "not available on non-amdhsa OSes"))
6219 .str());
6220 }
6221 }
6222
6223 if (IDVal == ".amdgcn_target")
6224 return ParseDirectiveAMDGCNTarget();
6225
6226 if (IDVal == ".amdgpu_lds")
6227 return ParseDirectiveAMDGPULDS();
6228
6229 if (IDVal == PALMD::AssemblerDirectiveBegin)
6230 return ParseDirectivePALMetadataBegin();
6231
6232 if (IDVal == PALMD::AssemblerDirective)
6233 return ParseDirectivePALMetadata();
6234
6235 return true;
6236}
6237
6238bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6239 unsigned RegNo) {
6240
6241 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
6242 return isGFX9Plus();
6243
6244 // GFX10+ has 2 more SGPRs 104 and 105.
6245 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
6246 return hasSGPR104_SGPR105();
6247
6248 switch (RegNo) {
6249 case AMDGPU::SRC_SHARED_BASE_LO:
6250 case AMDGPU::SRC_SHARED_BASE:
6251 case AMDGPU::SRC_SHARED_LIMIT_LO:
6252 case AMDGPU::SRC_SHARED_LIMIT:
6253 case AMDGPU::SRC_PRIVATE_BASE_LO:
6254 case AMDGPU::SRC_PRIVATE_BASE:
6255 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
6256 case AMDGPU::SRC_PRIVATE_LIMIT:
6257 return isGFX9Plus();
6258 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
6259 return isGFX9Plus() && !isGFX11Plus();
6260 case AMDGPU::TBA:
6261 case AMDGPU::TBA_LO:
6262 case AMDGPU::TBA_HI:
6263 case AMDGPU::TMA:
6264 case AMDGPU::TMA_LO:
6265 case AMDGPU::TMA_HI:
6266 return !isGFX9Plus();
6267 case AMDGPU::XNACK_MASK:
6268 case AMDGPU::XNACK_MASK_LO:
6269 case AMDGPU::XNACK_MASK_HI:
6270 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6271 case AMDGPU::SGPR_NULL:
6272 return isGFX10Plus();
6273 default:
6274 break;
6275 }
6276
6277 if (isCI())
6278 return true;
6279
6280 if (isSI() || isGFX10Plus()) {
6281 // No flat_scr on SI.
6282 // On GFX10Plus flat scratch is not a valid register operand and can only be
6283 // accessed with s_setreg/s_getreg.
6284 switch (RegNo) {
6285 case AMDGPU::FLAT_SCR:
6286 case AMDGPU::FLAT_SCR_LO:
6287 case AMDGPU::FLAT_SCR_HI:
6288 return false;
6289 default:
6290 return true;
6291 }
6292 }
6293
6294 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6295 // SI/CI have.
6296 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
6297 return hasSGPR102_SGPR103();
6298
6299 return true;
6300}
6301
6302ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6303 StringRef Mnemonic,
6304 OperandMode Mode) {
6305 ParseStatus Res = parseVOPD(Operands);
6306 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6307 return Res;
6308
6309 // Try to parse with a custom parser
6310 Res = MatchOperandParserImpl(Operands, Mnemonic);
6311
6312 // If we successfully parsed the operand or if there was an error parsing,
6313 // we are done.
6314 //
6315 // If we are parsing after we reach EndOfStatement then this means we
6316 // are appending default values to the Operands list. This is only done
6317 // by custom parser, so we shouldn't continue on to the generic parsing.
6318 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6319 return Res;
6320
6321 SMLoc RBraceLoc;
6322 SMLoc LBraceLoc = getLoc();
6323 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6324 unsigned Prefix = Operands.size();
6325
6326 for (;;) {
6327 auto Loc = getLoc();
6328 Res = parseReg(Operands);
6329 if (Res.isNoMatch())
6330 Error(Loc, "expected a register");
6331 if (!Res.isSuccess())
6332 return ParseStatus::Failure;
6333
6334 RBraceLoc = getLoc();
6335 if (trySkipToken(AsmToken::RBrac))
6336 break;
6337
6338 if (!skipToken(AsmToken::Comma,
6339 "expected a comma or a closing square bracket"))
6340 return ParseStatus::Failure;
6341 }
6342
6343 if (Operands.size() - Prefix > 1) {
6344 Operands.insert(Operands.begin() + Prefix,
6345 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6346 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6347 }
6348
6349 return ParseStatus::Success;
6350 }
6351
6352 return parseRegOrImm(Operands);
6353}
6354
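// Sketch of the suffix handling below (example mnemonics are illustrative):
// "v_add_f32_e64_dpp" forces the 64-bit DPP encoding and leaves "v_add_f32" as
// the base mnemonic; "_e32", "_e64", "_dpp" and "_sdwa" are handled likewise.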
6355StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6356 // Clear any forced encodings from the previous instruction.
6357 setForcedEncodingSize(0);
6358 setForcedDPP(false);
6359 setForcedSDWA(false);
6360
6361 if (Name.ends_with("_e64_dpp")) {
6362 setForcedDPP(true);
6363 setForcedEncodingSize(64);
6364 return Name.substr(0, Name.size() - 8);
6365 }
6366 if (Name.ends_with("_e64")) {
6367 setForcedEncodingSize(64);
6368 return Name.substr(0, Name.size() - 4);
6369 }
6370 if (Name.ends_with("_e32")) {
6371 setForcedEncodingSize(32);
6372 return Name.substr(0, Name.size() - 4);
6373 }
6374 if (Name.ends_with("_dpp")) {
6375 setForcedDPP(true);
6376 return Name.substr(0, Name.size() - 4);
6377 }
6378 if (Name.ends_with("_sdwa")) {
6379 setForcedSDWA(true);
6380 return Name.substr(0, Name.size() - 5);
6381 }
6382 return Name;
6383}
6384
6385static void applyMnemonicAliases(StringRef &Mnemonic,
6386 const FeatureBitset &Features,
6387 unsigned VariantID);
6388
6389bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
6390 StringRef Name,
6391 SMLoc NameLoc, OperandVector &Operands) {
6392 // Add the instruction mnemonic
6393 Name = parseMnemonicSuffix(Name);
6394
6395 // If the target architecture uses MnemonicAlias, call it here to parse
6396 // operands correctly.
6397 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6398
6399 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6400
6401 bool IsMIMG = Name.starts_with("image_");
6402
6403 while (!trySkipToken(AsmToken::EndOfStatement)) {
6404 OperandMode Mode = OperandMode_Default;
6405 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6406 Mode = OperandMode_NSA;
6407 ParseStatus Res = parseOperand(Operands, Name, Mode);
6408
6409 if (!Res.isSuccess()) {
6410 checkUnsupportedInstruction(Name, NameLoc);
6411 if (!Parser.hasPendingError()) {
6412 // FIXME: use real operand location rather than the current location.
6413 StringRef Msg = Res.isFailure() ? "failed parsing operand."
6414 : "not a valid operand.";
6415 Error(getLoc(), Msg);
6416 }
6417 while (!trySkipToken(AsmToken::EndOfStatement)) {
6418 lex();
6419 }
6420 return true;
6421 }
6422
6423 // Eat the comma or space if there is one.
6424 trySkipToken(AsmToken::Comma);
6425 }
6426
6427 return false;
6428}
6429
6430//===----------------------------------------------------------------------===//
6431// Utility functions
6432//===----------------------------------------------------------------------===//
6433
6434ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6435 OperandVector &Operands) {
6436 SMLoc S = getLoc();
6437 if (!trySkipId(Name))
6438 return ParseStatus::NoMatch;
6439
6440 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6441 return ParseStatus::Success;
6442}
6443
6444ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6445 int64_t &IntVal) {
6446
6447 if (!trySkipId(Prefix, AsmToken::Colon))
6448 return ParseStatus::NoMatch;
6449
6450 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6451}
6452
6453ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6454 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6455 std::function<bool(int64_t &)> ConvertResult) {
6456 SMLoc S = getLoc();
6457 int64_t Value = 0;
6458
6459 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6460 if (!Res.isSuccess())
6461 return Res;
6462
6463 if (ConvertResult && !ConvertResult(Value)) {
6464 Error(S, "invalid " + StringRef(Prefix) + " value.");
6465 }
6466
6467 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6468 return ParseStatus::Success;
6469}
6470
6471ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6472 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6473 bool (*ConvertResult)(int64_t &)) {
6474 SMLoc S = getLoc();
6475 if (!trySkipId(Prefix, AsmToken::Colon))
6476 return ParseStatus::NoMatch;
6477
6478 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
6479 return ParseStatus::Failure;
6480
6481 unsigned Val = 0;
6482 const unsigned MaxSize = 4;
6483
6484 // FIXME: How to verify the number of elements matches the number of src
6485 // operands?
6486 for (int I = 0; ; ++I) {
6487 int64_t Op;
6488 SMLoc Loc = getLoc();
6489 if (!parseExpr(Op))
6490 return ParseStatus::Failure;
6491
6492 if (Op != 0 && Op != 1)
6493 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
6494
6495 Val |= (Op << I);
6496
6497 if (trySkipToken(AsmToken::RBrac))
6498 break;
6499
6500 if (I + 1 == MaxSize)
6501 return Error(getLoc(), "expected a closing square bracket");
6502
6503 if (!skipToken(AsmToken::Comma, "expected a comma"))
6504 return ParseStatus::Failure;
6505 }
6506
6507 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
6508 return ParseStatus::Success;
6509}
6510
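// parseNamedBit accepts the bare name or the name prefixed with "no"; as an
// illustrative example, "tfe" would produce an immediate of 1 and "notfe" 0.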
6511ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
6512 OperandVector &Operands,
6513 AMDGPUOperand::ImmTy ImmTy) {
6514 int64_t Bit;
6515 SMLoc S = getLoc();
6516
6517 if (trySkipId(Name)) {
6518 Bit = 1;
6519 } else if (trySkipId("no", Name)) {
6520 Bit = 0;
6521 } else {
6522 return ParseStatus::NoMatch;
6523 }
6524
6525 if (Name == "r128" && !hasMIMG_R128())
6526 return Error(S, "r128 modifier is not supported on this GPU");
6527 if (Name == "a16" && !hasA16())
6528 return Error(S, "a16 modifier is not supported on this GPU");
6529
6530 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6531 ImmTy = AMDGPUOperand::ImmTyR128A16;
6532
6533 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
6534 return ParseStatus::Success;
6535}
6536
6537unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
6538 bool &Disabling) const {
6539 Disabling = Id.consume_front("no");
6540
6541 if (isGFX940() && !Mnemo.starts_with("s_")) {
6542 return StringSwitch<unsigned>(Id)
6543 .Case("nt", AMDGPU::CPol::NT)
6544 .Case("sc0", AMDGPU::CPol::SC0)
6545 .Case("sc1", AMDGPU::CPol::SC1)
6546 .Default(0);
6547 }
6548
6549 return StringSwitch<unsigned>(Id)
6550 .Case("dlc", AMDGPU::CPol::DLC)
6551 .Case("glc", AMDGPU::CPol::GLC)
6552 .Case("scc", AMDGPU::CPol::SCC)
6553 .Case("slc", AMDGPU::CPol::SLC)
6554 .Default(0);
6555}
6556
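// Cache-policy modifiers accumulate into one bitmask operand. On pre-gfx12
// targets the accepted tokens sketched above are "glc", "slc", "dlc" (gfx10+),
// "scc" (gfx90a) and, on gfx940 non-SMEM instructions, "nt"/"sc0"/"sc1", each
// optionally negated with a "no" prefix (e.g. "noslc").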
6557ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
6558 if (isGFX12Plus()) {
6559 SMLoc StringLoc = getLoc();
6560
6561 int64_t CPolVal = 0;
6562 ParseStatus ResTH = ParseStatus::NoMatch;
6563 ParseStatus ResScope = ParseStatus::NoMatch;
6564
6565 for (;;) {
6566 if (ResTH.isNoMatch()) {
6567 int64_t TH;
6568 ResTH = parseTH(Operands, TH);
6569 if (ResTH.isFailure())
6570 return ResTH;
6571 if (ResTH.isSuccess()) {
6572 CPolVal |= TH;
6573 continue;
6574 }
6575 }
6576
6577 if (ResScope.isNoMatch()) {
6578 int64_t Scope;
6579 ResScope = parseScope(Operands, Scope);
6580 if (ResScope.isFailure())
6581 return ResScope;
6582 if (ResScope.isSuccess()) {
6583 CPolVal |= Scope;
6584 continue;
6585 }
6586 }
6587
6588 break;
6589 }
6590
6591 if (ResTH.isNoMatch() && ResScope.isNoMatch())
6592 return ParseStatus::NoMatch;
6593
6594 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
6595 AMDGPUOperand::ImmTyCPol));
6596 return ParseStatus::Success;
6597 }
6598
6599 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
6600 SMLoc OpLoc = getLoc();
6601 unsigned Enabled = 0, Seen = 0;
6602 for (;;) {
6603 SMLoc S = getLoc();
6604 bool Disabling;
6605 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
6606 if (!CPol)
6607 break;
6608
6609 lex();
6610
6611 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
6612 return Error(S, "dlc modifier is not supported on this GPU");
6613
6614 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
6615 return Error(S, "scc modifier is not supported on this GPU");
6616
6617 if (Seen & CPol)
6618 return Error(S, "duplicate cache policy modifier");
6619
6620 if (!Disabling)
6621 Enabled |= CPol;
6622
6623 Seen |= CPol;
6624 }
6625
6626 if (!Seen)
6627 return ParseStatus::NoMatch;
6628
6629 Operands.push_back(
6630 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
6631 return ParseStatus::Success;
6632}
6633
6634ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
6635 int64_t &Scope) {
6636 Scope = AMDGPU::CPol::SCOPE_CU; // default;
6637
6638 StringRef Value;
6639 SMLoc StringLoc;
6640 ParseStatus Res;
6641
6642 Res = parseStringWithPrefix("scope", Value, StringLoc);
6643 if (!Res.isSuccess())
6644 return Res;
6645
6647 .Case("SCOPE_CU", AMDGPU::CPol::SCOPE_CU)
6648 .Case("SCOPE_SE", AMDGPU::CPol::SCOPE_SE)
6649 .Case("SCOPE_DEV", AMDGPU::CPol::SCOPE_DEV)
6650 .Case("SCOPE_SYS", AMDGPU::CPol::SCOPE_SYS)
6651 .Default(0xffffffff);
6652
6653 if (Scope == 0xffffffff)
6654 return Error(StringLoc, "invalid scope value");
6655
6656 return ParseStatus::Success;
6657}
6658
6659ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
6660 TH = AMDGPU::CPol::TH_RT; // default
6661
6662 StringRef Value;
6663 SMLoc StringLoc;
6664 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
6665 if (!Res.isSuccess())
6666 return Res;
6667
6668 if (Value == "TH_DEFAULT")
6669 TH = AMDGPU::CPol::TH_RT;
6670 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" ||
6671 Value == "TH_LOAD_NT_WB") {
6672 return Error(StringLoc, "invalid th value");
6673 } else if (Value.consume_front("TH_ATOMIC_")) {
6674 TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
6675 } else if (Value.consume_front("TH_LOAD_")) {
6676 TH = AMDGPU::CPol::TH_TYPE_LOAD;
6677 } else if (Value.consume_front("TH_STORE_")) {
6678 TH = AMDGPU::CPol::TH_TYPE_STORE;
6679 } else {
6680 return Error(StringLoc, "invalid th value");
6681 }
6682
6683 if (Value == "BYPASS")
6684 TH |= AMDGPU::CPol::TH_REAL_BYPASS;
6685
6686 if (TH != 0) {
6693 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
6696 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
6698 .Default(0xffffffff);
6699 else
6705 .Case("RT_WB", AMDGPU::CPol::TH_RT_WB)
6706 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
6707 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
6708 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
6709 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
6710 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
6711 .Default(0xffffffff);
6712 }
6713
6714 if (TH == 0xffffffff)
6715 return Error(StringLoc, "invalid th value");
6716
6717 return ParseStatus::Success;
6718}
6719
6720 static void addOptionalImmOperand(
6721 MCInst& Inst, const OperandVector& Operands,
6722 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6723 AMDGPUOperand::ImmTy ImmT,
6724 int64_t Default = 0) {
6725 auto i = OptionalIdx.find(ImmT);
6726 if (i != OptionalIdx.end()) {
6727 unsigned Idx = i->second;
6728 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
6729 } else {
6730 Inst.addOperand(MCOperand::createImm(Default));
6731 }
6732}
6733
6734ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6735 StringRef &Value,
6736 SMLoc &StringLoc) {
6737 if (!trySkipId(Prefix, AsmToken::Colon))
6738 return ParseStatus::NoMatch;
6739
6740 StringLoc = getLoc();
6741 return parseId(Value, "expected an identifier") ? ParseStatus::Success
6742 : ParseStatus::Failure;
6743}
6744
6745//===----------------------------------------------------------------------===//
6746// MTBUF format
6747//===----------------------------------------------------------------------===//
6748
6749bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6750 int64_t MaxVal,
6751 int64_t &Fmt) {
6752 int64_t Val;
6753 SMLoc Loc = getLoc();
6754
6755 auto Res = parseIntWithPrefix(Pref, Val);
6756 if (Res.isFailure())
6757 return false;
6758 if (Res.isNoMatch())
6759 return true;
6760
6761 if (Val < 0 || Val > MaxVal) {
6762 Error(Loc, Twine("out of range ", StringRef(Pref)));
6763 return false;
6764 }
6765
6766 Fmt = Val;
6767 return true;
6768}
6769
6770ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
6771 AMDGPUOperand::ImmTy ImmTy) {
6772 const char *Pref = "index_key";
6773 int64_t ImmVal = 0;
6774 SMLoc Loc = getLoc();
6775 auto Res = parseIntWithPrefix(Pref, ImmVal);
6776 if (!Res.isSuccess())
6777 return Res;
6778
6779 if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
6780 return Error(Loc, Twine("out of range ", StringRef(Pref)));
6781
6782 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
6783 return Error(Loc, Twine("out of range ", StringRef(Pref)));
6784
6785 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
6786 return ParseStatus::Success;
6787}
6788
6789ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
6790 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
6791}
6792
6793ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
6794 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
6795}
6796
6797// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6798// values to live in a joint format operand in the MCInst encoding.
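// As an illustrative example, "dfmt:4, nfmt:2" and "nfmt:2, dfmt:4" encode the
// same joint value, and an omitted half falls back to its default.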
6799ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6800 using namespace llvm::AMDGPU::MTBUFFormat;
6801
6802 int64_t Dfmt = DFMT_UNDEF;
6803 int64_t Nfmt = NFMT_UNDEF;
6804
6805 // dfmt and nfmt can appear in either order, and each is optional.
6806 for (int I = 0; I < 2; ++I) {
6807 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6808 return ParseStatus::Failure;
6809
6810 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
6811 return ParseStatus::Failure;
6812
6813 // Skip optional comma between dfmt/nfmt
6814 // but guard against 2 commas following each other.
6815 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6816 !peekToken().is(AsmToken::Comma)) {
6817 trySkipToken(AsmToken::Comma);
6818 }
6819 }
6820
6821 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6822 return ParseStatus::NoMatch;
6823
6824 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6825 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6826
6827 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6828 return ParseStatus::Success;
6829}
6830
6831ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6832 using namespace llvm::AMDGPU::MTBUFFormat;
6833
6834 int64_t Fmt = UFMT_UNDEF;
6835
6836 if (!tryParseFmt("format", UFMT_MAX, Fmt))
6837 return ParseStatus::Failure;
6838
6839 if (Fmt == UFMT_UNDEF)
6840 return ParseStatus::NoMatch;
6841
6842 Format = Fmt;
6843 return ParseStatus::Success;
6844}
6845
6846bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6847 int64_t &Nfmt,
6848 StringRef FormatStr,
6849 SMLoc Loc) {
6850 using namespace llvm::AMDGPU::MTBUFFormat;
6851 int64_t Format;
6852
6853 Format = getDfmt(FormatStr);
6854 if (Format != DFMT_UNDEF) {
6855 Dfmt = Format;
6856 return true;
6857 }
6858
6859 Format = getNfmt(FormatStr, getSTI());
6860 if (Format != NFMT_UNDEF) {
6861 Nfmt = Format;
6862 return true;
6863 }
6864
6865 Error(Loc, "unsupported format");
6866 return false;
6867}
6868
6869ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6870 SMLoc FormatLoc,
6871 int64_t &Format) {
6872 using namespace llvm::AMDGPU::MTBUFFormat;
6873
6874 int64_t Dfmt = DFMT_UNDEF;
6875 int64_t Nfmt = NFMT_UNDEF;
6876 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6877 return ParseStatus::Failure;
6878
6879 if (trySkipToken(AsmToken::Comma)) {
6880 StringRef Str;
6881 SMLoc Loc = getLoc();
6882 if (!parseId(Str, "expected a format string") ||
6883 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
6884 return ParseStatus::Failure;
6885 if (Dfmt == DFMT_UNDEF)
6886 return Error(Loc, "duplicate numeric format");
6887 if (Nfmt == NFMT_UNDEF)
6888 return Error(Loc, "duplicate data format");
6889 }
6890
6891 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6892 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6893
6894 if (isGFX10Plus()) {
6895 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6896 if (Ufmt == UFMT_UNDEF)
6897 return Error(FormatLoc, "unsupported format");
6898 Format = Ufmt;
6899 } else {
6900 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6901 }
6902
6903 return ParseStatus::Success;
6904}
6905
6906ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6907 SMLoc Loc,
6908 int64_t &Format) {
6909 using namespace llvm::AMDGPU::MTBUFFormat;
6910
6911 auto Id = getUnifiedFormat(FormatStr, getSTI());
6912 if (Id == UFMT_UNDEF)
6913 return ParseStatus::NoMatch;
6914
6915 if (!isGFX10Plus())
6916 return Error(Loc, "unified format is not supported on this GPU");
6917
6918 Format = Id;
6919 return ParseStatus::Success;
6920}
6921
6922ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6923 using namespace llvm::AMDGPU::MTBUFFormat;
6924 SMLoc Loc = getLoc();
6925
6926 if (!parseExpr(Format))
6927 return ParseStatus::Failure;
6928 if (!isValidFormatEncoding(Format, getSTI()))
6929 return Error(Loc, "out of range format");
6930
6931 return ParseStatus::Success;
6932}
6933
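// A format operand may also be symbolic; illustrative examples (names follow
// the MTBUFFormat tables): format:[BUF_DATA_FORMAT_32,BUF_NUM_FORMAT_FLOAT] on
// pre-gfx10 targets, or a unified name such as format:[BUF_FMT_32_FLOAT] on
// gfx10+.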
6934ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6935 using namespace llvm::AMDGPU::MTBUFFormat;
6936
6937 if (!trySkipId("format", AsmToken::Colon))
6938 return ParseStatus::NoMatch;
6939
6940 if (trySkipToken(AsmToken::LBrac)) {
6941 StringRef FormatStr;
6942 SMLoc Loc = getLoc();
6943 if (!parseId(FormatStr, "expected a format string"))
6944 return ParseStatus::Failure;
6945
6946 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6947 if (Res.isNoMatch())
6948 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6949 if (!Res.isSuccess())
6950 return Res;
6951
6952 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6953 return ParseStatus::Failure;
6954
6955 return ParseStatus::Success;
6956 }
6957
6958 return parseNumericFormat(Format);
6959}
6960
6961ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6962 using namespace llvm::AMDGPU::MTBUFFormat;
6963
6964 int64_t Format = getDefaultFormatEncoding(getSTI());
6965 ParseStatus Res;
6966 SMLoc Loc = getLoc();
6967
6968 // Parse legacy format syntax.
6969 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6970 if (Res.isFailure())
6971 return Res;
6972
6973 bool FormatFound = Res.isSuccess();
6974
6975 Operands.push_back(
6976 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6977
6978 if (FormatFound)
6979 trySkipToken(AsmToken::Comma);
6980
6981 if (isToken(AsmToken::EndOfStatement)) {
6982 // We are expecting an soffset operand,
6983 // but let matcher handle the error.
6984 return ParseStatus::Success;
6985 }
6986
6987 // Parse soffset.
6988 Res = parseRegOrImm(Operands);
6989 if (!Res.isSuccess())
6990 return Res;
6991
6992 trySkipToken(AsmToken::Comma);
6993
6994 if (!FormatFound) {
6995 Res = parseSymbolicOrNumericFormat(Format);
6996 if (Res.isFailure())
6997 return Res;
6998 if (Res.isSuccess()) {
6999 auto Size = Operands.size();
7000 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
7001 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7002 Op.setImm(Format);
7003 }
7004 return ParseStatus::Success;
7005 }
7006
7007 if (isId("format") && peekToken().is(AsmToken::Colon))
7008 return Error(getLoc(), "duplicate format");
7009 return ParseStatus::Success;
7010}
7011
7012ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7013 ParseStatus Res =
7014 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7015 if (Res.isNoMatch()) {
7016 Res = parseIntWithPrefix("inst_offset", Operands,
7017 AMDGPUOperand::ImmTyInstOffset);
7018 }
7019 return Res;
7020}
7021
7022ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7023 ParseStatus Res =
7024 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7025 if (Res.isNoMatch())
7026 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7027 return Res;
7028}
7029
7030ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7031 ParseStatus Res =
7032 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7033 if (Res.isNoMatch()) {
7034 Res =
7035 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7036 }
7037 return Res;
7038}
7039
7040//===----------------------------------------------------------------------===//
7041// Exp
7042//===----------------------------------------------------------------------===//
7043
7044void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7045 OptionalImmIndexMap OptionalIdx;
7046
7047 unsigned OperandIdx[4];
7048 unsigned EnMask = 0;
7049 int SrcIdx = 0;
7050
7051 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7052 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7053
7054 // Add the register arguments
7055 if (Op.isReg()) {
7056 assert(SrcIdx < 4);
7057 OperandIdx[SrcIdx] = Inst.size();
7058 Op.addRegOperands(Inst, 1);
7059 ++SrcIdx;
7060 continue;
7061 }
7062
7063 if (Op.isOff()) {
7064 assert(SrcIdx < 4);
7065 OperandIdx[SrcIdx] = Inst.size();
7066 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
7067 ++SrcIdx;
7068 continue;
7069 }
7070
7071 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7072 Op.addImmOperands(Inst, 1);
7073 continue;
7074 }
7075
7076 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7077 continue;
7078
7079 // Handle optional arguments
7080 OptionalIdx[Op.getImmTy()] = i;
7081 }
7082
7083 assert(SrcIdx == 4);
7084
7085 bool Compr = false;
7086 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7087 Compr = true;
7088 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7089 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
7090 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
7091 }
7092
7093 for (auto i = 0; i < SrcIdx; ++i) {
7094 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
7095 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7096 }
7097 }
7098
7099 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7100 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7101
7102 Inst.addOperand(MCOperand::createImm(EnMask));
7103}
7104
7105//===----------------------------------------------------------------------===//
7106// s_waitcnt
7107//===----------------------------------------------------------------------===//
7108
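// An s_waitcnt operand is either a raw immediate or a list of named fields,
// e.g. (illustrative) "s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)"; a "_sat"
// suffix on a field name clamps an out-of-range value instead of erroring.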
7109static bool
7110 encodeCnt(
7111 const AMDGPU::IsaVersion ISA,
7112 int64_t &IntVal,
7113 int64_t CntVal,
7114 bool Saturate,
7115 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7116 unsigned (*decode)(const IsaVersion &Version, unsigned))
7117{
7118 bool Failed = false;
7119
7120 IntVal = encode(ISA, IntVal, CntVal);
7121 if (CntVal != decode(ISA, IntVal)) {
7122 if (Saturate) {
7123 IntVal = encode(ISA, IntVal, -1);
7124 } else {
7125 Failed = true;
7126 }
7127 }
7128 return Failed;
7129}
7130
7131bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7132
7133 SMLoc CntLoc = getLoc();
7134 StringRef CntName = getTokenStr();
7135
7136 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7137 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7138 return false;
7139
7140 int64_t CntVal;
7141 SMLoc ValLoc = getLoc();
7142 if (!parseExpr(CntVal))
7143 return false;
7144
7145 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7146
7147 bool Failed = true;
7148 bool Sat = CntName.ends_with("_sat");
7149
7150 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7151 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7152 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7153 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7154 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7155 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7156 } else {
7157 Error(CntLoc, "invalid counter name " + CntName);
7158 return false;
7159 }
7160
7161 if (Failed) {
7162 Error(ValLoc, "too large value for " + CntName);
7163 return false;
7164 }
7165
7166 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7167 return false;
7168
7169 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7170 if (isToken(AsmToken::EndOfStatement)) {
7171 Error(getLoc(), "expected a counter name");
7172 return false;
7173 }
7174 }
7175
7176 return true;
7177}
7178
7179ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7180 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7181 int64_t Waitcnt = getWaitcntBitMask(ISA);
7182 SMLoc S = getLoc();
7183
7184 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7185 while (!isToken(AsmToken::EndOfStatement)) {
7186 if (!parseCnt(Waitcnt))
7187 return ParseStatus::Failure;
7188 }
7189 } else {
7190 if (!parseExpr(Waitcnt))
7191 return ParseStatus::Failure;
7192 }
7193
7194 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7195 return ParseStatus::Success;
7196}
7197
7198bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7199 SMLoc FieldLoc = getLoc();
7200 StringRef FieldName = getTokenStr();
7201 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7202 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7203 return false;
7204
7205 SMLoc ValueLoc = getLoc();
7206 StringRef ValueName = getTokenStr();
7207 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7208 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7209 return false;
7210
7211 unsigned Shift;
7212 if (FieldName == "instid0") {
7213 Shift = 0;
7214 } else if (FieldName == "instskip") {
7215 Shift = 4;
7216 } else if (FieldName == "instid1") {
7217 Shift = 7;
7218 } else {
7219 Error(FieldLoc, "invalid field name " + FieldName);
7220 return false;
7221 }
7222
7223 int Value;
7224 if (Shift == 4) {
7225 // Parse values for instskip.
7227 .Case("SAME", 0)
7228 .Case("NEXT", 1)
7229 .Case("SKIP_1", 2)
7230 .Case("SKIP_2", 3)
7231 .Case("SKIP_3", 4)
7232 .Case("SKIP_4", 5)
7233 .Default(-1);
7234 } else {
7235 // Parse values for instid0 and instid1.
7237 .Case("NO_DEP", 0)
7238 .Case("VALU_DEP_1", 1)
7239 .Case("VALU_DEP_2", 2)
7240 .Case("VALU_DEP_3", 3)
7241 .Case("VALU_DEP_4", 4)
7242 .Case("TRANS32_DEP_1", 5)
7243 .Case("TRANS32_DEP_2", 6)
7244 .Case("TRANS32_DEP_3", 7)
7245 .Case("FMA_ACCUM_CYCLE_1", 8)
7246 .Case("SALU_CYCLE_1", 9)
7247 .Case("SALU_CYCLE_2", 10)
7248 .Case("SALU_CYCLE_3", 11)
7249 .Default(-1);
7250 }
7251 if (Value < 0) {
7252 Error(ValueLoc, "invalid value name " + ValueName);
7253 return false;
7254 }
7255
7256 Delay |= Value << Shift;
7257 return true;
7258}
7259
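// An s_delay_alu operand is either a raw immediate or pipe-separated fields,
// e.g. (illustrative) "s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) |
// instid1(VALU_DEP_2)".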
7260ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7261 int64_t Delay = 0;
7262 SMLoc S = getLoc();
7263
7264 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7265 do {
7266 if (!parseDelay(Delay))
7267 return ParseStatus::Failure;
7268 } while (trySkipToken(AsmToken::Pipe));
7269 } else {
7270 if (!parseExpr(Delay))
7271 return ParseStatus::Failure;
7272 }
7273
7274 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7275 return ParseStatus::Success;
7276}
7277
7278bool
7279AMDGPUOperand::isSWaitCnt() const {
7280 return isImm();
7281}
7282
7283bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7284
7285//===----------------------------------------------------------------------===//
7286// DepCtr
7287//===----------------------------------------------------------------------===//
7288
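// s_waitcnt_depctr similarly takes a raw immediate or named counters with
// values in parentheses, e.g. (illustrative; counter names vary by target)
// "s_waitcnt_depctr depctr_va_vdst(0) depctr_vm_vsrc(1)".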
7289void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7290 StringRef DepCtrName) {
7291 switch (ErrorId) {
7292 case OPR_ID_UNKNOWN:
7293 Error(Loc, Twine("invalid counter name ", DepCtrName));
7294 return;
7295 case OPR_ID_UNSUPPORTED:
7296 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7297 return;
7298 case OPR_ID_DUPLICATE:
7299 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7300 return;
7301 case OPR_VAL_INVALID:
7302 Error(Loc, Twine("invalid value for ", DepCtrName));
7303 return;
7304 default:
7305 assert(false);
7306 }
7307}
7308
7309bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7310
7311 using namespace llvm::AMDGPU::DepCtr;
7312
7313 SMLoc DepCtrLoc = getLoc();
7314 StringRef DepCtrName = getTokenStr();
7315
7316 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7317 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7318 return false;
7319
7320 int64_t ExprVal;
7321 if (!parseExpr(ExprVal))
7322 return false;
7323
7324 unsigned PrevOprMask = UsedOprMask;
7325 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7326
7327 if (CntVal < 0) {
7328 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7329 return false;
7330 }
7331
7332 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7333 return false;
7334
7335 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7336 if (isToken(AsmToken::EndOfStatement)) {
7337 Error(getLoc(), "expected a counter name");
7338 return false;
7339 }
7340 }
7341
7342 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7343 DepCtr = (DepCtr & ~CntValMask) | CntVal;
7344 return true;
7345}
7346
7347ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7348 using namespace llvm::AMDGPU::DepCtr;
7349
7350 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
7351 SMLoc Loc = getLoc();
7352
7353 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7354 unsigned UsedOprMask = 0;
7355 while (!isToken(AsmToken::EndOfStatement)) {
7356 if (!parseDepCtr(DepCtr, UsedOprMask))
7357 return ParseStatus::Failure;
7358 }
7359 } else {
7360 if (!parseExpr(DepCtr))
7361 return ParseStatus::Failure;
7362 }
7363
7364 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
7365 return ParseStatus::Success;
7366}
7367
7368bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
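// Illustrative example (not part of the original source): gfx10+ accepts named
// counters separated by '&' or ',', e.g.
//   s_waitcnt_depctr depctr_va_vdst(0) & depctr_sa_sdst(0)
// or a single absolute expression covering the whole field.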
7369
7370//===----------------------------------------------------------------------===//
7371// hwreg
7372//===----------------------------------------------------------------------===//
7373
7374ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
7375 OperandInfoTy &Offset,
7376 OperandInfoTy &Width) {
7377 using namespace llvm::AMDGPU::Hwreg;
7378
7379 if (!trySkipId("hwreg", AsmToken::LParen))
7380 return ParseStatus::NoMatch;
7381
7382 // The register may be specified by name or using a numeric code
7383 HwReg.Loc = getLoc();
7384 if (isToken(AsmToken::Identifier) &&
7385 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7386 HwReg.IsSymbolic = true;
7387 lex(); // skip register name
7388 } else if (!parseExpr(HwReg.Val, "a register name")) {
7389 return ParseStatus::Failure;
7390 }
7391
7392 if (trySkipToken(AsmToken::RParen))
7393 return ParseStatus::Success;
7394
7395 // parse optional params
7396 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
7397 return ParseStatus::Failure;
7398
7399 Offset.Loc = getLoc();
7400 if (!parseExpr(Offset.Val))
7401 return ParseStatus::Failure;
7402
7403 if (!skipToken(AsmToken::Comma, "expected a comma"))
7404 return ParseStatus::Failure;
7405
7406 Width.Loc = getLoc();
7407 if (!parseExpr(Width.Val) ||
7408 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7409 return ParseStatus::Failure;
7410
7411 return ParseStatus::Success;
7412}
7413
7414ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
7415 using namespace llvm::AMDGPU::Hwreg;
7416
7417 int64_t ImmVal = 0;
7418 SMLoc Loc = getLoc();
7419
7420 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
7421 HwregId::Default);
7422 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
7423 HwregOffset::Default);
7424 struct : StructuredOpField {
7425 using StructuredOpField::StructuredOpField;
7426 bool validate(AMDGPUAsmParser &Parser) const override {
7427 if (!isUIntN(Width, Val - 1))
7428 return Error(Parser, "only values from 1 to 32 are legal");
7429 return true;
7430 }
7431 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
7432 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
7433
7434 if (Res.isNoMatch())
7435 Res = parseHwregFunc(HwReg, Offset, Width);
7436
7437 if (Res.isSuccess()) {
7438 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
7439 return ParseStatus::Failure;
7440 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
7441 }
7442
7443 if (Res.isNoMatch() &&
7444 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
7445 Res = ParseStatus::Success;
7446
7447 if (!Res.isSuccess())
7448 return ParseStatus::Failure;
7449
7450 if (!isUInt<16>(ImmVal))
7451 return Error(Loc, "invalid immediate: only 16-bit values are legal");
7452 Operands.push_back(
7453 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
7454 return ParseStatus::Success;
7455}
7456
7457bool AMDGPUOperand::isHwreg() const {
7458 return isImmTy(ImmTyHwreg);
7459}
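// Illustrative examples (not part of the original source) of the three
// spellings accepted above:
//   s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 32)    // hwreg macro
//   s_getreg_b32 s2, {id: 1, offset: 0, size: 32} // structured immediate
//   s_getreg_b32 s2, 31                           // raw 16-bit immediate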
7460
7461//===----------------------------------------------------------------------===//
7462// sendmsg
7463//===----------------------------------------------------------------------===//
7464
7465bool
7466AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
7467 OperandInfoTy &Op,
7468 OperandInfoTy &Stream) {
7469 using namespace llvm::AMDGPU::SendMsg;
7470
7471 Msg.Loc = getLoc();
7472 if (isToken(AsmToken::Identifier) &&
7473 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7474 Msg.IsSymbolic = true;
7475 lex(); // skip message name
7476 } else if (!parseExpr(Msg.Val, "a message name")) {
7477 return false;
7478 }
7479
7480 if (trySkipToken(AsmToken::Comma)) {
7481 Op.IsDefined = true;
7482 Op.Loc = getLoc();
7483 if (isToken(AsmToken::Identifier) &&
7484 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
7485 OPR_ID_UNKNOWN) {
7486 lex(); // skip operation name
7487 } else if (!parseExpr(Op.Val, "an operation name")) {
7488 return false;
7489 }
7490
7491 if (trySkipToken(AsmToken::Comma)) {
7492 Stream.IsDefined = true;
7493 Stream.Loc = getLoc();
7494 if (!parseExpr(Stream.Val))
7495 return false;
7496 }
7497 }
7498
7499 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
7500}
7501
7502bool
7503AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
7504 const OperandInfoTy &Op,
7505 const OperandInfoTy &Stream) {
7506 using namespace llvm::AMDGPU::SendMsg;
7507
7508 // Validation strictness depends on whether the message is specified
7509 // in symbolic or in numeric form. In the latter case,
7510 // only the possibility of encoding is checked.
7511 bool Strict = Msg.IsSymbolic;
7512
7513 if (Strict) {
7514 if (Msg.Val == OPR_ID_UNSUPPORTED) {
7515 Error(Msg.Loc, "specified message id is not supported on this GPU");
7516 return false;
7517 }
7518 } else {
7519 if (!isValidMsgId(Msg.Val, getSTI())) {
7520 Error(Msg.Loc, "invalid message id");
7521 return false;
7522 }
7523 }
7524 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
7525 if (Op.IsDefined) {
7526 Error(Op.Loc, "message does not support operations");
7527 } else {
7528 Error(Msg.Loc, "missing message operation");
7529 }
7530 return false;
7531 }
7532 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
7533 if (Op.Val == OPR_ID_UNSUPPORTED)
7534 Error(Op.Loc, "specified operation id is not supported on this GPU");
7535 else
7536 Error(Op.Loc, "invalid operation id");
7537 return false;
7538 }
7539 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
7540 Stream.IsDefined) {
7541 Error(Stream.Loc, "message operation does not support streams");
7542 return false;
7543 }
7544 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
7545 Error(Stream.Loc, "invalid message stream id");
7546 return false;
7547 }
7548 return true;
7549}
7550
7551ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
7552 using namespace llvm::AMDGPU::SendMsg;
7553
7554 int64_t ImmVal = 0;
7555 SMLoc Loc = getLoc();
7556
7557 if (trySkipId("sendmsg", AsmToken::LParen)) {
7558 OperandInfoTy Msg(OPR_ID_UNKNOWN);
7559 OperandInfoTy Op(OP_NONE_);
7560 OperandInfoTy Stream(STREAM_ID_NONE_);
7561 if (parseSendMsgBody(Msg, Op, Stream) &&
7562 validateSendMsg(Msg, Op, Stream)) {
7563 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
7564 } else {
7565 return ParseStatus::Failure;
7566 }
7567 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
7568 if (ImmVal < 0 || !isUInt<16>(ImmVal))
7569 return Error(Loc, "invalid immediate: only 16-bit values are legal");
7570 } else {
7571 return ParseStatus::Failure;
7572 }
7573
7574 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
7575 return ParseStatus::Success;
7576}
7577
7578bool AMDGPUOperand::isSendMsg() const {
7579 return isImmTy(ImmTySendMsg);
7580}
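// Illustrative examples (not part of the original source):
//   s_sendmsg sendmsg(MSG_INTERRUPT)
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)   // on targets that have MSG_GS
//   s_sendmsg 0x1                              // raw 16-bit immediate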
7581
7582//===----------------------------------------------------------------------===//
7583// v_interp
7584//===----------------------------------------------------------------------===//
7585
7586ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
7587 StringRef Str;
7588 SMLoc S = getLoc();
7589
7590 if (!parseId(Str))
7591 return ParseStatus::NoMatch;
7592
7593 int Slot = StringSwitch<int>(Str)
7594 .Case("p10", 0)
7595 .Case("p20", 1)
7596 .Case("p0", 2)
7597 .Default(-1);
7598
7599 if (Slot == -1)
7600 return Error(S, "invalid interpolation slot");
7601
7602 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
7603 AMDGPUOperand::ImmTyInterpSlot));
7604 return ParseStatus::Success;
7605}
7606
7607ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
7608 StringRef Str;
7609 SMLoc S = getLoc();
7610
7611 if (!parseId(Str))
7612 return ParseStatus::NoMatch;
7613
7614 if (!Str.starts_with("attr"))
7615 return Error(S, "invalid interpolation attribute");
7616
7617 StringRef Chan = Str.take_back(2);
7618 int AttrChan = StringSwitch<int>(Chan)
7619 .Case(".x", 0)
7620 .Case(".y", 1)
7621 .Case(".z", 2)
7622 .Case(".w", 3)
7623 .Default(-1);
7624 if (AttrChan == -1)
7625 return Error(S, "invalid or missing interpolation attribute channel");
7626
7627 Str = Str.drop_back(2).drop_front(4);
7628
7629 uint8_t Attr;
7630 if (Str.getAsInteger(10, Attr))
7631 return Error(S, "invalid or missing interpolation attribute number");
7632
7633 if (Attr > 32)
7634 return Error(S, "out of bounds interpolation attribute number");
7635
7636 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
7637
7638 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
7639 AMDGPUOperand::ImmTyInterpAttr));
7640 Operands.push_back(AMDGPUOperand::CreateImm(
7641 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
7642 return ParseStatus::Success;
7643}
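// Illustrative examples (not part of the original source) of the slot and
// attribute syntax parsed above:
//   v_interp_mov_f32 v0, p10, attr0.x
//   v_interp_p1_f32  v1, v2, attr3.y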
7644
7645//===----------------------------------------------------------------------===//
7646// exp
7647//===----------------------------------------------------------------------===//
7648
7649ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
7650 using namespace llvm::AMDGPU::Exp;
7651
7652 StringRef Str;
7653 SMLoc S = getLoc();
7654
7655 if (!parseId(Str))
7656 return ParseStatus::NoMatch;
7657
7658 unsigned Id = getTgtId(Str);
7659 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
7660 return Error(S, (Id == ET_INVALID)
7661 ? "invalid exp target"
7662 : "exp target is not supported on this GPU");
7663
7664 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
7665 AMDGPUOperand::ImmTyExpTgt));
7666 return ParseStatus::Success;
7667}
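// Illustrative examples (not part of the original source) of export targets:
//   exp mrt0 v0, v0, v0, v0 done
//   exp pos0 v1, v2, v3, v4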
7668
7669//===----------------------------------------------------------------------===//
7670// parser helpers
7671//===----------------------------------------------------------------------===//
7672
7673bool
7674AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
7675 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
7676}
7677
7678bool
7679AMDGPUAsmParser::isId(const StringRef Id) const {
7680 return isId(getToken(), Id);
7681}
7682
7683bool
7684AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
7685 return getTokenKind() == Kind;
7686}
7687
7688StringRef AMDGPUAsmParser::getId() const {
7689 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
7690}
7691
7692bool
7693AMDGPUAsmParser::trySkipId(const StringRef Id) {
7694 if (isId(Id)) {
7695 lex();
7696 return true;
7697 }
7698 return false;
7699}
7700
7701bool
7702AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7703 if (isToken(AsmToken::Identifier)) {
7704 StringRef Tok = getTokenStr();
7705 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
7706 lex();
7707 return true;
7708 }
7709 }
7710 return false;
7711}
7712
7713bool
7714AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7715 if (isId(Id) && peekToken().is(Kind)) {
7716 lex();
7717 lex();
7718 return true;
7719 }
7720 return false;
7721}
7722
7723bool
7724AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7725 if (isToken(Kind)) {
7726 lex();
7727 return true;
7728 }
7729 return false;
7730}
7731
7732bool
7733AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7734 const StringRef ErrMsg) {
7735 if (!trySkipToken(Kind)) {
7736 Error(getLoc(), ErrMsg);
7737 return false;
7738 }
7739 return true;
7740}
7741
7742bool
7743AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7744 SMLoc S = getLoc();
7745
7746 const MCExpr *Expr;
7747 if (Parser.parseExpression(Expr))
7748 return false;
7749
7750 if (Expr->evaluateAsAbsolute(Imm))
7751 return true;
7752
7753 if (Expected.empty()) {
7754 Error(S, "expected absolute expression");
7755 } else {
7756 Error(S, Twine("expected ", Expected) +
7757 Twine(" or an absolute expression"));
7758 }
7759 return false;
7760}
7761
7762bool
7763AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7764 SMLoc S = getLoc();
7765
7766 const MCExpr *Expr;
7767 if (Parser.parseExpression(Expr))
7768 return false;
7769
7770 int64_t IntVal;
7771 if (Expr->evaluateAsAbsolute(IntVal)) {
7772 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7773 } else {
7774 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7775 }
7776 return true;
7777}
7778
7779bool
7780AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7781 if (isToken(AsmToken::String)) {
7782 Val = getToken().getStringContents();
7783 lex();
7784 return true;
7785 }
7786 Error(getLoc(), ErrMsg);
7787 return false;
7788}
7789
7790bool
7791AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7792 if (isToken(AsmToken::Identifier)) {
7793 Val = getTokenStr();
7794 lex();
7795 return true;
7796 }
7797 if (!ErrMsg.empty())
7798 Error(getLoc(), ErrMsg);
7799 return false;
7800}
7801
7802 AsmToken
7803 AMDGPUAsmParser::getToken() const {
7804 return Parser.getTok();
7805}
7806
7807AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7808 return isToken(AsmToken::EndOfStatement)
7809 ? getToken()
7810 : getLexer().peekTok(ShouldSkipSpace);
7811}
7812
7813void
7814AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7815 auto TokCount = getLexer().peekTokens(Tokens);
7816
7817 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7818 Tokens[Idx] = AsmToken(AsmToken::Error, "");
7819}
7820
7821 AsmToken::TokenKind
7822 AMDGPUAsmParser::getTokenKind() const {
7823 return getLexer().getKind();
7824}
7825
7826SMLoc
7827AMDGPUAsmParser::getLoc() const {
7828 return getToken().getLoc();
7829}
7830
7831 StringRef
7832 AMDGPUAsmParser::getTokenStr() const {
7833 return getToken().getString();
7834}
7835
7836void
7837AMDGPUAsmParser::lex() {
7838 Parser.Lex();
7839}
7840
7841SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
7842 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7843}
7844
7845SMLoc
7846AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7847 const OperandVector &Operands) const {
7848 for (unsigned i = Operands.size() - 1; i > 0; --i) {
7849 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7850 if (Test(Op))
7851 return Op.getStartLoc();
7852 }
7853 return getInstLoc(Operands);
7854}
7855
7856SMLoc
7857AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7858 const OperandVector &Operands) const {
7859 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7860 return getOperandLoc(Test, Operands);
7861}
7862
7863SMLoc
7864AMDGPUAsmParser::getRegLoc(unsigned Reg,
7865 const OperandVector &Operands) const {
7866 auto Test = [=](const AMDGPUOperand& Op) {
7867 return Op.isRegKind() && Op.getReg() == Reg;
7868 };
7869 return getOperandLoc(Test, Operands);
7870}
7871
7872SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
7873 bool SearchMandatoryLiterals) const {
7874 auto Test = [](const AMDGPUOperand& Op) {
7875 return Op.IsImmKindLiteral() || Op.isExpr();
7876 };
7877 SMLoc Loc = getOperandLoc(Test, Operands);
7878 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
7879 Loc = getMandatoryLitLoc(Operands);
7880 return Loc;
7881}
7882
7883SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
7884 auto Test = [](const AMDGPUOperand &Op) {
7885 return Op.IsImmKindMandatoryLiteral();
7886 };
7887 return getOperandLoc(Test, Operands);
7888}
7889
7890SMLoc
7891AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7892 auto Test = [](const AMDGPUOperand& Op) {
7893 return Op.isImmKindConst();
7894 };
7895 return getOperandLoc(Test, Operands);
7896}
7897
7898 ParseStatus
7899 AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
7900 if (!trySkipToken(AsmToken::LCurly))
7901 return ParseStatus::NoMatch;
7902
7903 bool First = true;
7904 while (!trySkipToken(AsmToken::RCurly)) {
7905 if (!First &&
7906 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
7907 return ParseStatus::Failure;
7908
7909 StringRef Id = getTokenStr();
7910 SMLoc IdLoc = getLoc();
7911 if (!skipToken(AsmToken::Identifier, "field name expected") ||
7912 !skipToken(AsmToken::Colon, "colon expected"))
7913 return ParseStatus::Failure;
7914
7915 auto I =
7916 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
7917 if (I == Fields.end())
7918 return Error(IdLoc, "unknown field");
7919 if ((*I)->IsDefined)
7920 return Error(IdLoc, "duplicate field");
7921
7922 // TODO: Support symbolic values.
7923 (*I)->Loc = getLoc();
7924 if (!parseExpr((*I)->Val))
7925 return ParseStatus::Failure;
7926 (*I)->IsDefined = true;
7927
7928 First = false;
7929 }
7930 return ParseStatus::Success;
7931}
7932
7933bool AMDGPUAsmParser::validateStructuredOpFields(
7934 ArrayRef<const StructuredOpField *> Fields) {
7935 return all_of(Fields, [this](const StructuredOpField *F) {
7936 return F->validate(*this);
7937 });
7938}
7939
7940//===----------------------------------------------------------------------===//
7941// swizzle
7942//===----------------------------------------------------------------------===//
7943
7944 LLVM_READNONE
7945 static unsigned
7946encodeBitmaskPerm(const unsigned AndMask,
7947 const unsigned OrMask,
7948 const unsigned XorMask) {
7949 using namespace llvm::AMDGPU::Swizzle;
7950
7951 return BITMASK_PERM_ENC |
7952 (AndMask << BITMASK_AND_SHIFT) |
7953 (OrMask << BITMASK_OR_SHIFT) |
7954 (XorMask << BITMASK_XOR_SHIFT);
7955}
7956
7957bool
7958AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7959 const unsigned MinVal,
7960 const unsigned MaxVal,
7961 const StringRef ErrMsg,
7962 SMLoc &Loc) {
7963 if (!skipToken(AsmToken::Comma, "expected a comma")) {
7964 return false;
7965 }
7966 Loc = getLoc();
7967 if (!parseExpr(Op)) {
7968 return false;
7969 }
7970 if (Op < MinVal || Op > MaxVal) {
7971 Error(Loc, ErrMsg);
7972 return false;
7973 }
7974
7975 return true;
7976}
7977
7978bool
7979AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7980 const unsigned MinVal,
7981 const unsigned MaxVal,
7982 const StringRef ErrMsg) {
7983 SMLoc Loc;
7984 for (unsigned i = 0; i < OpNum; ++i) {
7985 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7986 return false;
7987 }
7988
7989 return true;
7990}
7991
7992bool
7993AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7994 using namespace llvm::AMDGPU::Swizzle;
7995
7996 int64_t Lane[LANE_NUM];
7997 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7998 "expected a 2-bit lane id")) {
7999 Imm = QUAD_PERM_ENC;
8000 for (unsigned I = 0; I < LANE_NUM; ++I) {
8001 Imm |= Lane[I] << (LANE_SHIFT * I);
8002 }
8003 return true;
8004 }
8005 return false;
8006}
8007
8008bool
8009AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
8010 using namespace llvm::AMDGPU::Swizzle;
8011
8012 SMLoc Loc;
8013 int64_t GroupSize;
8014 int64_t LaneIdx;
8015
8016 if (!parseSwizzleOperand(GroupSize,
8017 2, 32,
8018 "group size must be in the interval [2,32]",
8019 Loc)) {
8020 return false;
8021 }
8022 if (!isPowerOf2_64(GroupSize)) {
8023 Error(Loc, "group size must be a power of two");
8024 return false;
8025 }
8026 if (parseSwizzleOperand(LaneIdx,
8027 0, GroupSize - 1,
8028 "lane id must be in the interval [0,group size - 1]",
8029 Loc)) {
8030 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8031 return true;
8032 }
8033 return false;
8034}
8035
8036bool
8037AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8038 using namespace llvm::AMDGPU::Swizzle;
8039
8040 SMLoc Loc;
8041 int64_t GroupSize;
8042
8043 if (!parseSwizzleOperand(GroupSize,
8044 2, 32,
8045 "group size must be in the interval [2,32]",
8046 Loc)) {
8047 return false;
8048 }
8049 if (!isPowerOf2_64(GroupSize)) {
8050 Error(Loc, "group size must be a power of two");
8051 return false;
8052 }
8053
8054 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8055 return true;
8056}
8057
8058bool
8059AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8060 using namespace llvm::AMDGPU::Swizzle;
8061
8062 SMLoc Loc;
8063 int64_t GroupSize;
8064
8065 if (!parseSwizzleOperand(GroupSize,
8066 1, 16,
8067 "group size must be in the interval [1,16]",
8068 Loc)) {
8069 return false;
8070 }
8071 if (!isPowerOf2_64(GroupSize)) {
8072 Error(Loc, "group size must be a power of two");
8073 return false;
8074 }
8075
8076 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8077 return true;
8078}
8079
8080bool
8081AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8082 using namespace llvm::AMDGPU::Swizzle;
8083
8084 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8085 return false;
8086 }
8087
8088 StringRef Ctl;
8089 SMLoc StrLoc = getLoc();
8090 if (!parseString(Ctl)) {
8091 return false;
8092 }
8093 if (Ctl.size() != BITMASK_WIDTH) {
8094 Error(StrLoc, "expected a 5-character mask");
8095 return false;
8096 }
8097
8098 unsigned AndMask = 0;
8099 unsigned OrMask = 0;
8100 unsigned XorMask = 0;
8101
8102 for (size_t i = 0; i < Ctl.size(); ++i) {
8103 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8104 switch(Ctl[i]) {
8105 default:
8106 Error(StrLoc, "invalid mask");
8107 return false;
8108 case '0':
8109 break;
8110 case '1':
8111 OrMask |= Mask;
8112 break;
8113 case 'p':
8114 AndMask |= Mask;
8115 break;
8116 case 'i':
8117 AndMask |= Mask;
8118 XorMask |= Mask;
8119 break;
8120 }
8121 }
8122
8123 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8124 return true;
8125}
8126
8127bool
8128AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8129
8130 SMLoc OffsetLoc = getLoc();
8131
8132 if (!parseExpr(Imm, "a swizzle macro")) {
8133 return false;
8134 }
8135 if (!isUInt<16>(Imm)) {
8136 Error(OffsetLoc, "expected a 16-bit offset");
8137 return false;
8138 }
8139 return true;
8140}
8141
8142bool
8143AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8144 using namespace llvm::AMDGPU::Swizzle;
8145
8146 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
8147
8148 SMLoc ModeLoc = getLoc();
8149 bool Ok = false;
8150
8151 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8152 Ok = parseSwizzleQuadPerm(Imm);
8153 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8154 Ok = parseSwizzleBitmaskPerm(Imm);
8155 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8156 Ok = parseSwizzleBroadcast(Imm);
8157 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8158 Ok = parseSwizzleSwap(Imm);
8159 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8160 Ok = parseSwizzleReverse(Imm);
8161 } else {
8162 Error(ModeLoc, "expected a swizzle mode");
8163 }
8164
8165 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
8166 }
8167
8168 return false;
8169}
8170
8171ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8172 SMLoc S = getLoc();
8173 int64_t Imm = 0;
8174
8175 if (trySkipId("offset")) {
8176
8177 bool Ok = false;
8178 if (skipToken(AsmToken::Colon, "expected a colon")) {
8179 if (trySkipId("swizzle")) {
8180 Ok = parseSwizzleMacro(Imm);
8181 } else {
8182 Ok = parseSwizzleOffset(Imm);
8183 }
8184 }
8185
8186 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8187
8188 return Ok ? ParseStatus::Success : ParseStatus::Failure;
8189 }
8190 return ParseStatus::NoMatch;
8191}
8192
8193bool
8194AMDGPUOperand::isSwizzle() const {
8195 return isImmTy(ImmTySwizzle);
8196}
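// Illustrative examples (not part of the original source) of the swizzle
// syntax parsed above:
//   ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "01pi0")
//   ds_swizzle_b32 v8, v2 offset:0x8000      // raw 16-bit offset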
8197
8198//===----------------------------------------------------------------------===//
8199// VGPR Index Mode
8200//===----------------------------------------------------------------------===//
8201
8202int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8203
8204 using namespace llvm::AMDGPU::VGPRIndexMode;
8205
8206 if (trySkipToken(AsmToken::RParen)) {
8207 return OFF;
8208 }
8209
8210 int64_t Imm = 0;
8211
8212 while (true) {
8213 unsigned Mode = 0;
8214 SMLoc S = getLoc();
8215
8216 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8217 if (trySkipId(IdSymbolic[ModeId])) {
8218 Mode = 1 << ModeId;
8219 break;
8220 }
8221 }
8222
8223 if (Mode == 0) {
8224 Error(S, (Imm == 0)?
8225 "expected a VGPR index mode or a closing parenthesis" :
8226 "expected a VGPR index mode");
8227 return UNDEF;
8228 }
8229
8230 if (Imm & Mode) {
8231 Error(S, "duplicate VGPR index mode");
8232 return UNDEF;
8233 }
8234 Imm |= Mode;
8235
8236 if (trySkipToken(AsmToken::RParen))
8237 break;
8238 if (!skipToken(AsmToken::Comma,
8239 "expected a comma or a closing parenthesis"))
8240 return UNDEF;
8241 }
8242
8243 return Imm;
8244}
8245
8246ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8247
8248 using namespace llvm::AMDGPU::VGPRIndexMode;
8249
8250 int64_t Imm = 0;
8251 SMLoc S = getLoc();
8252
8253 if (trySkipId("gpr_idx", AsmToken::LParen)) {
8254 Imm = parseGPRIdxMacro();
8255 if (Imm == UNDEF)
8256 return ParseStatus::Failure;
8257 } else {
8258 if (getParser().parseAbsoluteExpression(Imm))
8259 return ParseStatus::Failure;
8260 if (Imm < 0 || !isUInt<4>(Imm))
8261 return Error(S, "invalid immediate: only 4-bit values are legal");
8262 }
8263
8264 Operands.push_back(
8265 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8266 return ParseStatus::Success;
8267}
8268
8269bool AMDGPUOperand::isGPRIdxMode() const {
8270 return isImmTy(ImmTyGprIdxMode);
8271}
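// Illustrative examples (not part of the original source), gfx9:
//   s_set_gpr_idx_on s0, gpr_idx(SRC0,DST)
//   s_set_gpr_idx_on s0, 3                   // raw 4-bit immediate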
8272
8273//===----------------------------------------------------------------------===//
8274// sopp branch targets
8275//===----------------------------------------------------------------------===//
8276
8277ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
8278
8279 // Make sure we are not parsing something
8280 // that looks like a label or an expression but is not.
8281 // This will improve error messages.
8282 if (isRegister() || isModifier())
8283 return ParseStatus::NoMatch;
8284
8285 if (!parseExpr(Operands))
8286 return ParseStatus::Failure;
8287
8288 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
8289 assert(Opr.isImm() || Opr.isExpr());
8290 SMLoc Loc = Opr.getStartLoc();
8291
8292 // Currently we do not support arbitrary expressions as branch targets.
8293 // Only labels and absolute expressions are accepted.
8294 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8295 Error(Loc, "expected an absolute expression or a label");
8296 } else if (Opr.isImm() && !Opr.isS16Imm()) {
8297 Error(Loc, "expected a 16-bit signed jump offset");
8298 }
8299
8300 return ParseStatus::Success;
8301}
8302
8303//===----------------------------------------------------------------------===//
8304// Boolean holding registers
8305//===----------------------------------------------------------------------===//
8306
8307ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
8308 return parseReg(Operands);
8309}
8310
8311//===----------------------------------------------------------------------===//
8312// mubuf
8313//===----------------------------------------------------------------------===//
8314
8315void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
8316 const OperandVector &Operands,
8317 bool IsAtomic) {
8318 OptionalImmIndexMap OptionalIdx;
8319 unsigned FirstOperandIdx = 1;
8320 bool IsAtomicReturn = false;
8321
8322 if (IsAtomic) {
8323 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
8324 SIInstrFlags::IsAtomicRet;
8325 }
8326
8327 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
8328 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8329
8330 // Add the register arguments
8331 if (Op.isReg()) {
8332 Op.addRegOperands(Inst, 1);
8333 // Insert a tied src for atomic return dst.
8334 // This cannot be postponed as subsequent calls to
8335 // addImmOperands rely on correct number of MC operands.
8336 if (IsAtomicReturn && i == FirstOperandIdx)
8337 Op.addRegOperands(Inst, 1);
8338 continue;
8339 }
8340
8341 // Handle the case where soffset is an immediate
8342 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
8343 Op.addImmOperands(Inst, 1);
8344 continue;
8345 }
8346
8347 // Handle tokens like 'offen' which are sometimes hard-coded into the
8348 // asm string. There are no MCInst operands for these.
8349 if (Op.isToken()) {
8350 continue;
8351 }
8352 assert(Op.isImm());
8353
8354 // Handle optional arguments
8355 OptionalIdx[Op.getImmTy()] = i;
8356 }
8357
8358 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
8359 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
8360}
8361
8362//===----------------------------------------------------------------------===//
8363// smrd
8364//===----------------------------------------------------------------------===//
8365
8366bool AMDGPUOperand::isSMRDOffset8() const {
8367 return isImmLiteral() && isUInt<8>(getImm());
8368}
8369
8370bool AMDGPUOperand::isSMEMOffset() const {
8371 // Offset range is checked later by validator.
8372 return isImmLiteral();
8373}
8374
8375bool AMDGPUOperand::isSMRDLiteralOffset() const {
8376 // 32-bit literals are only supported on CI and we only want to use them
8377 // when the offset does not fit in 8 bits.
8378 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
8379}
8380
8381//===----------------------------------------------------------------------===//
8382// vop3
8383//===----------------------------------------------------------------------===//
8384
8385static bool ConvertOmodMul(int64_t &Mul) {
8386 if (Mul != 1 && Mul != 2 && Mul != 4)
8387 return false;
8388
8389 Mul >>= 1;
8390 return true;
8391}
8392
8393static bool ConvertOmodDiv(int64_t &Div) {
8394 if (Div == 1) {
8395 Div = 0;
8396 return true;
8397 }
8398
8399 if (Div == 2) {
8400 Div = 3;
8401 return true;
8402 }
8403
8404 return false;
8405}
8406
8407// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
8408// This is intentional and ensures compatibility with sp3.
8409// See bug 35397 for details.
8410bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
8411 if (BoundCtrl == 0 || BoundCtrl == 1) {
8412 if (!isGFX11Plus())
8413 BoundCtrl = 1;
8414 return true;
8415 }
8416 return false;
8417}
8418
8419void AMDGPUAsmParser::onBeginOfFile() {
8420 if (!getParser().getStreamer().getTargetStreamer() ||
8421 getSTI().getTargetTriple().getArch() == Triple::r600)
8422 return;
8423
8424 if (!getTargetStreamer().getTargetID())
8425 getTargetStreamer().initializeTargetID(getSTI(),
8426 getSTI().getFeatureString());
8427
8428 if (isHsaAbi(getSTI()))
8429 getTargetStreamer().EmitDirectiveAMDGCNTarget();
8430}
8431
8432/// Parse AMDGPU specific expressions.
8433///
8434/// expr ::= or(expr, ...) |
8435/// max(expr, ...)
8436///
8437bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
8438 using AGVK = AMDGPUMCExpr::VariantKind;
8439
8440 if (isToken(AsmToken::Identifier)) {
8441 StringRef TokenId = getTokenStr();
8442 AGVK VK = StringSwitch<AGVK>(TokenId)
8443 .Case("max", AGVK::AGVK_Max)
8444 .Case("or", AGVK::AGVK_Or)
8445 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
8446 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
8447 .Case("alignto", AGVK::AGVK_AlignTo)
8448 .Case("occupancy", AGVK::AGVK_Occupancy)
8449 .Default(AGVK::AGVK_None);
8450
8451 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
8452 SmallVector<const MCExpr *, 4> Exprs;
8453 uint64_t CommaCount = 0;
8454 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
8455 lex(); // Eat '('
8456 while (true) {
8457 if (trySkipToken(AsmToken::RParen)) {
8458 if (Exprs.empty()) {
8459 Error(getToken().getLoc(),
8460 "empty " + Twine(TokenId) + " expression");
8461 return true;
8462 }
8463 if (CommaCount + 1 != Exprs.size()) {
8464 Error(getToken().getLoc(),
8465 "mismatch of commas in " + Twine(TokenId) + " expression");
8466 return true;
8467 }
8468 Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
8469 return false;
8470 }
8471 const MCExpr *Expr;
8472 if (getParser().parseExpression(Expr, EndLoc))
8473 return true;
8474 Exprs.push_back(Expr);
8475 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
8476 if (LastTokenWasComma)
8477 CommaCount++;
8478 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
8479 Error(getToken().getLoc(),
8480 "unexpected token in " + Twine(TokenId) + " expression");
8481 return true;
8482 }
8483 }
8484 }
8485 }
8486 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
8487}
8488
8489ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
8490 StringRef Name = getTokenStr();
8491 if (Name == "mul") {
8492 return parseIntWithPrefix("mul", Operands,
8493 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8494 }
8495
8496 if (Name == "div") {
8497 return parseIntWithPrefix("div", Operands,
8498 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8499 }
8500
8501 return ParseStatus::NoMatch;
8502}
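// Illustrative examples (not part of the original source): with the
// conversions above, mul:2 encodes omod = 1 and div:2 encodes omod = 3, e.g.
//   v_add_f32_e64 v0, v1, v2 mul:2
//   v_add_f32_e64 v0, v1, v2 div:2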
8503
8504// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8505// the number of src operands present, then copies that bit into src0_modifiers.
8506static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
8507 int Opc = Inst.getOpcode();
8508 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8509 if (OpSelIdx == -1)
8510 return;
8511
8512 int SrcNum;
8513 const int Ops[] = { AMDGPU::OpName::src0,
8514 AMDGPU::OpName::src1,
8515 AMDGPU::OpName::src2 };
8516 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
8517 ++SrcNum)
8518 ;
8519 assert(SrcNum > 0);
8520
8521 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8522
8523 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
8524 if (DstIdx == -1)
8525 return;
8526
8527 const MCOperand &DstOp = Inst.getOperand(DstIdx);
8528 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8529 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8530 if (DstOp.isReg() &&
8531 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
8532 if (AMDGPU::isHi(DstOp.getReg(), MRI))
8533 ModVal |= SISrcMods::DST_OP_SEL;
8534 } else {
8535 if ((OpSel & (1 << SrcNum)) != 0)
8536 ModVal |= SISrcMods::DST_OP_SEL;
8537 }
8538 Inst.getOperand(ModIdx).setImm(ModVal);
8539}
8540
8541void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
8542 const OperandVector &Operands) {
8543 cvtVOP3P(Inst, Operands);
8544 cvtVOP3DstOpSelOnly(Inst, *getMRI());
8545}
8546
8547void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
8548 OptionalImmIndexMap &OptionalIdx) {
8549 cvtVOP3P(Inst, Operands, OptionalIdx);
8550 cvtVOP3DstOpSelOnly(Inst, *getMRI());
8551}
8552
8553static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8554 return
8555 // 1. This operand is input modifiers
8556 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
8557 // 2. This is not last operand
8558 && Desc.NumOperands > (OpNum + 1)
8559 // 3. Next operand is register class
8560 && Desc.operands()[OpNum + 1].RegClass != -1
8561 // 4. Next register is not tied to any other operand
8562 && Desc.getOperandConstraint(OpNum + 1,
8563 MCOI::OperandConstraint::TIED_TO) == -1;
8564}
8565
8566void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8567{
8568 OptionalImmIndexMap OptionalIdx;
8569 unsigned Opc = Inst.getOpcode();
8570
8571 unsigned I = 1;
8572 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8573 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8574 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8575 }
8576
8577 for (unsigned E = Operands.size(); I != E; ++I) {
8578 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8579 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8580 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8581 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
8582 Op.isInterpAttrChan()) {
8583 Inst.addOperand(MCOperand::createImm(Op.getImm()));
8584 } else if (Op.isImmModifier()) {
8585 OptionalIdx[Op.getImmTy()] = I;
8586 } else {
8587 llvm_unreachable("unhandled operand type");
8588 }
8589 }
8590
8591 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
8592 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8593 AMDGPUOperand::ImmTyHigh);
8594
8595 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8596 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8597 AMDGPUOperand::ImmTyClamp);
8598
8599 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8600 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8601 AMDGPUOperand::ImmTyOModSI);
8602}
8603
8604void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8605{
8606 OptionalImmIndexMap OptionalIdx;
8607 unsigned Opc = Inst.getOpcode();
8608
8609 unsigned I = 1;
8610 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8611 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8612 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8613 }
8614
8615 for (unsigned E = Operands.size(); I != E; ++I) {
8616 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8617 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8618 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8619 } else if (Op.isImmModifier()) {
8620 OptionalIdx[Op.getImmTy()] = I;
8621 } else {
8622 llvm_unreachable("unhandled operand type");
8623 }
8624 }
8625
8626 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
8627
8628 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8629 if (OpSelIdx != -1)
8630 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8631
8632 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8633
8634 if (OpSelIdx == -1)
8635 return;
8636
8637 const int Ops[] = { AMDGPU::OpName::src0,
8638 AMDGPU::OpName::src1,
8639 AMDGPU::OpName::src2 };
8640 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8641 AMDGPU::OpName::src1_modifiers,
8642 AMDGPU::OpName::src2_modifiers };
8643
8644 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8645
8646 for (int J = 0; J < 3; ++J) {
8647 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8648 if (OpIdx == -1)
8649 break;
8650
8651 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8652 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8653
8654 if ((OpSel & (1 << J)) != 0)
8655 ModVal |= SISrcMods::OP_SEL_0;
8656 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8657 (OpSel & (1 << 3)) != 0)
8658 ModVal |= SISrcMods::DST_OP_SEL;
8659
8660 Inst.getOperand(ModIdx).setImm(ModVal);
8661 }
8662}
8663
8664void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8665 OptionalImmIndexMap &OptionalIdx) {
8666 unsigned Opc = Inst.getOpcode();
8667
8668 unsigned I = 1;
8669 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8670 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8671 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8672 }
8673
8674 for (unsigned E = Operands.size(); I != E; ++I) {
8675 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8676 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8677 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8678 } else if (Op.isImmModifier()) {
8679 OptionalIdx[Op.getImmTy()] = I;
8680 } else {
8681 Op.addRegOrImmOperands(Inst, 1);
8682 }
8683 }
8684
8685 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
8686 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
8687 Inst.addOperand(Inst.getOperand(0));
8688 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8689 AMDGPUOperand::ImmTyByteSel);
8690 }
8691
8692 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8693 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8694 AMDGPUOperand::ImmTyClamp);
8695
8696 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8697 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8698 AMDGPUOperand::ImmTyOModSI);
8699
8700 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8701 // they have a src2 register operand that is tied to the dst operand.
8702 // The assembler does not allow modifiers for this operand, so src2_modifiers
8703 // should be 0.
8704 if (isMAC(Opc)) {
8705 auto it = Inst.begin();
8706 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8707 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8708 ++it;
8709 // Copy the operand to ensure it's not invalidated when Inst grows.
8710 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8711 }
8712}
8713
8714void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8715 OptionalImmIndexMap OptionalIdx;
8716 cvtVOP3(Inst, Operands, OptionalIdx);
8717}
8718
8719void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8720 OptionalImmIndexMap &OptIdx) {
8721 const int Opc = Inst.getOpcode();
8722 const MCInstrDesc &Desc = MII.get(Opc);
8723
8724 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8725
8726 if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8727 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
8728 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
8729 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
8730 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
8731 Inst.addOperand(Inst.getOperand(0));
8732 }
8733
8734 // Adding vdst_in operand is already covered for these DPP instructions in
8735 // cvtVOP3DPP.
8736 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
8737 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
8738 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
8739 Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
8740 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12 ||
8741 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
8742 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
8743 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
8744 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) {
8745 assert(!IsPacked);
8746 Inst.addOperand(Inst.getOperand(0));
8747 }
8748
8749 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
8750 // instruction, and then figure out where to actually put the modifiers
8751
8752 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8753 if (OpSelIdx != -1) {
8754 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8755 }
8756
8757 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8758 if (OpSelHiIdx != -1) {
8759 int DefaultVal = IsPacked ? -1 : 0;
8760 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8761 DefaultVal);
8762 }
8763
8764 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8765 if (NegLoIdx != -1)
8766 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8767
8768 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8769 if (NegHiIdx != -1)
8770 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8771
8772 const int Ops[] = { AMDGPU::OpName::src0,
8773 AMDGPU::OpName::src1,
8774 AMDGPU::OpName::src2 };
8775 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8776 AMDGPU::OpName::src1_modifiers,
8777 AMDGPU::OpName::src2_modifiers };
8778
8779 unsigned OpSel = 0;
8780 unsigned OpSelHi = 0;
8781 unsigned NegLo = 0;
8782 unsigned NegHi = 0;
8783
8784 if (OpSelIdx != -1)
8785 OpSel = Inst.getOperand(OpSelIdx).getImm();
8786
8787 if (OpSelHiIdx != -1)
8788 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8789
8790 if (NegLoIdx != -1)
8791 NegLo = Inst.getOperand(NegLoIdx).getImm();
8792
8793 if (NegHiIdx != -1)
8794 NegHi = Inst.getOperand(NegHiIdx).getImm();
8795
8796 for (int J = 0; J < 3; ++J) {
8797 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8798 if (OpIdx == -1)
8799 break;
8800
8801 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8802
8803 if (ModIdx == -1)
8804 continue;
8805
8806 uint32_t ModVal = 0;
8807
8808 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
8809 if (SrcOp.isReg() && getMRI()
8810 ->getRegClass(AMDGPU::VGPR_16RegClassID)
8811 .contains(SrcOp.getReg())) {
8812 bool VGPRSuffixIsHi = AMDGPU::isHi(SrcOp.getReg(), *getMRI());
8813 if (VGPRSuffixIsHi)
8814 ModVal |= SISrcMods::OP_SEL_0;
8815 } else {
8816 if ((OpSel & (1 << J)) != 0)
8817 ModVal |= SISrcMods::OP_SEL_0;
8818 }
8819
8820 if ((OpSelHi & (1 << J)) != 0)
8821 ModVal |= SISrcMods::OP_SEL_1;
8822
8823 if ((NegLo & (1 << J)) != 0)
8824 ModVal |= SISrcMods::NEG;
8825
8826 if ((NegHi & (1 << J)) != 0)
8827 ModVal |= SISrcMods::NEG_HI;
8828
8829 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8830 }
8831}
8832
8833void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8834 OptionalImmIndexMap OptIdx;
8835 cvtVOP3(Inst, Operands, OptIdx);
8836 cvtVOP3P(Inst, Operands, OptIdx);
8837}
8838
8839 static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
8840 unsigned i, unsigned Opc, unsigned OpName) {
8841 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
8842 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
8843 else
8844 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
8845}
8846
8847void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
8848 unsigned Opc = Inst.getOpcode();
8849
8850 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
8851 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
8852 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
8853 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
8854 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
8855
8856 OptionalImmIndexMap OptIdx;
8857 for (unsigned i = 5; i < Operands.size(); ++i) {
8858 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8859 OptIdx[Op.getImmTy()] = i;
8860 }
8861
8862 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
8863 addOptionalImmOperand(Inst, Operands, OptIdx,
8864 AMDGPUOperand::ImmTyIndexKey8bit);
8865
8866 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
8867 addOptionalImmOperand(Inst, Operands, OptIdx,
8868 AMDGPUOperand::ImmTyIndexKey16bit);
8869
8870 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8871 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
8872
8873 cvtVOP3P(Inst, Operands, OptIdx);
8874}
8875
8876//===----------------------------------------------------------------------===//
8877// VOPD
8878//===----------------------------------------------------------------------===//
8879
8880ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
8881 if (!hasVOPD(getSTI()))
8882 return ParseStatus::NoMatch;
8883
8884 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8885 SMLoc S = getLoc();
8886 lex();
8887 lex();
8888 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8889 SMLoc OpYLoc = getLoc();
8890 StringRef OpYName;
8891 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
8892 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
8893 return ParseStatus::Success;
8894 }
8895 return Error(OpYLoc, "expected a VOPDY instruction after ::");
8896 }
8897 return ParseStatus::NoMatch;
8898}
8899
8900// Create VOPD MCInst operands using parsed assembler operands.
8901void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
8902 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
8903 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
8904 if (Op.isReg()) {
8905 Op.addRegOperands(Inst, 1);
8906 return;
8907 }
8908 if (Op.isImm()) {
8909 Op.addImmOperands(Inst, 1);
8910 return;
8911 }
8912 llvm_unreachable("Unhandled operand type in cvtVOPD");
8913 };
8914
8915 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
8916
8917 // MCInst operands are ordered as follows:
8918 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
8919
8920 for (auto CompIdx : VOPD::COMPONENTS) {
8921 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
8922 }
8923
8924 for (auto CompIdx : VOPD::COMPONENTS) {
8925 const auto &CInfo = InstInfo[CompIdx];
8926 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
8927 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
8928 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
8929 if (CInfo.hasSrc2Acc())
8930 addOp(CInfo.getIndexOfDstInParsedOperands());
8931 }
8932}
8933
8934//===----------------------------------------------------------------------===//
8935// dpp
8936//===----------------------------------------------------------------------===//
8937
8938bool AMDGPUOperand::isDPP8() const {
8939 return isImmTy(ImmTyDPP8);
8940}
8941
8942bool AMDGPUOperand::isDPPCtrl() const {
8943 using namespace AMDGPU::DPP;
8944
8945 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8946 if (result) {
8947 int64_t Imm = getImm();
8948 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8949 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8950 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8951 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8952 (Imm == DppCtrl::WAVE_SHL1) ||
8953 (Imm == DppCtrl::WAVE_ROL1) ||
8954 (Imm == DppCtrl::WAVE_SHR1) ||
8955 (Imm == DppCtrl::WAVE_ROR1) ||
8956 (Imm == DppCtrl::ROW_MIRROR) ||
8957 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8958 (Imm == DppCtrl::BCAST15) ||
8959 (Imm == DppCtrl::BCAST31) ||
8960 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8961 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8962 }
8963 return false;
8964}
8965
8966//===----------------------------------------------------------------------===//
8967// mAI
8968//===----------------------------------------------------------------------===//
8969
8970bool AMDGPUOperand::isBLGP() const {
8971 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8972}
8973
8974bool AMDGPUOperand::isS16Imm() const {
8975 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8976}
8977
8978bool AMDGPUOperand::isU16Imm() const {
8979 return isImmLiteral() && isUInt<16>(getImm());
8980}
8981
8982//===----------------------------------------------------------------------===//
8983// dim
8984//===----------------------------------------------------------------------===//
8985
8986bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8987 // We want to allow "dim:1D" etc.,
8988 // but the initial 1 is tokenized as an integer.
8989 std::string Token;
8990 if (isToken(AsmToken::Integer)) {
8991 SMLoc Loc = getToken().getEndLoc();
8992 Token = std::string(getTokenStr());
8993 lex();
8994 if (getLoc() != Loc)
8995 return false;
8996 }
8997
8998 StringRef Suffix;
8999 if (!parseId(Suffix))
9000 return false;
9001 Token += Suffix;
9002
9003 StringRef DimId = Token;
9004 if (DimId.starts_with("SQ_RSRC_IMG_"))
9005 DimId = DimId.drop_front(12);
9006
9007 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
9008 if (!DimInfo)
9009 return false;
9010
9011 Encoding = DimInfo->Encoding;
9012 return true;
9013}
9014
9015ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
9016 if (!isGFX10Plus())
9017 return ParseStatus::NoMatch;
9018
9019 SMLoc S = getLoc();
9020
9021 if (!trySkipId("dim", AsmToken::Colon))
9022 return ParseStatus::NoMatch;
9023
9024 unsigned Encoding;
9025 SMLoc Loc = getLoc();
9026 if (!parseDimId(Encoding))
9027 return Error(Loc, "invalid dim value");
9028
9029 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
9030 AMDGPUOperand::ImmTyDim));
9031 return ParseStatus::Success;
9032}
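// Illustrative example (not part of the original source), gfx10+:
//   image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D
// The short form dim:1D is also accepted, per parseDimId above.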
9033
9034//===----------------------------------------------------------------------===//
9035// dpp
9036//===----------------------------------------------------------------------===//
9037
9038ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9039 SMLoc S = getLoc();
9040
9041 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9042 return ParseStatus::NoMatch;
9043
9044 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
9045
9046 int64_t Sels[8];
9047
9048 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9049 return ParseStatus::Failure;
9050
9051 for (size_t i = 0; i < 8; ++i) {
9052 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9053 return ParseStatus::Failure;
9054
9055 SMLoc Loc = getLoc();
9056 if (getParser().parseAbsoluteExpression(Sels[i]))
9057 return ParseStatus::Failure;
9058 if (0 > Sels[i] || 7 < Sels[i])
9059 return Error(Loc, "expected a 3-bit value");
9060 }
9061
9062 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9063 return ParseStatus::Failure;
9064
9065 unsigned DPP8 = 0;
9066 for (size_t i = 0; i < 8; ++i)
9067 DPP8 |= (Sels[i] << (i * 3));
9068
9069 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9070 return ParseStatus::Success;
9071}
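// Illustrative example (not part of the original source), gfx10+:
//   v_mov_b32_dpp v0, v1 dpp8:[7,6,5,4,3,2,1,0]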
9072
9073bool
9074AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9075 const OperandVector &Operands) {
9076 if (Ctrl == "row_newbcast")
9077 return isGFX90A();
9078
9079 if (Ctrl == "row_share" ||
9080 Ctrl == "row_xmask")
9081 return isGFX10Plus();
9082
9083 if (Ctrl == "wave_shl" ||
9084 Ctrl == "wave_shr" ||
9085 Ctrl == "wave_rol" ||
9086 Ctrl == "wave_ror" ||
9087 Ctrl == "row_bcast")
9088 return isVI() || isGFX9();
9089
9090 return Ctrl == "row_mirror" ||
9091 Ctrl == "row_half_mirror" ||
9092 Ctrl == "quad_perm" ||
9093 Ctrl == "row_shl" ||
9094 Ctrl == "row_shr" ||
9095 Ctrl == "row_ror";
9096}
9097
9098int64_t
9099AMDGPUAsmParser::parseDPPCtrlPerm() {
9100 // quad_perm:[%d,%d,%d,%d]
9101
9102 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9103 return -1;
9104
9105 int64_t Val = 0;
9106 for (int i = 0; i < 4; ++i) {
9107 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9108 return -1;
9109
9110 int64_t Temp;
9111 SMLoc Loc = getLoc();
9112 if (getParser().parseAbsoluteExpression(Temp))
9113 return -1;
9114 if (Temp < 0 || Temp > 3) {
9115 Error(Loc, "expected a 2-bit value");
9116 return -1;
9117 }
9118
9119 Val += (Temp << i * 2);
9120 }
9121
9122 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9123 return -1;
9124
9125 return Val;
9126}
9127
9128int64_t
9129AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
9130 using namespace AMDGPU::DPP;
9131
9132 // sel:%d
9133
9134 int64_t Val;
9135 SMLoc Loc = getLoc();
9136
9137 if (getParser().parseAbsoluteExpression(Val))
9138 return -1;
9139
9140 struct DppCtrlCheck {
9141 int64_t Ctrl;
9142 int Lo;
9143 int Hi;
9144 };
9145
9146 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
9147 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
9148 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
9149 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
9150 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
9151 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
9152 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
9153 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
9154 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
9155 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
9156 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
9157 .Default({-1, 0, 0});
9158
9159 bool Valid;
9160 if (Check.Ctrl == -1) {
9161 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
9162 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
9163 } else {
9164 Valid = Check.Lo <= Val && Val <= Check.Hi;
9165 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
9166 }
9167
9168 if (!Valid) {
9169 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
9170 return -1;
9171 }
9172
9173 return Val;
9174}
9175
9176ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
9177 using namespace AMDGPU::DPP;
9178
9179 if (!isToken(AsmToken::Identifier) ||
9180 !isSupportedDPPCtrl(getTokenStr(), Operands))
9181 return ParseStatus::NoMatch;
9182
9183 SMLoc S = getLoc();
9184 int64_t Val = -1;
9185 StringRef Ctrl;
9186
9187 parseId(Ctrl);
9188
9189 if (Ctrl == "row_mirror") {
9190 Val = DppCtrl::ROW_MIRROR;
9191 } else if (Ctrl == "row_half_mirror") {
9192 Val = DppCtrl::ROW_HALF_MIRROR;
9193 } else {
9194 if (skipToken(AsmToken::Colon, "expected a colon")) {
9195 if (Ctrl == "quad_perm") {
9196 Val = parseDPPCtrlPerm();
9197 } else {
9198 Val = parseDPPCtrlSel(Ctrl);
9199 }
9200 }
9201 }
9202
9203 if (Val == -1)
9204 return ParseStatus::Failure;
9205
9206 Operands.push_back(
9207 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
9208 return ParseStatus::Success;
9209}
9210
9211void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
9212 bool IsDPP8) {
9213 OptionalImmIndexMap OptionalIdx;
9214 unsigned Opc = Inst.getOpcode();
9215 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9216
9217 // MAC instructions are special because they have 'old'
9218 // operand which is not tied to dst (but assumed to be).
9219 // They also have dummy unused src2_modifiers.
9220 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
9221 int Src2ModIdx =
9222 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
9223 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
9224 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
9225
9226 unsigned I = 1;
9227 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9228 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9229 }
9230
9231 int Fi = 0;
9232 for (unsigned E = Operands.size(); I != E; ++I) {
9233
9234 if (IsMAC) {
9235 int NumOperands = Inst.getNumOperands();
9236 if (OldIdx == NumOperands) {
9237 // Handle old operand
9238 constexpr int DST_IDX = 0;
9239 Inst.addOperand(Inst.getOperand(DST_IDX));
9240 } else if (Src2ModIdx == NumOperands) {
9241 // Add unused dummy src2_modifiers
9242 Inst.addOperand(MCOperand::createImm(0));
9243 }
9244 }
9245
9246 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
9247 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
9248 Inst.addOperand(Inst.getOperand(0));
9249 }
9250
9251 bool IsVOP3CvtSrDpp =
9252 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9253 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9254 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9255 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
9256 if (IsVOP3CvtSrDpp) {
9257 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
9258 Inst.addOperand(MCOperand::createImm(0));
9259 Inst.addOperand(MCOperand::createReg(MCRegister()));
9260 }
9261 }
9262
9263 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9264 MCOI::TIED_TO);
9265 if (TiedTo != -1) {
9266 assert((unsigned)TiedTo < Inst.getNumOperands());
9267 // handle tied old or src2 for MAC instructions
9268 Inst.addOperand(Inst.getOperand(TiedTo));
9269 }
9270 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9271 // Add the register arguments
9272 if (IsDPP8 && Op.isDppFI()) {
9273 Fi = Op.getImm();
9274 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9275 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9276 } else if (Op.isReg()) {
9277 Op.addRegOperands(Inst, 1);
9278 } else if (Op.isImm() &&
9279 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
9280 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
9281 Op.addImmOperands(Inst, 1);
9282 } else if (Op.isImm()) {
9283 OptionalIdx[Op.getImmTy()] = I;
9284 } else {
9285 llvm_unreachable("unhandled operand type");
9286 }
9287 }
9288
9289 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel))
9290 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9291 AMDGPUOperand::ImmTyByteSel);
9292
9293 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9294 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9295 AMDGPUOperand::ImmTyClamp);
9296
9297 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9298 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
9299
9300 if (Desc.TSFlags & SIInstrFlags::VOP3P)
9301 cvtVOP3P(Inst, Operands, OptionalIdx);
9302 else if (Desc.TSFlags & SIInstrFlags::VOP3)
9303 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
9304 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9305 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9306 }
9307
9308 if (IsDPP8) {
9309 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
9310 using namespace llvm::AMDGPU::DPP;
9311 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9312 } else {
9313 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
9314 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9315 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9316 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9317
9318 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
9319 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9320 AMDGPUOperand::ImmTyDppFI);
9321 }
9322}
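// For the classic (non-DPP8) form, omitted operands fall back to the defaults
// above: dpp_ctrl 0xe4 (identity quad_perm), row_mask 0xf and bank_mask 0xf
// (all rows and banks enabled), and bound_ctrl 0.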
9323
9324void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
9325 OptionalImmIndexMap OptionalIdx;
9326
9327 unsigned I = 1;
9328 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9329 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9330 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9331 }
9332
9333 int Fi = 0;
9334 for (unsigned E = Operands.size(); I != E; ++I) {
9335 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9336 MCOI::TIED_TO);
9337 if (TiedTo != -1) {
9338 assert((unsigned)TiedTo < Inst.getNumOperands());
9339 // handle tied old or src2 for MAC instructions
9340 Inst.addOperand(Inst.getOperand(TiedTo));
9341 }
9342 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9343 // Add the register arguments
9344 if (Op.isReg() && validateVccOperand(Op.getReg())) {
9345 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
9346 // Skip it.
9347 continue;
9348 }
9349
9350 if (IsDPP8) {
9351 if (Op.isDPP8()) {
9352 Op.addImmOperands(Inst, 1);
9353 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9354 Op.addRegWithFPInputModsOperands(Inst, 2);
9355 } else if (Op.isDppFI()) {
9356 Fi = Op.getImm();
9357 } else if (Op.isReg()) {
9358 Op.addRegOperands(Inst, 1);
9359 } else {
9360 llvm_unreachable("Invalid operand type");
9361 }
9362 } else {
9363 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9364 Op.addRegWithFPInputModsOperands(Inst, 2);
9365 } else if (Op.isReg()) {
9366 Op.addRegOperands(Inst, 1);
9367 } else if (Op.isDPPCtrl()) {
9368 Op.addImmOperands(Inst, 1);
9369 } else if (Op.isImm()) {
9370 // Handle optional arguments
9371 OptionalIdx[Op.getImmTy()] = I;
9372 } else {
9373 llvm_unreachable("Invalid operand type");
9374 }
9375 }
9376 }
9377
9378 if (IsDPP8) {
9379 using namespace llvm::AMDGPU::DPP;
9380 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9381 } else {
9382 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9383 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9384 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9385 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
9386 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9387 AMDGPUOperand::ImmTyDppFI);
9388 }
9389 }
9390}
9391
9392//===----------------------------------------------------------------------===//
9393// sdwa
9394//===----------------------------------------------------------------------===//
9395
9396ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
9397 StringRef Prefix,
9398 AMDGPUOperand::ImmTy Type) {
9399 using namespace llvm::AMDGPU::SDWA;
9400
9401 SMLoc S = getLoc();
9402 StringRef Value;
9403
9404 SMLoc StringLoc;
9405 ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc);
9406 if (!Res.isSuccess())
9407 return Res;
9408
9409 int64_t Int;
9410 Int = StringSwitch<int64_t>(Value)
9411 .Case("BYTE_0", SdwaSel::BYTE_0)
9412 .Case("BYTE_1", SdwaSel::BYTE_1)
9413 .Case("BYTE_2", SdwaSel::BYTE_2)
9414 .Case("BYTE_3", SdwaSel::BYTE_3)
9415 .Case("WORD_0", SdwaSel::WORD_0)
9416 .Case("WORD_1", SdwaSel::WORD_1)
9417 .Case("DWORD", SdwaSel::DWORD)
9418 .Default(0xffffffff);
9419
9420 if (Int == 0xffffffff)
9421 return Error(StringLoc, "invalid " + Twine(Prefix) + " value");
9422
9423 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
9424 return ParseStatus::Success;
9425}
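// The same selector names serve dst_sel, src0_sel and src1_sel, e.g.
// "dst_sel:BYTE_0" or "src0_sel:WORD_1"; names outside BYTE_0..BYTE_3,
// WORD_0..WORD_1 and DWORD are reported as invalid.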
9426
9427ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
9428 using namespace llvm::AMDGPU::SDWA;
9429
9430 SMLoc S = getLoc();
9431 StringRef Value;
9432
9433 SMLoc StringLoc;
9434 ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc);
9435 if (!Res.isSuccess())
9436 return Res;
9437
9438 int64_t Int;
9439 Int = StringSwitch<int64_t>(Value)
9440 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
9441 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
9442 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
9443 .Default(0xffffffff);
9444
9445 if (Int == 0xffffffff)
9446 return Error(StringLoc, "invalid dst_unused value");
9447
9448 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused));
9449 return ParseStatus::Success;
9450}
9451
9452void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
9453 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
9454}
9455
9456void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
9457 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
9458}
9459
9460void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
9461 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
9462}
9463
9464void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
9465 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
9466}
9467
9468void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
9469 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
9470}
9471
9472void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
9473 uint64_t BasicInstType,
9474 bool SkipDstVcc,
9475 bool SkipSrcVcc) {
9476 using namespace llvm::AMDGPU::SDWA;
9477
9478 OptionalImmIndexMap OptionalIdx;
9479 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
9480 bool SkippedVcc = false;
9481
9482 unsigned I = 1;
9483 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9484 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9485 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9486 }
9487
9488 for (unsigned E = Operands.size(); I != E; ++I) {
9489 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9490 if (SkipVcc && !SkippedVcc && Op.isReg() &&
9491 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
9492 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
9493 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
9494 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
9495 // Skip VCC only if we didn't skip it on previous iteration.
9496 // Note that src0 and src1 occupy 2 slots each because of modifiers.
9497 if (BasicInstType == SIInstrFlags::VOP2 &&
9498 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
9499 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
9500 SkippedVcc = true;
9501 continue;
9502 }
9503 if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
9504 SkippedVcc = true;
9505 continue;
9506 }
9507 }
9508 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9509 Op.addRegOrImmWithInputModsOperands(Inst, 2);
9510 } else if (Op.isImm()) {
9511 // Handle optional arguments
9512 OptionalIdx[Op.getImmTy()] = I;
9513 } else {
9514 llvm_unreachable("Invalid operand type");
9515 }
9516 SkippedVcc = false;
9517 }
9518
9519 const unsigned Opc = Inst.getOpcode();
9520 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
9521 Opc != AMDGPU::V_NOP_sdwa_vi) {
9522 // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
9523 switch (BasicInstType) {
9524 case SIInstrFlags::VOP1:
9525 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9526 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9527 AMDGPUOperand::ImmTyClamp, 0);
9528
9529 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9530 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9531 AMDGPUOperand::ImmTyOModSI, 0);
9532
9533 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
9534 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9535 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9536
9537 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
9538 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9539 AMDGPUOperand::ImmTySDWADstUnused,
9540 DstUnused::UNUSED_PRESERVE);
9541
9542 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9543 break;
9544
9545 case SIInstrFlags::VOP2:
9546 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9547 AMDGPUOperand::ImmTyClamp, 0);
9548
9549 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
9550 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
9551
9552 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9553 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
9554 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9555 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9556 break;
9557
9558 case SIInstrFlags::VOPC:
9559 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
9560 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9561 AMDGPUOperand::ImmTyClamp, 0);
9562 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9563 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9564 break;
9565
9566 default:
9567 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9568 }
9569 }
9570
9571 // special case v_mac_{f16, f32}:
9572 // it has src2 register operand that is tied to dst operand
9573 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9574 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
9575 auto it = Inst.begin();
9576 std::advance(
9577 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
9578 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
9579 }
9580}
9581
9582/// Force static initialization.
9583extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
9584 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
9585 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
9586}
9587
9588#define GET_REGISTER_MATCHER
9589#define GET_MATCHER_IMPLEMENTATION
9590#define GET_MNEMONIC_SPELL_CHECKER
9591#define GET_MNEMONIC_CHECKER
9592#include "AMDGPUGenAsmMatcher.inc"
9593
9594ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
9595 unsigned MCK) {
9596 switch (MCK) {
9597 case MCK_addr64:
9598 return parseTokenOp("addr64", Operands);
9599 case MCK_done:
9600 return parseTokenOp("done", Operands);
9601 case MCK_idxen:
9602 return parseTokenOp("idxen", Operands);
9603 case MCK_lds:
9604 return parseTokenOp("lds", Operands);
9605 case MCK_offen:
9606 return parseTokenOp("offen", Operands);
9607 case MCK_off:
9608 return parseTokenOp("off", Operands);
9609 case MCK_row_95_en:
9610 return parseTokenOp("row_en", Operands);
9611 case MCK_gds:
9612 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
9613 case MCK_tfe:
9614 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
9615 }
9616 return tryCustomParseOperand(Operands, MCK);
9617}
9618
9619// This function should be defined after auto-generated include so that we have
9620// MatchClassKind enum defined
9621unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
9622 unsigned Kind) {
9623 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
9624 // But MatchInstructionImpl() expects to meet token and fails to validate
9625 // operand. This method checks if we are given immediate operand but expect to
9626 // get corresponding token.
9627 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9628 switch (Kind) {
9629 case MCK_addr64:
9630 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9631 case MCK_gds:
9632 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9633 case MCK_lds:
9634 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9635 case MCK_idxen:
9636 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9637 case MCK_offen:
9638 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9639 case MCK_tfe:
9640 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
9641 case MCK_SSrc_b32:
9642 // When operands have expression values, they will return true for isToken,
9643 // because it is not possible to distinguish between a token and an
9644 // expression at parse time. MatchInstructionImpl() will always try to
9645 // match an operand as a token, when isToken returns true, and when the
9646 // name of the expression is not a valid token, the match will fail,
9647 // so we need to handle it here.
9648 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
9649 case MCK_SSrc_f32:
9650 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
9651 case MCK_SOPPBrTarget:
9652 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
9653 case MCK_VReg32OrOff:
9654 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9655 case MCK_InterpSlot:
9656 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9657 case MCK_InterpAttr:
9658 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9659 case MCK_InterpAttrChan:
9660 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
9661 case MCK_SReg_64:
9662 case MCK_SReg_64_XEXEC:
9663 // Null is defined as a 32-bit register but
9664 // it should also be enabled with 64-bit operands.
9665 // The following code enables it for SReg_64 operands
9666 // used as source and destination. Remaining source
9667 // operands are handled in isInlinableImm.
9668 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9669 default:
9670 return Match_InvalidOperand;
9671 }
9672}
9673
9674//===----------------------------------------------------------------------===//
9675// endpgm
9676//===----------------------------------------------------------------------===//
9677
9678ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
9679 SMLoc S = getLoc();
9680 int64_t Imm = 0;
9681
9682 if (!parseExpr(Imm)) {
9683 // The operand is optional, if not present default to 0
9684 Imm = 0;
9685 }
9686
9687 if (!isUInt<16>(Imm))
9688 return Error(S, "expected a 16-bit value");
9689
9690 Operands.push_back(
9691 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9692 return ParseStatus::Success;
9693}
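// The immediate after s_endpgm is optional: if no expression is present the
// operand defaults to 0, and an explicit value must fit in 16 bits.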
9694
9695bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9696
9697//===----------------------------------------------------------------------===//
9698// Split Barrier
9699//===----------------------------------------------------------------------===//
9700
9701bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }