LLVM 19.0.0git
AMDGPUAsmParser.cpp
Go to the documentation of this file.
1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
14#include "SIDefines.h"
15#include "SIInstrInfo.h"
16#include "SIRegisterInfo.h"
21#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/StringSet.h"
24#include "llvm/ADT/Twine.h"
27#include "llvm/MC/MCAsmInfo.h"
28#include "llvm/MC/MCContext.h"
29#include "llvm/MC/MCExpr.h"
30#include "llvm/MC/MCInst.h"
31#include "llvm/MC/MCInstrDesc.h"
36#include "llvm/MC/MCSymbol.h"
43#include <optional>
44
45using namespace llvm;
46using namespace llvm::AMDGPU;
47using namespace llvm::amdhsa;
48
49namespace {
50
51class AMDGPUAsmParser;
52
53enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
54
55//===----------------------------------------------------------------------===//
56// Operand
57//===----------------------------------------------------------------------===//
58
59class AMDGPUOperand : public MCParsedAsmOperand {
60 enum KindTy {
61 Token,
62 Immediate,
65 } Kind;
66
67 SMLoc StartLoc, EndLoc;
68 const AMDGPUAsmParser *AsmParser;
69
70public:
71 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
72 : Kind(Kind_), AsmParser(AsmParser_) {}
73
74 using Ptr = std::unique_ptr<AMDGPUOperand>;
75
76 struct Modifiers {
77 bool Abs = false;
78 bool Neg = false;
79 bool Sext = false;
80 bool Lit = false;
81
82 bool hasFPModifiers() const { return Abs || Neg; }
83 bool hasIntModifiers() const { return Sext; }
84 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
85
86 int64_t getFPModifiersOperand() const {
87 int64_t Operand = 0;
88 Operand |= Abs ? SISrcMods::ABS : 0u;
89 Operand |= Neg ? SISrcMods::NEG : 0u;
90 return Operand;
91 }
92
93 int64_t getIntModifiersOperand() const {
94 int64_t Operand = 0;
95 Operand |= Sext ? SISrcMods::SEXT : 0u;
96 return Operand;
97 }
98
99 int64_t getModifiersOperand() const {
100 assert(!(hasFPModifiers() && hasIntModifiers())
101 && "fp and int modifiers should not be used simultaneously");
102 if (hasFPModifiers()) {
103 return getFPModifiersOperand();
104 } else if (hasIntModifiers()) {
105 return getIntModifiersOperand();
106 } else {
107 return 0;
108 }
109 }
110
111 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
112 };
113
114 enum ImmTy {
115 ImmTyNone,
116 ImmTyGDS,
117 ImmTyLDS,
118 ImmTyOffen,
119 ImmTyIdxen,
120 ImmTyAddr64,
121 ImmTyOffset,
122 ImmTyInstOffset,
123 ImmTyOffset0,
124 ImmTyOffset1,
125 ImmTySMEMOffsetMod,
126 ImmTyCPol,
127 ImmTyTFE,
128 ImmTyD16,
129 ImmTyClampSI,
130 ImmTyOModSI,
131 ImmTySDWADstSel,
132 ImmTySDWASrc0Sel,
133 ImmTySDWASrc1Sel,
134 ImmTySDWADstUnused,
135 ImmTyDMask,
136 ImmTyDim,
137 ImmTyUNorm,
138 ImmTyDA,
139 ImmTyR128A16,
140 ImmTyA16,
141 ImmTyLWE,
142 ImmTyExpTgt,
143 ImmTyExpCompr,
144 ImmTyExpVM,
145 ImmTyFORMAT,
146 ImmTyHwreg,
147 ImmTyOff,
148 ImmTySendMsg,
149 ImmTyInterpSlot,
150 ImmTyInterpAttr,
151 ImmTyInterpAttrChan,
152 ImmTyOpSel,
153 ImmTyOpSelHi,
154 ImmTyNegLo,
155 ImmTyNegHi,
156 ImmTyIndexKey8bit,
157 ImmTyIndexKey16bit,
158 ImmTyDPP8,
159 ImmTyDppCtrl,
160 ImmTyDppRowMask,
161 ImmTyDppBankMask,
162 ImmTyDppBoundCtrl,
163 ImmTyDppFI,
164 ImmTySwizzle,
165 ImmTyGprIdxMode,
166 ImmTyHigh,
167 ImmTyBLGP,
168 ImmTyCBSZ,
169 ImmTyABID,
170 ImmTyEndpgm,
171 ImmTyWaitVDST,
172 ImmTyWaitEXP,
173 ImmTyWaitVAVDst,
174 ImmTyWaitVMVSrc,
175 ImmTyByteSel,
176 };
177
178 // Immediate operand kind.
179 // It helps to identify the location of an offending operand after an error.
180 // Note that regular literals and mandatory literals (KImm) must be handled
181 // differently. When looking for an offending operand, we should usually
182 // ignore mandatory literals because they are part of the instruction and
183 // cannot be changed. Report location of mandatory operands only for VOPD,
184 // when both OpX and OpY have a KImm and there are no other literals.
185 enum ImmKindTy {
186 ImmKindTyNone,
187 ImmKindTyLiteral,
188 ImmKindTyMandatoryLiteral,
189 ImmKindTyConst,
190 };
191
192private:
193 struct TokOp {
194 const char *Data;
195 unsigned Length;
196 };
197
198 struct ImmOp {
199 int64_t Val;
200 ImmTy Type;
201 bool IsFPImm;
202 mutable ImmKindTy Kind;
203 Modifiers Mods;
204 };
205
206 struct RegOp {
207 unsigned RegNo;
208 Modifiers Mods;
209 };
210
211 union {
212 TokOp Tok;
213 ImmOp Imm;
214 RegOp Reg;
215 const MCExpr *Expr;
216 };
217
218public:
219 bool isToken() const override { return Kind == Token; }
220
221 bool isSymbolRefExpr() const {
222 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
223 }
224
225 bool isImm() const override {
226 return Kind == Immediate;
227 }
228
229 void setImmKindNone() const {
230 assert(isImm());
231 Imm.Kind = ImmKindTyNone;
232 }
233
234 void setImmKindLiteral() const {
235 assert(isImm());
236 Imm.Kind = ImmKindTyLiteral;
237 }
238
239 void setImmKindMandatoryLiteral() const {
240 assert(isImm());
241 Imm.Kind = ImmKindTyMandatoryLiteral;
242 }
243
244 void setImmKindConst() const {
245 assert(isImm());
246 Imm.Kind = ImmKindTyConst;
247 }
248
249 bool IsImmKindLiteral() const {
250 return isImm() && Imm.Kind == ImmKindTyLiteral;
251 }
252
253 bool IsImmKindMandatoryLiteral() const {
254 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
255 }
256
257 bool isImmKindConst() const {
258 return isImm() && Imm.Kind == ImmKindTyConst;
259 }
260
261 bool isInlinableImm(MVT type) const;
262 bool isLiteralImm(MVT type) const;
263
264 bool isRegKind() const {
265 return Kind == Register;
266 }
267
268 bool isReg() const override {
269 return isRegKind() && !hasModifiers();
270 }
271
272 bool isRegOrInline(unsigned RCID, MVT type) const {
273 return isRegClass(RCID) || isInlinableImm(type);
274 }
275
276 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
277 return isRegOrInline(RCID, type) || isLiteralImm(type);
278 }
279
280 bool isRegOrImmWithInt16InputMods() const {
281 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
282 }
283
284 bool isRegOrImmWithIntT16InputMods() const {
285 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
286 }
287
288 bool isRegOrImmWithInt32InputMods() const {
289 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
290 }
291
292 bool isRegOrInlineImmWithInt16InputMods() const {
293 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
294 }
295
296 bool isRegOrInlineImmWithInt32InputMods() const {
297 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
298 }
299
300 bool isRegOrImmWithInt64InputMods() const {
301 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
302 }
303
304 bool isRegOrImmWithFP16InputMods() const {
305 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
306 }
307
308 bool isRegOrImmWithFPT16InputMods() const {
309 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
310 }
311
312 bool isRegOrImmWithFP32InputMods() const {
313 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
314 }
315
316 bool isRegOrImmWithFP64InputMods() const {
317 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
318 }
319
320 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
321 return isRegOrInline(
322 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
323 }
324
325 bool isRegOrInlineImmWithFP32InputMods() const {
326 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
327 }
328
329 bool isPackedFP16InputMods() const {
330 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
331 }
332
333 bool isVReg() const {
334 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
335 isRegClass(AMDGPU::VReg_64RegClassID) ||
336 isRegClass(AMDGPU::VReg_96RegClassID) ||
337 isRegClass(AMDGPU::VReg_128RegClassID) ||
338 isRegClass(AMDGPU::VReg_160RegClassID) ||
339 isRegClass(AMDGPU::VReg_192RegClassID) ||
340 isRegClass(AMDGPU::VReg_256RegClassID) ||
341 isRegClass(AMDGPU::VReg_512RegClassID) ||
342 isRegClass(AMDGPU::VReg_1024RegClassID);
343 }
344
345 bool isVReg32() const {
346 return isRegClass(AMDGPU::VGPR_32RegClassID);
347 }
348
349 bool isVReg32OrOff() const {
350 return isOff() || isVReg32();
351 }
352
353 bool isNull() const {
354 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
355 }
356
357 bool isVRegWithInputMods() const;
358 template <bool IsFake16> bool isT16VRegWithInputMods() const;
359
360 bool isSDWAOperand(MVT type) const;
361 bool isSDWAFP16Operand() const;
362 bool isSDWAFP32Operand() const;
363 bool isSDWAInt16Operand() const;
364 bool isSDWAInt32Operand() const;
365
366 bool isImmTy(ImmTy ImmT) const {
367 return isImm() && Imm.Type == ImmT;
368 }
369
370 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
371
372 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
373
374 bool isImmModifier() const {
375 return isImm() && Imm.Type != ImmTyNone;
376 }
377
378 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
379 bool isDim() const { return isImmTy(ImmTyDim); }
380 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
381 bool isOff() const { return isImmTy(ImmTyOff); }
382 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
383 bool isOffen() const { return isImmTy(ImmTyOffen); }
384 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
385 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
386 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
387 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
388 bool isGDS() const { return isImmTy(ImmTyGDS); }
389 bool isLDS() const { return isImmTy(ImmTyLDS); }
390 bool isCPol() const { return isImmTy(ImmTyCPol); }
391 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
392 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
393 bool isTFE() const { return isImmTy(ImmTyTFE); }
394 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
395 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
396 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
397 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
398 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
399 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
400 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
401 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
402 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
403 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
404 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
405 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
406 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
407
408 bool isRegOrImm() const {
409 return isReg() || isImm();
410 }
411
412 bool isRegClass(unsigned RCID) const;
413
414 bool isInlineValue() const;
415
416 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
417 return isRegOrInline(RCID, type) && !hasModifiers();
418 }
419
420 bool isSCSrcB16() const {
421 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
422 }
423
424 bool isSCSrcV2B16() const {
425 return isSCSrcB16();
426 }
427
428 bool isSCSrc_b32() const {
429 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
430 }
431
432 bool isSCSrc_b64() const {
433 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
434 }
435
436 bool isBoolReg() const;
437
438 bool isSCSrcF16() const {
439 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
440 }
441
442 bool isSCSrcV2F16() const {
443 return isSCSrcF16();
444 }
445
446 bool isSCSrcF32() const {
447 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
448 }
449
450 bool isSCSrcF64() const {
451 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
452 }
453
454 bool isSSrc_b32() const {
455 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
456 }
457
458 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
459
460 bool isSSrcV2B16() const {
461 llvm_unreachable("cannot happen");
462 return isSSrc_b16();
463 }
464
465 bool isSSrc_b64() const {
466 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
467 // See isVSrc64().
468 return isSCSrc_b64() || isLiteralImm(MVT::i64);
469 }
470
471 bool isSSrc_f32() const {
472 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
473 }
474
475 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
476
477 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
478
479 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
480
481 bool isSSrcV2F16() const {
482 llvm_unreachable("cannot happen");
483 return isSSrc_f16();
484 }
485
486 bool isSSrcV2FP32() const {
487 llvm_unreachable("cannot happen");
488 return isSSrc_f32();
489 }
490
491 bool isSCSrcV2FP32() const {
492 llvm_unreachable("cannot happen");
493 return isSCSrcF32();
494 }
495
496 bool isSSrcV2INT32() const {
497 llvm_unreachable("cannot happen");
498 return isSSrc_b32();
499 }
500
501 bool isSCSrcV2INT32() const {
502 llvm_unreachable("cannot happen");
503 return isSCSrc_b32();
504 }
505
506 bool isSSrcOrLds_b32() const {
507 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
508 isLiteralImm(MVT::i32) || isExpr();
509 }
510
511 bool isVCSrc_b32() const {
512 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
513 }
514
515 bool isVCSrcB64() const {
516 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
517 }
518
519 bool isVCSrcTB16() const {
520 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
521 }
522
523 bool isVCSrcTB16_Lo128() const {
524 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
525 }
526
527 bool isVCSrcFake16B16_Lo128() const {
528 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
529 }
530
531 bool isVCSrc_b16() const {
532 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
533 }
534
535 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
536
537 bool isVCSrc_f32() const {
538 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
539 }
540
541 bool isVCSrcF64() const {
542 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
543 }
544
545 bool isVCSrcTBF16() const {
546 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
547 }
548
549 bool isVCSrcTF16() const {
550 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
551 }
552
553 bool isVCSrcTBF16_Lo128() const {
554 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
555 }
556
557 bool isVCSrcTF16_Lo128() const {
558 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
559 }
560
561 bool isVCSrcFake16BF16_Lo128() const {
562 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
563 }
564
565 bool isVCSrcFake16F16_Lo128() const {
566 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
567 }
568
569 bool isVCSrc_bf16() const {
570 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
571 }
572
573 bool isVCSrc_f16() const {
574 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
575 }
576
577 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
578
579 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
580
581 bool isVSrc_b32() const {
582 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
583 }
584
585 bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
586
587 bool isVSrcT_b16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
588
589 bool isVSrcT_b16_Lo128() const {
590 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
591 }
592
593 bool isVSrcFake16_b16_Lo128() const {
594 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
595 }
596
597 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
598
599 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
600
601 bool isVCSrcV2FP32() const {
602 return isVCSrcF64();
603 }
604
605 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
606
607 bool isVCSrcV2INT32() const {
608 return isVCSrcB64();
609 }
610
611 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
612
613 bool isVSrc_f32() const {
614 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
615 }
616
617 bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }
618
619 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
620
621 bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
622
623 bool isVSrcT_bf16_Lo128() const {
624 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
625 }
626
627 bool isVSrcT_f16_Lo128() const {
628 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
629 }
630
631 bool isVSrcFake16_bf16_Lo128() const {
632 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
633 }
634
635 bool isVSrcFake16_f16_Lo128() const {
636 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
637 }
638
639 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
640
641 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
642
643 bool isVSrc_v2bf16() const {
644 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
645 }
646
647 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
648
649 bool isVISrcB32() const {
650 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
651 }
652
653 bool isVISrcB16() const {
654 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
655 }
656
657 bool isVISrcV2B16() const {
658 return isVISrcB16();
659 }
660
661 bool isVISrcF32() const {
662 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
663 }
664
665 bool isVISrcF16() const {
666 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
667 }
668
669 bool isVISrcV2F16() const {
670 return isVISrcF16() || isVISrcB32();
671 }
672
673 bool isVISrc_64_bf16() const {
674 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
675 }
676
677 bool isVISrc_64_f16() const {
678 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
679 }
680
681 bool isVISrc_64_b32() const {
682 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
683 }
684
685 bool isVISrc_64B64() const {
686 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
687 }
688
689 bool isVISrc_64_f64() const {
690 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
691 }
692
693 bool isVISrc_64V2FP32() const {
694 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
695 }
696
697 bool isVISrc_64V2INT32() const {
698 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
699 }
700
701 bool isVISrc_256_b32() const {
702 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
703 }
704
705 bool isVISrc_256_f32() const {
706 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
707 }
708
709 bool isVISrc_256B64() const {
710 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
711 }
712
713 bool isVISrc_256_f64() const {
714 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
715 }
716
717 bool isVISrc_128B16() const {
718 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
719 }
720
721 bool isVISrc_128V2B16() const {
722 return isVISrc_128B16();
723 }
724
725 bool isVISrc_128_b32() const {
726 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
727 }
728
729 bool isVISrc_128_f32() const {
730 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
731 }
732
733 bool isVISrc_256V2FP32() const {
734 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
735 }
736
737 bool isVISrc_256V2INT32() const {
738 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
739 }
740
741 bool isVISrc_512_b32() const {
742 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
743 }
744
745 bool isVISrc_512B16() const {
746 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
747 }
748
749 bool isVISrc_512V2B16() const {
750 return isVISrc_512B16();
751 }
752
753 bool isVISrc_512_f32() const {
754 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
755 }
756
757 bool isVISrc_512F16() const {
758 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
759 }
760
761 bool isVISrc_512V2F16() const {
762 return isVISrc_512F16() || isVISrc_512_b32();
763 }
764
765 bool isVISrc_1024_b32() const {
766 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
767 }
768
769 bool isVISrc_1024B16() const {
770 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
771 }
772
773 bool isVISrc_1024V2B16() const {
774 return isVISrc_1024B16();
775 }
776
777 bool isVISrc_1024_f32() const {
778 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
779 }
780
781 bool isVISrc_1024F16() const {
782 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
783 }
784
785 bool isVISrc_1024V2F16() const {
786 return isVISrc_1024F16() || isVISrc_1024_b32();
787 }
788
789 bool isAISrcB32() const {
790 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
791 }
792
793 bool isAISrcB16() const {
794 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
795 }
796
797 bool isAISrcV2B16() const {
798 return isAISrcB16();
799 }
800
801 bool isAISrcF32() const {
802 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
803 }
804
805 bool isAISrcF16() const {
806 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
807 }
808
809 bool isAISrcV2F16() const {
810 return isAISrcF16() || isAISrcB32();
811 }
812
813 bool isAISrc_64B64() const {
814 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
815 }
816
817 bool isAISrc_64_f64() const {
818 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
819 }
820
821 bool isAISrc_128_b32() const {
822 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
823 }
824
825 bool isAISrc_128B16() const {
826 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
827 }
828
829 bool isAISrc_128V2B16() const {
830 return isAISrc_128B16();
831 }
832
833 bool isAISrc_128_f32() const {
834 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
835 }
836
837 bool isAISrc_128F16() const {
838 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
839 }
840
841 bool isAISrc_128V2F16() const {
842 return isAISrc_128F16() || isAISrc_128_b32();
843 }
844
845 bool isVISrc_128_bf16() const {
846 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
847 }
848
849 bool isVISrc_128_f16() const {
850 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
851 }
852
853 bool isVISrc_128V2F16() const {
854 return isVISrc_128_f16() || isVISrc_128_b32();
855 }
856
857 bool isAISrc_256B64() const {
858 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
859 }
860
861 bool isAISrc_256_f64() const {
862 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
863 }
864
865 bool isAISrc_512_b32() const {
866 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
867 }
868
869 bool isAISrc_512B16() const {
870 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
871 }
872
873 bool isAISrc_512V2B16() const {
874 return isAISrc_512B16();
875 }
876
877 bool isAISrc_512_f32() const {
878 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
879 }
880
881 bool isAISrc_512F16() const {
882 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
883 }
884
885 bool isAISrc_512V2F16() const {
886 return isAISrc_512F16() || isAISrc_512_b32();
887 }
888
889 bool isAISrc_1024_b32() const {
890 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
891 }
892
893 bool isAISrc_1024B16() const {
894 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
895 }
896
897 bool isAISrc_1024V2B16() const {
898 return isAISrc_1024B16();
899 }
900
901 bool isAISrc_1024_f32() const {
902 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
903 }
904
905 bool isAISrc_1024F16() const {
906 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
907 }
908
909 bool isAISrc_1024V2F16() const {
910 return isAISrc_1024F16() || isAISrc_1024_b32();
911 }
912
913 bool isKImmFP32() const {
914 return isLiteralImm(MVT::f32);
915 }
916
917 bool isKImmFP16() const {
918 return isLiteralImm(MVT::f16);
919 }
920
921 bool isMem() const override {
922 return false;
923 }
924
925 bool isExpr() const {
926 return Kind == Expression;
927 }
928
929 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
930
931 bool isSWaitCnt() const;
932 bool isDepCtr() const;
933 bool isSDelayALU() const;
934 bool isHwreg() const;
935 bool isSendMsg() const;
936 bool isSplitBarrier() const;
937 bool isSwizzle() const;
938 bool isSMRDOffset8() const;
939 bool isSMEMOffset() const;
940 bool isSMRDLiteralOffset() const;
941 bool isDPP8() const;
942 bool isDPPCtrl() const;
943 bool isBLGP() const;
944 bool isGPRIdxMode() const;
945 bool isS16Imm() const;
946 bool isU16Imm() const;
947 bool isEndpgm() const;
948
949 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
950 return std::bind(P, *this);
951 }
952
953 StringRef getToken() const {
954 assert(isToken());
955 return StringRef(Tok.Data, Tok.Length);
956 }
957
958 int64_t getImm() const {
959 assert(isImm());
960 return Imm.Val;
961 }
962
963 void setImm(int64_t Val) {
964 assert(isImm());
965 Imm.Val = Val;
966 }
967
968 ImmTy getImmTy() const {
969 assert(isImm());
970 return Imm.Type;
971 }
972
973 MCRegister getReg() const override {
974 assert(isRegKind());
975 return Reg.RegNo;
976 }
977
978 SMLoc getStartLoc() const override {
979 return StartLoc;
980 }
981
982 SMLoc getEndLoc() const override {
983 return EndLoc;
984 }
985
986 SMRange getLocRange() const {
987 return SMRange(StartLoc, EndLoc);
988 }
989
990 Modifiers getModifiers() const {
991 assert(isRegKind() || isImmTy(ImmTyNone));
992 return isRegKind() ? Reg.Mods : Imm.Mods;
993 }
994
995 void setModifiers(Modifiers Mods) {
996 assert(isRegKind() || isImmTy(ImmTyNone));
997 if (isRegKind())
998 Reg.Mods = Mods;
999 else
1000 Imm.Mods = Mods;
1001 }
1002
1003 bool hasModifiers() const {
1004 return getModifiers().hasModifiers();
1005 }
1006
1007 bool hasFPModifiers() const {
1008 return getModifiers().hasFPModifiers();
1009 }
1010
1011 bool hasIntModifiers() const {
1012 return getModifiers().hasIntModifiers();
1013 }
1014
1015 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1016
1017 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1018
1019 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1020
1021 void addRegOperands(MCInst &Inst, unsigned N) const;
1022
1023 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1024 if (isRegKind())
1025 addRegOperands(Inst, N);
1026 else
1027 addImmOperands(Inst, N);
1028 }
1029
1030 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1031 Modifiers Mods = getModifiers();
1032 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1033 if (isRegKind()) {
1034 addRegOperands(Inst, N);
1035 } else {
1036 addImmOperands(Inst, N, false);
1037 }
1038 }
1039
1040 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1041 assert(!hasIntModifiers());
1042 addRegOrImmWithInputModsOperands(Inst, N);
1043 }
1044
1045 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1046 assert(!hasFPModifiers());
1047 addRegOrImmWithInputModsOperands(Inst, N);
1048 }
1049
1050 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1051 Modifiers Mods = getModifiers();
1052 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1053 assert(isRegKind());
1054 addRegOperands(Inst, N);
1055 }
1056
1057 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1058 assert(!hasIntModifiers());
1059 addRegWithInputModsOperands(Inst, N);
1060 }
1061
1062 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1063 assert(!hasFPModifiers());
1064 addRegWithInputModsOperands(Inst, N);
1065 }
1066
1067 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1068 // clang-format off
1069 switch (Type) {
1070 case ImmTyNone: OS << "None"; break;
1071 case ImmTyGDS: OS << "GDS"; break;
1072 case ImmTyLDS: OS << "LDS"; break;
1073 case ImmTyOffen: OS << "Offen"; break;
1074 case ImmTyIdxen: OS << "Idxen"; break;
1075 case ImmTyAddr64: OS << "Addr64"; break;
1076 case ImmTyOffset: OS << "Offset"; break;
1077 case ImmTyInstOffset: OS << "InstOffset"; break;
1078 case ImmTyOffset0: OS << "Offset0"; break;
1079 case ImmTyOffset1: OS << "Offset1"; break;
1080 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1081 case ImmTyCPol: OS << "CPol"; break;
1082 case ImmTyIndexKey8bit: OS << "index_key"; break;
1083 case ImmTyIndexKey16bit: OS << "index_key"; break;
1084 case ImmTyTFE: OS << "TFE"; break;
1085 case ImmTyD16: OS << "D16"; break;
1086 case ImmTyFORMAT: OS << "FORMAT"; break;
1087 case ImmTyClampSI: OS << "ClampSI"; break;
1088 case ImmTyOModSI: OS << "OModSI"; break;
1089 case ImmTyDPP8: OS << "DPP8"; break;
1090 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1091 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1092 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1093 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1094 case ImmTyDppFI: OS << "DppFI"; break;
1095 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1096 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1097 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1098 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1099 case ImmTyDMask: OS << "DMask"; break;
1100 case ImmTyDim: OS << "Dim"; break;
1101 case ImmTyUNorm: OS << "UNorm"; break;
1102 case ImmTyDA: OS << "DA"; break;
1103 case ImmTyR128A16: OS << "R128A16"; break;
1104 case ImmTyA16: OS << "A16"; break;
1105 case ImmTyLWE: OS << "LWE"; break;
1106 case ImmTyOff: OS << "Off"; break;
1107 case ImmTyExpTgt: OS << "ExpTgt"; break;
1108 case ImmTyExpCompr: OS << "ExpCompr"; break;
1109 case ImmTyExpVM: OS << "ExpVM"; break;
1110 case ImmTyHwreg: OS << "Hwreg"; break;
1111 case ImmTySendMsg: OS << "SendMsg"; break;
1112 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1113 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1114 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1115 case ImmTyOpSel: OS << "OpSel"; break;
1116 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1117 case ImmTyNegLo: OS << "NegLo"; break;
1118 case ImmTyNegHi: OS << "NegHi"; break;
1119 case ImmTySwizzle: OS << "Swizzle"; break;
1120 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1121 case ImmTyHigh: OS << "High"; break;
1122 case ImmTyBLGP: OS << "BLGP"; break;
1123 case ImmTyCBSZ: OS << "CBSZ"; break;
1124 case ImmTyABID: OS << "ABID"; break;
1125 case ImmTyEndpgm: OS << "Endpgm"; break;
1126 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1127 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1128 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1129 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1130 case ImmTyByteSel: OS << "ByteSel" ; break;
1131 }
1132 // clang-format on
1133 }
1134
1135 void print(raw_ostream &OS) const override {
1136 switch (Kind) {
1137 case Register:
1138 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1139 break;
1140 case Immediate:
1141 OS << '<' << getImm();
1142 if (getImmTy() != ImmTyNone) {
1143 OS << " type: "; printImmTy(OS, getImmTy());
1144 }
1145 OS << " mods: " << Imm.Mods << '>';
1146 break;
1147 case Token:
1148 OS << '\'' << getToken() << '\'';
1149 break;
1150 case Expression:
1151 OS << "<expr " << *Expr << '>';
1152 break;
1153 }
1154 }
1155
1156 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1157 int64_t Val, SMLoc Loc,
1158 ImmTy Type = ImmTyNone,
1159 bool IsFPImm = false) {
1160 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1161 Op->Imm.Val = Val;
1162 Op->Imm.IsFPImm = IsFPImm;
1163 Op->Imm.Kind = ImmKindTyNone;
1164 Op->Imm.Type = Type;
1165 Op->Imm.Mods = Modifiers();
1166 Op->StartLoc = Loc;
1167 Op->EndLoc = Loc;
1168 return Op;
1169 }
1170
1171 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1172 StringRef Str, SMLoc Loc,
1173 bool HasExplicitEncodingSize = true) {
1174 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1175 Res->Tok.Data = Str.data();
1176 Res->Tok.Length = Str.size();
1177 Res->StartLoc = Loc;
1178 Res->EndLoc = Loc;
1179 return Res;
1180 }
1181
1182 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1183 unsigned RegNo, SMLoc S,
1184 SMLoc E) {
1185 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1186 Op->Reg.RegNo = RegNo;
1187 Op->Reg.Mods = Modifiers();
1188 Op->StartLoc = S;
1189 Op->EndLoc = E;
1190 return Op;
1191 }
1192
1193 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1194 const class MCExpr *Expr, SMLoc S) {
1195 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1196 Op->Expr = Expr;
1197 Op->StartLoc = S;
1198 Op->EndLoc = S;
1199 return Op;
1200 }
1201};
1202
1203raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1204 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1205 return OS;
1206}
1207
1208//===----------------------------------------------------------------------===//
1209// AsmParser
1210//===----------------------------------------------------------------------===//
1211
1212// Holds info related to the current kernel, e.g. count of SGPRs used.
1213// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1214// .amdgpu_hsa_kernel or at EOF.
1215class KernelScopeInfo {
1216 int SgprIndexUnusedMin = -1;
1217 int VgprIndexUnusedMin = -1;
1218 int AgprIndexUnusedMin = -1;
1219 MCContext *Ctx = nullptr;
1220 MCSubtargetInfo const *MSTI = nullptr;
1221
1222 void usesSgprAt(int i) {
1223 if (i >= SgprIndexUnusedMin) {
1224 SgprIndexUnusedMin = ++i;
1225 if (Ctx) {
1226 MCSymbol* const Sym =
1227 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1228 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1229 }
1230 }
1231 }
1232
1233 void usesVgprAt(int i) {
1234 if (i >= VgprIndexUnusedMin) {
1235 VgprIndexUnusedMin = ++i;
1236 if (Ctx) {
1237 MCSymbol* const Sym =
1238 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1239 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1240 VgprIndexUnusedMin);
1241 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1242 }
1243 }
1244 }
1245
1246 void usesAgprAt(int i) {
1247 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1248 if (!hasMAIInsts(*MSTI))
1249 return;
1250
1251 if (i >= AgprIndexUnusedMin) {
1252 AgprIndexUnusedMin = ++i;
1253 if (Ctx) {
1254 MCSymbol* const Sym =
1255 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1256 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1257
1258 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1259 MCSymbol* const vSym =
1260 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1261 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1262 VgprIndexUnusedMin);
1263 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1264 }
1265 }
1266 }
1267
1268public:
1269 KernelScopeInfo() = default;
1270
1271 void initialize(MCContext &Context) {
1272 Ctx = &Context;
1273 MSTI = Ctx->getSubtargetInfo();
1274
1275 usesSgprAt(SgprIndexUnusedMin = -1);
1276 usesVgprAt(VgprIndexUnusedMin = -1);
1277 if (hasMAIInsts(*MSTI)) {
1278 usesAgprAt(AgprIndexUnusedMin = -1);
1279 }
1280 }
1281
1282 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1283 unsigned RegWidth) {
1284 switch (RegKind) {
1285 case IS_SGPR:
1286 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1287 break;
1288 case IS_AGPR:
1289 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1290 break;
1291 case IS_VGPR:
1292 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1293 break;
1294 default:
1295 break;
1296 }
1297 }
1298};
1299
1300class AMDGPUAsmParser : public MCTargetAsmParser {
1301 MCAsmParser &Parser;
1302
1303 unsigned ForcedEncodingSize = 0;
1304 bool ForcedDPP = false;
1305 bool ForcedSDWA = false;
1306 KernelScopeInfo KernelScope;
1307
1308 /// @name Auto-generated Match Functions
1309 /// {
1310
1311#define GET_ASSEMBLER_HEADER
1312#include "AMDGPUGenAsmMatcher.inc"
1313
1314 /// }
1315
1316private:
1317 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1318 bool OutOfRangeError(SMRange Range);
1319 /// Calculate VGPR/SGPR blocks required for given target, reserved
1320 /// registers, and user-specified NextFreeXGPR values.
1321 ///
1322 /// \param Features [in] Target features, used for bug corrections.
1323 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1324 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1325 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1326 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1327 /// descriptor field, if valid.
1328 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1329 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1330 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1331 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1332 /// \param VGPRBlocks [out] Result VGPR block count.
1333 /// \param SGPRBlocks [out] Result SGPR block count.
1334 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1335 bool FlatScrUsed, bool XNACKUsed,
1336 std::optional<bool> EnableWavefrontSize32,
1337 unsigned NextFreeVGPR, SMRange VGPRRange,
1338 unsigned NextFreeSGPR, SMRange SGPRRange,
1339 unsigned &VGPRBlocks, unsigned &SGPRBlocks);
1340 bool ParseDirectiveAMDGCNTarget();
1341 bool ParseDirectiveAMDHSACodeObjectVersion();
1342 bool ParseDirectiveAMDHSAKernel();
1343 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1344 bool ParseDirectiveAMDKernelCodeT();
1345 // TODO: Possibly make subtargetHasRegister const.
1346 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1347 bool ParseDirectiveAMDGPUHsaKernel();
1348
1349 bool ParseDirectiveISAVersion();
1350 bool ParseDirectiveHSAMetadata();
1351 bool ParseDirectivePALMetadataBegin();
1352 bool ParseDirectivePALMetadata();
1353 bool ParseDirectiveAMDGPULDS();
1354
1355 /// Common code to parse out a block of text (typically YAML) between start and
1356 /// end directives.
1357 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1358 const char *AssemblerDirectiveEnd,
1359 std::string &CollectString);
1360
1361 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1362 RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1363 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1364 unsigned &RegNum, unsigned &RegWidth,
1365 bool RestoreOnFailure = false);
1366 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1367 unsigned &RegNum, unsigned &RegWidth,
1369 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1370 unsigned &RegWidth,
1372 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1373 unsigned &RegWidth,
1375 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1376 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1377 bool ParseRegRange(unsigned& Num, unsigned& Width);
1378 unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg,
1379 unsigned RegWidth, SMLoc Loc);
1380
1381 bool isRegister();
1382 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1383 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1384 void initializeGprCountSymbol(RegisterKind RegKind);
1385 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1386 unsigned RegWidth);
1387 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1388 bool IsAtomic);
1389
1390public:
1391 enum AMDGPUMatchResultTy {
1392 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1393 };
1394 enum OperandMode {
1395 OperandMode_Default,
1396 OperandMode_NSA,
1397 };
1398
1399 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1400
1401 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1402 const MCInstrInfo &MII,
1403 const MCTargetOptions &Options)
1404 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1406
1407 if (getFeatureBits().none()) {
1408 // Set default features.
1409 copySTI().ToggleFeature("southern-islands");
1410 }
1411
1412 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1413
1414 {
1415 // TODO: make those pre-defined variables read-only.
1416 // Currently there is none suitable machinery in the core llvm-mc for this.
1417 // MCSymbol::isRedefinable is intended for another purpose, and
1418 // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1420 MCContext &Ctx = getContext();
1421 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1422 MCSymbol *Sym =
1423 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1424 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1425 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1426 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1427 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1428 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1429 } else {
1430 MCSymbol *Sym =
1431 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1432 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1433 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1434 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1435 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1436 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1437 }
1438 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1439 initializeGprCountSymbol(IS_VGPR);
1440 initializeGprCountSymbol(IS_SGPR);
1441 } else
1442 KernelScope.initialize(getContext());
1443 }
1444 }
1445
1446 bool hasMIMG_R128() const {
1447 return AMDGPU::hasMIMG_R128(getSTI());
1448 }
1449
1450 bool hasPackedD16() const {
1451 return AMDGPU::hasPackedD16(getSTI());
1452 }
1453
1454 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1455
1456 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1457
1458 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1459
1460 bool isSI() const {
1461 return AMDGPU::isSI(getSTI());
1462 }
1463
1464 bool isCI() const {
1465 return AMDGPU::isCI(getSTI());
1466 }
1467
1468 bool isVI() const {
1469 return AMDGPU::isVI(getSTI());
1470 }
1471
1472 bool isGFX9() const {
1473 return AMDGPU::isGFX9(getSTI());
1474 }
1475
1476 // TODO: isGFX90A is also true for GFX940. We need to clean it.
1477 bool isGFX90A() const {
1478 return AMDGPU::isGFX90A(getSTI());
1479 }
1480
1481 bool isGFX940() const {
1482 return AMDGPU::isGFX940(getSTI());
1483 }
1484
1485 bool isGFX9Plus() const {
1486 return AMDGPU::isGFX9Plus(getSTI());
1487 }
1488
1489 bool isGFX10() const {
1490 return AMDGPU::isGFX10(getSTI());
1491 }
1492
1493 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1494
1495 bool isGFX11() const {
1496 return AMDGPU::isGFX11(getSTI());
1497 }
1498
1499 bool isGFX11Plus() const {
1500 return AMDGPU::isGFX11Plus(getSTI());
1501 }
1502
1503 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1504
1505 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1506
1507 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1508
1509 bool isGFX10_BEncoding() const {
1511 }
1512
1513 bool hasInv2PiInlineImm() const {
1514 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1515 }
1516
1517 bool hasFlatOffsets() const {
1518 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1519 }
1520
1521 bool hasArchitectedFlatScratch() const {
1522 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1523 }
1524
1525 bool hasSGPR102_SGPR103() const {
1526 return !isVI() && !isGFX9();
1527 }
1528
1529 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1530
1531 bool hasIntClamp() const {
1532 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1533 }
1534
1535 bool hasPartialNSAEncoding() const {
1536 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1537 }
1538
1539 unsigned getNSAMaxSize(bool HasSampler = false) const {
1540 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1541 }
1542
1543 unsigned getMaxNumUserSGPRs() const {
1545 }
1546
1547 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1548
1549 AMDGPUTargetStreamer &getTargetStreamer() {
1551 return static_cast<AMDGPUTargetStreamer &>(TS);
1552 }
1553
1554 const MCRegisterInfo *getMRI() const {
1555 // We need this const_cast because for some reason getContext() is not const
1556 // in MCAsmParser.
1557 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1558 }
1559
1560 const MCInstrInfo *getMII() const {
1561 return &MII;
1562 }
1563
1564 const FeatureBitset &getFeatureBits() const {
1565 return getSTI().getFeatureBits();
1566 }
1567
1568 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1569 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1570 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1571
1572 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1573 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1574 bool isForcedDPP() const { return ForcedDPP; }
1575 bool isForcedSDWA() const { return ForcedSDWA; }
1576 ArrayRef<unsigned> getMatchedVariants() const;
1577 StringRef getMatchedVariantName() const;
1578
1579 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1580 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1581 bool RestoreOnFailure);
1582 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1584 SMLoc &EndLoc) override;
1585 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1587 unsigned Kind) override;
1588 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1591 bool MatchingInlineAsm) override;
1592 bool ParseDirective(AsmToken DirectiveID) override;
1593 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1594 OperandMode Mode = OperandMode_Default);
1595 StringRef parseMnemonicSuffix(StringRef Name);
1597 SMLoc NameLoc, OperandVector &Operands) override;
1598 //bool ProcessInstruction(MCInst &Inst);
1599
1601
1602 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1603
1605 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1606 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1607 std::function<bool(int64_t &)> ConvertResult = nullptr);
1608
1609 ParseStatus parseOperandArrayWithPrefix(
1610 const char *Prefix, OperandVector &Operands,
1611 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1612 bool (*ConvertResult)(int64_t &) = nullptr);
1613
1615 parseNamedBit(StringRef Name, OperandVector &Operands,
1616 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1617 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1619 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1620 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1621 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1622 SMLoc &StringLoc);
1623
1624 bool isModifier();
1625 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1626 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1627 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1628 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1629 bool parseSP3NegModifier();
1630 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1631 bool HasLit = false);
1633 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1634 bool HasLit = false);
1635 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1636 bool AllowImm = true);
1637 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1638 bool AllowImm = true);
1639 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1640 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1641 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1642 ParseStatus tryParseIndexKey(OperandVector &Operands,
1643 AMDGPUOperand::ImmTy ImmTy);
1644 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1645 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1646
1647 ParseStatus parseDfmtNfmt(int64_t &Format);
1648 ParseStatus parseUfmt(int64_t &Format);
1649 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1650 int64_t &Format);
1651 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1652 int64_t &Format);
1653 ParseStatus parseFORMAT(OperandVector &Operands);
1654 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1655 ParseStatus parseNumericFormat(int64_t &Format);
1656 ParseStatus parseFlatOffset(OperandVector &Operands);
1657 ParseStatus parseR128A16(OperandVector &Operands);
1659 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1660 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1661
1662 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1663
1664 bool parseCnt(int64_t &IntVal);
1665 ParseStatus parseSWaitCnt(OperandVector &Operands);
1666
1667 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1668 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1669 ParseStatus parseDepCtr(OperandVector &Operands);
1670
1671 bool parseDelay(int64_t &Delay);
1672 ParseStatus parseSDelayALU(OperandVector &Operands);
1673
1674 ParseStatus parseHwreg(OperandVector &Operands);
1675
1676private:
1677 struct OperandInfoTy {
1678 SMLoc Loc;
1679 int64_t Val;
1680 bool IsSymbolic = false;
1681 bool IsDefined = false;
1682
1683 OperandInfoTy(int64_t Val) : Val(Val) {}
1684 };
1685
1686 struct StructuredOpField : OperandInfoTy {
1689 unsigned Width;
1690 bool IsDefined = false;
1691
1692 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1693 int64_t Default)
1694 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1695 virtual ~StructuredOpField() = default;
1696
1697 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1698 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1699 return false;
1700 }
1701
1702 virtual bool validate(AMDGPUAsmParser &Parser) const {
1703 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1704 return Error(Parser, "not supported on this GPU");
1705 if (!isUIntN(Width, Val))
1706 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1707 return true;
1708 }
1709 };
1710
1711 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1712 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1713
1714 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1715 bool validateSendMsg(const OperandInfoTy &Msg,
1716 const OperandInfoTy &Op,
1717 const OperandInfoTy &Stream);
1718
1719 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1720 OperandInfoTy &Width);
1721
1722 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1723 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1724 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1725
1726 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1727 const OperandVector &Operands) const;
1728 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1729 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1730 SMLoc getLitLoc(const OperandVector &Operands,
1731 bool SearchMandatoryLiterals = false) const;
1732 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1733 SMLoc getConstLoc(const OperandVector &Operands) const;
1734 SMLoc getInstLoc(const OperandVector &Operands) const;
1735
1736 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1737 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1738 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1739 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1740 bool validateSOPLiteral(const MCInst &Inst) const;
1741 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1742 bool validateVOPDRegBankConstraints(const MCInst &Inst,
1743 const OperandVector &Operands);
1744 bool validateIntClampSupported(const MCInst &Inst);
1745 bool validateMIMGAtomicDMask(const MCInst &Inst);
1746 bool validateMIMGGatherDMask(const MCInst &Inst);
1747 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1748 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1749 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1750 bool validateMIMGD16(const MCInst &Inst);
1751 bool validateMIMGMSAA(const MCInst &Inst);
1752 bool validateOpSel(const MCInst &Inst);
1753 bool validateNeg(const MCInst &Inst, int OpName);
1754 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1755 bool validateVccOperand(unsigned Reg) const;
1756 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1757 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1758 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1759 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1760 bool validateAGPRLdSt(const MCInst &Inst) const;
1761 bool validateVGPRAlign(const MCInst &Inst) const;
1762 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1763 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1764 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1765 bool validateDivScale(const MCInst &Inst);
1766 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1767 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1768 const SMLoc &IDLoc);
1769 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1770 const unsigned CPol);
1771 bool validateExeczVcczOperands(const OperandVector &Operands);
1772 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1773 std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1774 unsigned getConstantBusLimit(unsigned Opcode) const;
1775 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1776 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1777 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1778
1779 bool isSupportedMnemo(StringRef Mnemo,
1780 const FeatureBitset &FBS);
1781 bool isSupportedMnemo(StringRef Mnemo,
1782 const FeatureBitset &FBS,
1783 ArrayRef<unsigned> Variants);
1784 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1785
1786 bool isId(const StringRef Id) const;
1787 bool isId(const AsmToken &Token, const StringRef Id) const;
1788 bool isToken(const AsmToken::TokenKind Kind) const;
1789 StringRef getId() const;
1790 bool trySkipId(const StringRef Id);
1791 bool trySkipId(const StringRef Pref, const StringRef Id);
1792 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1793 bool trySkipToken(const AsmToken::TokenKind Kind);
1794 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1795 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1796 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1797
1798 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1799 AsmToken::TokenKind getTokenKind() const;
1800 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1801 bool parseExpr(OperandVector &Operands);
1802 StringRef getTokenStr() const;
1803 AsmToken peekToken(bool ShouldSkipSpace = true);
1804 AsmToken getToken() const;
1805 SMLoc getLoc() const;
1806 void lex();
1807
1808public:
1809 void onBeginOfFile() override;
1810 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1811
1812 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1813
1814 ParseStatus parseExpTgt(OperandVector &Operands);
1815 ParseStatus parseSendMsg(OperandVector &Operands);
1816 ParseStatus parseInterpSlot(OperandVector &Operands);
1817 ParseStatus parseInterpAttr(OperandVector &Operands);
1818 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1819 ParseStatus parseBoolReg(OperandVector &Operands);
1820
1821 bool parseSwizzleOperand(int64_t &Op,
1822 const unsigned MinVal,
1823 const unsigned MaxVal,
1824 const StringRef ErrMsg,
1825 SMLoc &Loc);
1826 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1827 const unsigned MinVal,
1828 const unsigned MaxVal,
1829 const StringRef ErrMsg);
1830 ParseStatus parseSwizzle(OperandVector &Operands);
1831 bool parseSwizzleOffset(int64_t &Imm);
1832 bool parseSwizzleMacro(int64_t &Imm);
1833 bool parseSwizzleQuadPerm(int64_t &Imm);
1834 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1835 bool parseSwizzleBroadcast(int64_t &Imm);
1836 bool parseSwizzleSwap(int64_t &Imm);
1837 bool parseSwizzleReverse(int64_t &Imm);
1838
1839 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1840 int64_t parseGPRIdxMacro();
1841
1842 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1843 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1844
1845 ParseStatus parseOModSI(OperandVector &Operands);
1846
1847 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1848 OptionalImmIndexMap &OptionalIdx);
1849 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1850 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1851 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1852 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1853
1854 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1855 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1856 OptionalImmIndexMap &OptionalIdx);
1857 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1858 OptionalImmIndexMap &OptionalIdx);
1859
1860 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1861 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1862
1863 bool parseDimId(unsigned &Encoding);
1865 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1867 ParseStatus parseDPPCtrl(OperandVector &Operands);
1868 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1869 int64_t parseDPPCtrlSel(StringRef Ctrl);
1870 int64_t parseDPPCtrlPerm();
1871 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1872 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1873 cvtDPP(Inst, Operands, true);
1874 }
1875 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1876 bool IsDPP8 = false);
1877 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1878 cvtVOP3DPP(Inst, Operands, true);
1879 }
1880
1881 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1882 AMDGPUOperand::ImmTy Type);
1883 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1884 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1885 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1886 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1887 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1888 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1889 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1890 uint64_t BasicInstType,
1891 bool SkipDstVcc = false,
1892 bool SkipSrcVcc = false);
1893
1894 ParseStatus parseEndpgm(OperandVector &Operands);
1895
1897};
1898
1899} // end anonymous namespace
1900
1901// May be called with integer type with equivalent bitwidth.
1902static const fltSemantics *getFltSemantics(unsigned Size) {
1903 switch (Size) {
1904 case 4:
1905 return &APFloat::IEEEsingle();
1906 case 8:
1907 return &APFloat::IEEEdouble();
1908 case 2:
1909 return &APFloat::IEEEhalf();
1910 default:
1911 llvm_unreachable("unsupported fp type");
1912 }
1913}
1914
1916 return getFltSemantics(VT.getSizeInBits() / 8);
1917}
1918
1920 switch (OperandType) {
1921 // When floating-point immediate is used as operand of type i16, the 32-bit
1922 // representation of the constant truncated to the 16 LSBs should be used.
1942 return &APFloat::IEEEsingle();
1948 return &APFloat::IEEEdouble();
1957 return &APFloat::IEEEhalf();
1965 return &APFloat::BFloat();
1966 default:
1967 llvm_unreachable("unsupported fp type");
1968 }
1969}
1970
1971//===----------------------------------------------------------------------===//
1972// Operand
1973//===----------------------------------------------------------------------===//
1974
1975static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1976 bool Lost;
1977
1978 // Convert literal to single precision
1980 APFloat::rmNearestTiesToEven,
1981 &Lost);
1982 // We allow precision lost but not overflow or underflow
1983 if (Status != APFloat::opOK &&
1984 Lost &&
1985 ((Status & APFloat::opOverflow) != 0 ||
1986 (Status & APFloat::opUnderflow) != 0)) {
1987 return false;
1988 }
1989
1990 return true;
1991}
1992
1993static bool isSafeTruncation(int64_t Val, unsigned Size) {
1994 return isUIntN(Size, Val) || isIntN(Size, Val);
1995}
1996
1997static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1998 if (VT.getScalarType() == MVT::i16)
1999 return isInlinableLiteral32(Val, HasInv2Pi);
2000
2001 if (VT.getScalarType() == MVT::f16)
2002 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2003
2004 assert(VT.getScalarType() == MVT::bf16);
2005
2006 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2007}
2008
2009bool AMDGPUOperand::isInlinableImm(MVT type) const {
2010
2011 // This is a hack to enable named inline values like
2012 // shared_base with both 32-bit and 64-bit operands.
2013 // Note that these values are defined as
2014 // 32-bit operands only.
2015 if (isInlineValue()) {
2016 return true;
2017 }
2018
2019 if (!isImmTy(ImmTyNone)) {
2020 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2021 return false;
2022 }
2023 // TODO: We should avoid using host float here. It would be better to
2024 // check the float bit values which is what a few other places do.
2025 // We've had bot failures before due to weird NaN support on mips hosts.
2026
2027 APInt Literal(64, Imm.Val);
2028
2029 if (Imm.IsFPImm) { // We got fp literal token
2030 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2032 AsmParser->hasInv2PiInlineImm());
2033 }
2034
2035 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2036 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2037 return false;
2038
2039 if (type.getScalarSizeInBits() == 16) {
2040 bool Lost = false;
2041 switch (type.getScalarType().SimpleTy) {
2042 default:
2043 llvm_unreachable("unknown 16-bit type");
2044 case MVT::bf16:
2045 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2046 &Lost);
2047 break;
2048 case MVT::f16:
2049 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2050 &Lost);
2051 break;
2052 case MVT::i16:
2053 FPLiteral.convert(APFloatBase::IEEEsingle(),
2054 APFloat::rmNearestTiesToEven, &Lost);
2055 break;
2056 }
2057 // We need to use 32-bit representation here because when a floating-point
2058 // inline constant is used as an i16 operand, its 32-bit representation
2059 // representation will be used. We will need the 32-bit value to check if
2060 // it is FP inline constant.
2061 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2062 return isInlineableLiteralOp16(ImmVal, type,
2063 AsmParser->hasInv2PiInlineImm());
2064 }
2065
2066 // Check if single precision literal is inlinable
2068 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2069 AsmParser->hasInv2PiInlineImm());
2070 }
2071
2072 // We got int literal token.
2073 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2075 AsmParser->hasInv2PiInlineImm());
2076 }
2077
2078 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2079 return false;
2080 }
2081
2082 if (type.getScalarSizeInBits() == 16) {
2084 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2085 type, AsmParser->hasInv2PiInlineImm());
2086 }
2087
2089 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2090 AsmParser->hasInv2PiInlineImm());
2091}
2092
2093bool AMDGPUOperand::isLiteralImm(MVT type) const {
2094 // Check that this immediate can be added as literal
2095 if (!isImmTy(ImmTyNone)) {
2096 return false;
2097 }
2098
2099 if (!Imm.IsFPImm) {
2100 // We got int literal token.
2101
2102 if (type == MVT::f64 && hasFPModifiers()) {
2103 // Cannot apply fp modifiers to int literals preserving the same semantics
2104 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
2105 // disable these cases.
2106 return false;
2107 }
2108
2109 unsigned Size = type.getSizeInBits();
2110 if (Size == 64)
2111 Size = 32;
2112
2113 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2114 // types.
2115 return isSafeTruncation(Imm.Val, Size);
2116 }
2117
2118 // We got fp literal token
2119 if (type == MVT::f64) { // Expected 64-bit fp operand
2120 // We would set low 64-bits of literal to zeroes but we accept this literals
2121 return true;
2122 }
2123
2124 if (type == MVT::i64) { // Expected 64-bit int operand
2125 // We don't allow fp literals in 64-bit integer instructions. It is
2126 // unclear how we should encode them.
2127 return false;
2128 }
2129
2130 // We allow fp literals with f16x2 operands assuming that the specified
2131 // literal goes into the lower half and the upper half is zero. We also
2132 // require that the literal may be losslessly converted to f16.
2133 //
2134 // For i16x2 operands, we assume that the specified literal is encoded as a
2135 // single-precision float. This is pretty odd, but it matches SP3 and what
2136 // happens in hardware.
2137 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2138 : (type == MVT::v2i16) ? MVT::f32
2139 : (type == MVT::v2f32) ? MVT::f32
2140 : type;
2141
2142 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2143 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2144}
2145
2146bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2147 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2148}
2149
2150bool AMDGPUOperand::isVRegWithInputMods() const {
2151 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2152 // GFX90A allows DPP on 64-bit operands.
2153 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2154 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2155}
2156
2157template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2158 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2159 : AMDGPU::VGPR_16_Lo128RegClassID);
2160}
2161
2162bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2163 if (AsmParser->isVI())
2164 return isVReg32();
2165 else if (AsmParser->isGFX9Plus())
2166 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2167 else
2168 return false;
2169}
2170
2171bool AMDGPUOperand::isSDWAFP16Operand() const {
2172 return isSDWAOperand(MVT::f16);
2173}
2174
2175bool AMDGPUOperand::isSDWAFP32Operand() const {
2176 return isSDWAOperand(MVT::f32);
2177}
2178
2179bool AMDGPUOperand::isSDWAInt16Operand() const {
2180 return isSDWAOperand(MVT::i16);
2181}
2182
2183bool AMDGPUOperand::isSDWAInt32Operand() const {
2184 return isSDWAOperand(MVT::i32);
2185}
2186
2187bool AMDGPUOperand::isBoolReg() const {
2188 auto FB = AsmParser->getFeatureBits();
2189 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2190 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2191}
2192
2193uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2194{
2195 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2196 assert(Size == 2 || Size == 4 || Size == 8);
2197
2198 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2199
2200 if (Imm.Mods.Abs) {
2201 Val &= ~FpSignMask;
2202 }
2203 if (Imm.Mods.Neg) {
2204 Val ^= FpSignMask;
2205 }
2206
2207 return Val;
2208}
2209
2210void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2211 if (isExpr()) {
2213 return;
2214 }
2215
2216 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2217 Inst.getNumOperands())) {
2218 addLiteralImmOperand(Inst, Imm.Val,
2219 ApplyModifiers &
2220 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2221 } else {
2222 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2224 setImmKindNone();
2225 }
2226}
2227
2228void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2229 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2230 auto OpNum = Inst.getNumOperands();
2231 // Check that this operand accepts literals
2232 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2233
2234 if (ApplyModifiers) {
2235 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2236 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2237 Val = applyInputFPModifiers(Val, Size);
2238 }
2239
2240 APInt Literal(64, Val);
2241 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2242
2243 if (Imm.IsFPImm) { // We got fp literal token
2244 switch (OpTy) {
2250 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2251 AsmParser->hasInv2PiInlineImm())) {
2252 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2253 setImmKindConst();
2254 return;
2255 }
2256
2257 // Non-inlineable
2258 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2259 // For fp operands we check if low 32 bits are zeros
2260 if (Literal.getLoBits(32) != 0) {
2261 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2262 "Can't encode literal as exact 64-bit floating-point operand. "
2263 "Low 32-bits will be set to zero");
2264 Val &= 0xffffffff00000000u;
2265 }
2266
2268 setImmKindLiteral();
2269 return;
2270 }
2271
2272 // We don't allow fp literals in 64-bit integer instructions. It is
2273 // unclear how we should encode them. This case should be checked earlier
2274 // in predicate methods (isLiteralImm())
2275 llvm_unreachable("fp literal in 64-bit integer instruction.");
2276
2284 if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
2285 // This is the 1/(2*pi) which is going to be truncated to bf16 with the
2286 // loss of precision. The constant represents ideomatic fp32 value of
2287 // 1/(2*pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16
2288 // bits. Prevent rounding below.
2289 Inst.addOperand(MCOperand::createImm(0x3e22));
2290 setImmKindLiteral();
2291 return;
2292 }
2293 [[fallthrough]];
2294
2322 bool lost;
2323 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2324 // Convert literal to single precision
2325 FPLiteral.convert(*getOpFltSemantics(OpTy),
2326 APFloat::rmNearestTiesToEven, &lost);
2327 // We allow precision lost but not overflow or underflow. This should be
2328 // checked earlier in isLiteralImm()
2329
2330 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2331 Inst.addOperand(MCOperand::createImm(ImmVal));
2332 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2333 setImmKindMandatoryLiteral();
2334 } else {
2335 setImmKindLiteral();
2336 }
2337 return;
2338 }
2339 default:
2340 llvm_unreachable("invalid operand size");
2341 }
2342
2343 return;
2344 }
2345
2346 // We got int literal token.
2347 // Only sign extend inline immediates.
2348 switch (OpTy) {
2364 if (isSafeTruncation(Val, 32) &&
2365 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2366 AsmParser->hasInv2PiInlineImm())) {
2368 setImmKindConst();
2369 return;
2370 }
2371
2372 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2373 setImmKindLiteral();
2374 return;
2375
2381 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2383 setImmKindConst();
2384 return;
2385 }
2386
2387 Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
2388 : Lo_32(Val);
2389
2391 setImmKindLiteral();
2392 return;
2393
2397 if (isSafeTruncation(Val, 16) &&
2398 AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2399 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2400 setImmKindConst();
2401 return;
2402 }
2403
2404 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2405 setImmKindLiteral();
2406 return;
2407
2412 if (isSafeTruncation(Val, 16) &&
2413 AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2414 AsmParser->hasInv2PiInlineImm())) {
2416 setImmKindConst();
2417 return;
2418 }
2419
2420 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2421 setImmKindLiteral();
2422 return;
2423
2428 if (isSafeTruncation(Val, 16) &&
2429 AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2430 AsmParser->hasInv2PiInlineImm())) {
2432 setImmKindConst();
2433 return;
2434 }
2435
2436 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2437 setImmKindLiteral();
2438 return;
2439
2442 assert(isSafeTruncation(Val, 16));
2443 assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2445 return;
2446 }
2449 assert(isSafeTruncation(Val, 16));
2450 assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2451 AsmParser->hasInv2PiInlineImm()));
2452
2454 return;
2455 }
2456
2459 assert(isSafeTruncation(Val, 16));
2460 assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2461 AsmParser->hasInv2PiInlineImm()));
2462
2464 return;
2465 }
2466
2468 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2469 setImmKindMandatoryLiteral();
2470 return;
2472 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2473 setImmKindMandatoryLiteral();
2474 return;
2475 default:
2476 llvm_unreachable("invalid operand size");
2477 }
2478}
2479
2480void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2481 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2482}
2483
2484bool AMDGPUOperand::isInlineValue() const {
2485 return isRegKind() && ::isInlineValue(getReg());
2486}
2487
2488//===----------------------------------------------------------------------===//
2489// AsmParser
2490//===----------------------------------------------------------------------===//
2491
2492static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2493 if (Is == IS_VGPR) {
2494 switch (RegWidth) {
2495 default: return -1;
2496 case 32:
2497 return AMDGPU::VGPR_32RegClassID;
2498 case 64:
2499 return AMDGPU::VReg_64RegClassID;
2500 case 96:
2501 return AMDGPU::VReg_96RegClassID;
2502 case 128:
2503 return AMDGPU::VReg_128RegClassID;
2504 case 160:
2505 return AMDGPU::VReg_160RegClassID;
2506 case 192:
2507 return AMDGPU::VReg_192RegClassID;
2508 case 224:
2509 return AMDGPU::VReg_224RegClassID;
2510 case 256:
2511 return AMDGPU::VReg_256RegClassID;
2512 case 288:
2513 return AMDGPU::VReg_288RegClassID;
2514 case 320:
2515 return AMDGPU::VReg_320RegClassID;
2516 case 352:
2517 return AMDGPU::VReg_352RegClassID;
2518 case 384:
2519 return AMDGPU::VReg_384RegClassID;
2520 case 512:
2521 return AMDGPU::VReg_512RegClassID;
2522 case 1024:
2523 return AMDGPU::VReg_1024RegClassID;
2524 }
2525 } else if (Is == IS_TTMP) {
2526 switch (RegWidth) {
2527 default: return -1;
2528 case 32:
2529 return AMDGPU::TTMP_32RegClassID;
2530 case 64:
2531 return AMDGPU::TTMP_64RegClassID;
2532 case 128:
2533 return AMDGPU::TTMP_128RegClassID;
2534 case 256:
2535 return AMDGPU::TTMP_256RegClassID;
2536 case 512:
2537 return AMDGPU::TTMP_512RegClassID;
2538 }
2539 } else if (Is == IS_SGPR) {
2540 switch (RegWidth) {
2541 default: return -1;
2542 case 32:
2543 return AMDGPU::SGPR_32RegClassID;
2544 case 64:
2545 return AMDGPU::SGPR_64RegClassID;
2546 case 96:
2547 return AMDGPU::SGPR_96RegClassID;
2548 case 128:
2549 return AMDGPU::SGPR_128RegClassID;
2550 case 160:
2551 return AMDGPU::SGPR_160RegClassID;
2552 case 192:
2553 return AMDGPU::SGPR_192RegClassID;
2554 case 224:
2555 return AMDGPU::SGPR_224RegClassID;
2556 case 256:
2557 return AMDGPU::SGPR_256RegClassID;
2558 case 288:
2559 return AMDGPU::SGPR_288RegClassID;
2560 case 320:
2561 return AMDGPU::SGPR_320RegClassID;
2562 case 352:
2563 return AMDGPU::SGPR_352RegClassID;
2564 case 384:
2565 return AMDGPU::SGPR_384RegClassID;
2566 case 512:
2567 return AMDGPU::SGPR_512RegClassID;
2568 }
2569 } else if (Is == IS_AGPR) {
2570 switch (RegWidth) {
2571 default: return -1;
2572 case 32:
2573 return AMDGPU::AGPR_32RegClassID;
2574 case 64:
2575 return AMDGPU::AReg_64RegClassID;
2576 case 96:
2577 return AMDGPU::AReg_96RegClassID;
2578 case 128:
2579 return AMDGPU::AReg_128RegClassID;
2580 case 160:
2581 return AMDGPU::AReg_160RegClassID;
2582 case 192:
2583 return AMDGPU::AReg_192RegClassID;
2584 case 224:
2585 return AMDGPU::AReg_224RegClassID;
2586 case 256:
2587 return AMDGPU::AReg_256RegClassID;
2588 case 288:
2589 return AMDGPU::AReg_288RegClassID;
2590 case 320:
2591 return AMDGPU::AReg_320RegClassID;
2592 case 352:
2593 return AMDGPU::AReg_352RegClassID;
2594 case 384:
2595 return AMDGPU::AReg_384RegClassID;
2596 case 512:
2597 return AMDGPU::AReg_512RegClassID;
2598 case 1024:
2599 return AMDGPU::AReg_1024RegClassID;
2600 }
2601 }
2602 return -1;
2603}
2604
2607 .Case("exec", AMDGPU::EXEC)
2608 .Case("vcc", AMDGPU::VCC)
2609 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2610 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2611 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2612 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2613 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2614 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2615 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2616 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2617 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2618 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2619 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2620 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2621 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2622 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2623 .Case("m0", AMDGPU::M0)
2624 .Case("vccz", AMDGPU::SRC_VCCZ)
2625 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2626 .Case("execz", AMDGPU::SRC_EXECZ)
2627 .Case("src_execz", AMDGPU::SRC_EXECZ)
2628 .Case("scc", AMDGPU::SRC_SCC)
2629 .Case("src_scc", AMDGPU::SRC_SCC)
2630 .Case("tba", AMDGPU::TBA)
2631 .Case("tma", AMDGPU::TMA)
2632 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2633 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2634 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2635 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2636 .Case("vcc_lo", AMDGPU::VCC_LO)
2637 .Case("vcc_hi", AMDGPU::VCC_HI)
2638 .Case("exec_lo", AMDGPU::EXEC_LO)
2639 .Case("exec_hi", AMDGPU::EXEC_HI)
2640 .Case("tma_lo", AMDGPU::TMA_LO)
2641 .Case("tma_hi", AMDGPU::TMA_HI)
2642 .Case("tba_lo", AMDGPU::TBA_LO)
2643 .Case("tba_hi", AMDGPU::TBA_HI)
2644 .Case("pc", AMDGPU::PC_REG)
2645 .Case("null", AMDGPU::SGPR_NULL)
2646 .Default(AMDGPU::NoRegister);
2647}
2648
2649bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2650 SMLoc &EndLoc, bool RestoreOnFailure) {
2651 auto R = parseRegister();
2652 if (!R) return true;
2653 assert(R->isReg());
2654 RegNo = R->getReg();
2655 StartLoc = R->getStartLoc();
2656 EndLoc = R->getEndLoc();
2657 return false;
2658}
2659
2660bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2661 SMLoc &EndLoc) {
2662 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2663}
2664
2665ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2666 SMLoc &EndLoc) {
2667 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2668 bool PendingErrors = getParser().hasPendingError();
2669 getParser().clearPendingErrors();
2670 if (PendingErrors)
2671 return ParseStatus::Failure;
2672 if (Result)
2673 return ParseStatus::NoMatch;
2674 return ParseStatus::Success;
2675}
2676
2677bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2678 RegisterKind RegKind, unsigned Reg1,
2679 SMLoc Loc) {
2680 switch (RegKind) {
2681 case IS_SPECIAL:
2682 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2683 Reg = AMDGPU::EXEC;
2684 RegWidth = 64;
2685 return true;
2686 }
2687 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2688 Reg = AMDGPU::FLAT_SCR;
2689 RegWidth = 64;
2690 return true;
2691 }
2692 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2693 Reg = AMDGPU::XNACK_MASK;
2694 RegWidth = 64;
2695 return true;
2696 }
2697 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2698 Reg = AMDGPU::VCC;
2699 RegWidth = 64;
2700 return true;
2701 }
2702 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2703 Reg = AMDGPU::TBA;
2704 RegWidth = 64;
2705 return true;
2706 }
2707 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2708 Reg = AMDGPU::TMA;
2709 RegWidth = 64;
2710 return true;
2711 }
2712 Error(Loc, "register does not fit in the list");
2713 return false;
2714 case IS_VGPR:
2715 case IS_SGPR:
2716 case IS_AGPR:
2717 case IS_TTMP:
2718 if (Reg1 != Reg + RegWidth / 32) {
2719 Error(Loc, "registers in a list must have consecutive indices");
2720 return false;
2721 }
2722 RegWidth += 32;
2723 return true;
2724 default:
2725 llvm_unreachable("unexpected register kind");
2726 }
2727}
2728
2729struct RegInfo {
2731 RegisterKind Kind;
2732};
2733
2734static constexpr RegInfo RegularRegisters[] = {
2735 {{"v"}, IS_VGPR},
2736 {{"s"}, IS_SGPR},
2737 {{"ttmp"}, IS_TTMP},
2738 {{"acc"}, IS_AGPR},
2739 {{"a"}, IS_AGPR},
2740};
2741
2742static bool isRegularReg(RegisterKind Kind) {
2743 return Kind == IS_VGPR ||
2744 Kind == IS_SGPR ||
2745 Kind == IS_TTMP ||
2746 Kind == IS_AGPR;
2747}
2748
2750 for (const RegInfo &Reg : RegularRegisters)
2751 if (Str.starts_with(Reg.Name))
2752 return &Reg;
2753 return nullptr;
2754}
2755
2756static bool getRegNum(StringRef Str, unsigned& Num) {
2757 return !Str.getAsInteger(10, Num);
2758}
2759
2760bool
2761AMDGPUAsmParser::isRegister(const AsmToken &Token,
2762 const AsmToken &NextToken) const {
2763
2764 // A list of consecutive registers: [s0,s1,s2,s3]
2765 if (Token.is(AsmToken::LBrac))
2766 return true;
2767
2768 if (!Token.is(AsmToken::Identifier))
2769 return false;
2770
2771 // A single register like s0 or a range of registers like s[0:1]
2772
2773 StringRef Str = Token.getString();
2774 const RegInfo *Reg = getRegularRegInfo(Str);
2775 if (Reg) {
2776 StringRef RegName = Reg->Name;
2777 StringRef RegSuffix = Str.substr(RegName.size());
2778 if (!RegSuffix.empty()) {
2779 RegSuffix.consume_back(".l");
2780 RegSuffix.consume_back(".h");
2781 unsigned Num;
2782 // A single register with an index: rXX
2783 if (getRegNum(RegSuffix, Num))
2784 return true;
2785 } else {
2786 // A range of registers: r[XX:YY].
2787 if (NextToken.is(AsmToken::LBrac))
2788 return true;
2789 }
2790 }
2791
2792 return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2793}
2794
2795bool
2796AMDGPUAsmParser::isRegister()
2797{
2798 return isRegister(getToken(), peekToken());
2799}
2800
2801unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2802 unsigned SubReg, unsigned RegWidth,
2803 SMLoc Loc) {
2804 assert(isRegularReg(RegKind));
2805
2806 unsigned AlignSize = 1;
2807 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2808 // SGPR and TTMP registers must be aligned.
2809 // Max required alignment is 4 dwords.
2810 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2811 }
2812
2813 if (RegNum % AlignSize != 0) {
2814 Error(Loc, "invalid register alignment");
2815 return AMDGPU::NoRegister;
2816 }
2817
2818 unsigned RegIdx = RegNum / AlignSize;
2819 int RCID = getRegClass(RegKind, RegWidth);
2820 if (RCID == -1) {
2821 Error(Loc, "invalid or unsupported register size");
2822 return AMDGPU::NoRegister;
2823 }
2824
2825 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2826 const MCRegisterClass RC = TRI->getRegClass(RCID);
2827 if (RegIdx >= RC.getNumRegs()) {
2828 Error(Loc, "register index is out of range");
2829 return AMDGPU::NoRegister;
2830 }
2831
2832 unsigned Reg = RC.getRegister(RegIdx);
2833
2834 if (SubReg) {
2835 Reg = TRI->getSubReg(Reg, SubReg);
2836
2837 // Currently all regular registers have their .l and .h subregisters, so
2838 // we should never need to generate an error here.
2839 assert(Reg && "Invalid subregister!");
2840 }
2841
2842 return Reg;
2843}
2844
2845bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2846 int64_t RegLo, RegHi;
2847 if (!skipToken(AsmToken::LBrac, "missing register index"))
2848 return false;
2849
2850 SMLoc FirstIdxLoc = getLoc();
2851 SMLoc SecondIdxLoc;
2852
2853 if (!parseExpr(RegLo))
2854 return false;
2855
2856 if (trySkipToken(AsmToken::Colon)) {
2857 SecondIdxLoc = getLoc();
2858 if (!parseExpr(RegHi))
2859 return false;
2860 } else {
2861 RegHi = RegLo;
2862 }
2863
2864 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2865 return false;
2866
2867 if (!isUInt<32>(RegLo)) {
2868 Error(FirstIdxLoc, "invalid register index");
2869 return false;
2870 }
2871
2872 if (!isUInt<32>(RegHi)) {
2873 Error(SecondIdxLoc, "invalid register index");
2874 return false;
2875 }
2876
2877 if (RegLo > RegHi) {
2878 Error(FirstIdxLoc, "first register index should not exceed second index");
2879 return false;
2880 }
2881
2882 Num = static_cast<unsigned>(RegLo);
2883 RegWidth = 32 * ((RegHi - RegLo) + 1);
2884 return true;
2885}
2886
2887unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2888 unsigned &RegNum, unsigned &RegWidth,
2889 SmallVectorImpl<AsmToken> &Tokens) {
2890 assert(isToken(AsmToken::Identifier));
2891 unsigned Reg = getSpecialRegForName(getTokenStr());
2892 if (Reg) {
2893 RegNum = 0;
2894 RegWidth = 32;
2895 RegKind = IS_SPECIAL;
2896 Tokens.push_back(getToken());
2897 lex(); // skip register name
2898 }
2899 return Reg;
2900}
2901
2902unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2903 unsigned &RegNum, unsigned &RegWidth,
2904 SmallVectorImpl<AsmToken> &Tokens) {
2905 assert(isToken(AsmToken::Identifier));
2906 StringRef RegName = getTokenStr();
2907 auto Loc = getLoc();
2908
2909 const RegInfo *RI = getRegularRegInfo(RegName);
2910 if (!RI) {
2911 Error(Loc, "invalid register name");
2912 return AMDGPU::NoRegister;
2913 }
2914
2915 Tokens.push_back(getToken());
2916 lex(); // skip register name
2917
2918 RegKind = RI->Kind;
2919 StringRef RegSuffix = RegName.substr(RI->Name.size());
2920 unsigned SubReg = NoSubRegister;
2921 if (!RegSuffix.empty()) {
2922 // We don't know the opcode till we are done parsing, so we don't know if
2923 // registers should be 16 or 32 bit. It is therefore mandatory to put .l or
2924 // .h to correctly specify 16 bit registers. We also can't determine class
2925 // VGPR_16_Lo128 or VGPR_16, so always parse them as VGPR_16.
2926 if (RegSuffix.consume_back(".l"))
2927 SubReg = AMDGPU::lo16;
2928 else if (RegSuffix.consume_back(".h"))
2929 SubReg = AMDGPU::hi16;
2930
2931 // Single 32-bit register: vXX.
2932 if (!getRegNum(RegSuffix, RegNum)) {
2933 Error(Loc, "invalid register index");
2934 return AMDGPU::NoRegister;
2935 }
2936 RegWidth = 32;
2937 } else {
2938 // Range of registers: v[XX:YY]. ":YY" is optional.
2939 if (!ParseRegRange(RegNum, RegWidth))
2940 return AMDGPU::NoRegister;
2941 }
2942
2943 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
2944}
2945
2946unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2947 unsigned &RegWidth,
2948 SmallVectorImpl<AsmToken> &Tokens) {
2949 unsigned Reg = AMDGPU::NoRegister;
2950 auto ListLoc = getLoc();
2951
2952 if (!skipToken(AsmToken::LBrac,
2953 "expected a register or a list of registers")) {
2954 return AMDGPU::NoRegister;
2955 }
2956
2957 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2958
2959 auto Loc = getLoc();
2960 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2961 return AMDGPU::NoRegister;
2962 if (RegWidth != 32) {
2963 Error(Loc, "expected a single 32-bit register");
2964 return AMDGPU::NoRegister;
2965 }
2966
2967 for (; trySkipToken(AsmToken::Comma); ) {
2968 RegisterKind NextRegKind;
2969 unsigned NextReg, NextRegNum, NextRegWidth;
2970 Loc = getLoc();
2971
2972 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2973 NextRegNum, NextRegWidth,
2974 Tokens)) {
2975 return AMDGPU::NoRegister;
2976 }
2977 if (NextRegWidth != 32) {
2978 Error(Loc, "expected a single 32-bit register");
2979 return AMDGPU::NoRegister;
2980 }
2981 if (NextRegKind != RegKind) {
2982 Error(Loc, "registers in a list must be of the same kind");
2983 return AMDGPU::NoRegister;
2984 }
2985 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2986 return AMDGPU::NoRegister;
2987 }
2988
2989 if (!skipToken(AsmToken::RBrac,
2990 "expected a comma or a closing square bracket")) {
2991 return AMDGPU::NoRegister;
2992 }
2993
2994 if (isRegularReg(RegKind))
2995 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
2996
2997 return Reg;
2998}
2999
3000bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3001 unsigned &RegNum, unsigned &RegWidth,
3002 SmallVectorImpl<AsmToken> &Tokens) {
3003 auto Loc = getLoc();
3004 Reg = AMDGPU::NoRegister;
3005
3006 if (isToken(AsmToken::Identifier)) {
3007 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3008 if (Reg == AMDGPU::NoRegister)
3009 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3010 } else {
3011 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3012 }
3013
3014 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3015 if (Reg == AMDGPU::NoRegister) {
3016 assert(Parser.hasPendingError());
3017 return false;
3018 }
3019
3020 if (!subtargetHasRegister(*TRI, Reg)) {
3021 if (Reg == AMDGPU::SGPR_NULL) {
3022 Error(Loc, "'null' operand is not supported on this GPU");
3023 } else {
3024 Error(Loc, "register not available on this GPU");
3025 }
3026 return false;
3027 }
3028
3029 return true;
3030}
3031
3032bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3033 unsigned &RegNum, unsigned &RegWidth,
3034 bool RestoreOnFailure /*=false*/) {
3035 Reg = AMDGPU::NoRegister;
3036
3038 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3039 if (RestoreOnFailure) {
3040 while (!Tokens.empty()) {
3041 getLexer().UnLex(Tokens.pop_back_val());
3042 }
3043 }
3044 return true;
3045 }
3046 return false;
3047}
3048
3049std::optional<StringRef>
3050AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3051 switch (RegKind) {
3052 case IS_VGPR:
3053 return StringRef(".amdgcn.next_free_vgpr");
3054 case IS_SGPR:
3055 return StringRef(".amdgcn.next_free_sgpr");
3056 default:
3057 return std::nullopt;
3058 }
3059}
3060
3061void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3062 auto SymbolName = getGprCountSymbolName(RegKind);
3063 assert(SymbolName && "initializing invalid register kind");
3064 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3065 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
3066}
3067
3068bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3069 unsigned DwordRegIndex,
3070 unsigned RegWidth) {
3071 // Symbols are only defined for GCN targets
3072 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3073 return true;
3074
3075 auto SymbolName = getGprCountSymbolName(RegKind);
3076 if (!SymbolName)
3077 return true;
3078 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3079
3080 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3081 int64_t OldCount;
3082
3083 if (!Sym->isVariable())
3084 return !Error(getLoc(),
3085 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3086 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
3087 return !Error(
3088 getLoc(),
3089 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3090
3091 if (OldCount <= NewMax)
3092 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
3093
3094 return true;
3095}
3096
3097std::unique_ptr<AMDGPUOperand>
3098AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3099 const auto &Tok = getToken();
3100 SMLoc StartLoc = Tok.getLoc();
3101 SMLoc EndLoc = Tok.getEndLoc();
3102 RegisterKind RegKind;
3103 unsigned Reg, RegNum, RegWidth;
3104
3105 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3106 return nullptr;
3107 }
3108 if (isHsaAbi(getSTI())) {
3109 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3110 return nullptr;
3111 } else
3112 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3113 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3114}
3115
3116ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3117 bool HasSP3AbsModifier, bool HasLit) {
3118 // TODO: add syntactic sugar for 1/(2*PI)
3119
3120 if (isRegister())
3121 return ParseStatus::NoMatch;
3122 assert(!isModifier());
3123
3124 if (!HasLit) {
3125 HasLit = trySkipId("lit");
3126 if (HasLit) {
3127 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3128 return ParseStatus::Failure;
3129 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
3130 if (S.isSuccess() &&
3131 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3132 return ParseStatus::Failure;
3133 return S;
3134 }
3135 }
3136
3137 const auto& Tok = getToken();
3138 const auto& NextTok = peekToken();
3139 bool IsReal = Tok.is(AsmToken::Real);
3140 SMLoc S = getLoc();
3141 bool Negate = false;
3142
3143 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3144 lex();
3145 IsReal = true;
3146 Negate = true;
3147 }
3148
3149 AMDGPUOperand::Modifiers Mods;
3150 Mods.Lit = HasLit;
3151
3152 if (IsReal) {
3153 // Floating-point expressions are not supported.
3154 // Can only allow floating-point literals with an
3155 // optional sign.
3156
3157 StringRef Num = getTokenStr();
3158 lex();
3159
3160 APFloat RealVal(APFloat::IEEEdouble());
3161 auto roundMode = APFloat::rmNearestTiesToEven;
3162 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3163 return ParseStatus::Failure;
3164 if (Negate)
3165 RealVal.changeSign();
3166
3167 Operands.push_back(
3168 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3169 AMDGPUOperand::ImmTyNone, true));
3170 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3171 Op.setModifiers(Mods);
3172
3173 return ParseStatus::Success;
3174
3175 } else {
3176 int64_t IntVal;
3177 const MCExpr *Expr;
3178 SMLoc S = getLoc();
3179
3180 if (HasSP3AbsModifier) {
3181 // This is a workaround for handling expressions
3182 // as arguments of SP3 'abs' modifier, for example:
3183 // |1.0|
3184 // |-1|
3185 // |1+x|
3186 // This syntax is not compatible with syntax of standard
3187 // MC expressions (due to the trailing '|').
3188 SMLoc EndLoc;
3189 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3190 return ParseStatus::Failure;
3191 } else {
3192 if (Parser.parseExpression(Expr))
3193 return ParseStatus::Failure;
3194 }
3195
3196 if (Expr->evaluateAsAbsolute(IntVal)) {
3197 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3198 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3199 Op.setModifiers(Mods);
3200 } else {
3201 if (HasLit)
3202 return ParseStatus::NoMatch;
3203 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3204 }
3205
3206 return ParseStatus::Success;
3207 }
3208
3209 return ParseStatus::NoMatch;
3210}
3211
3212ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3213 if (!isRegister())
3214 return ParseStatus::NoMatch;
3215
3216 if (auto R = parseRegister()) {
3217 assert(R->isReg());
3218 Operands.push_back(std::move(R));
3219 return ParseStatus::Success;
3220 }
3221 return ParseStatus::Failure;
3222}
3223
3224ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3225 bool HasSP3AbsMod, bool HasLit) {
3226 ParseStatus Res = parseReg(Operands);
3227 if (!Res.isNoMatch())
3228 return Res;
3229 if (isModifier())
3230 return ParseStatus::NoMatch;
3231 return parseImm(Operands, HasSP3AbsMod, HasLit);
3232}
3233
3234bool
3235AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3236 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3237 const auto &str = Token.getString();
3238 return str == "abs" || str == "neg" || str == "sext";
3239 }
3240 return false;
3241}
3242
3243bool
3244AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3245 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3246}
3247
3248bool
3249AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3250 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3251}
3252
3253bool
3254AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3255 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3256}
3257
3258// Check if this is an operand modifier or an opcode modifier
3259// which may look like an expression but it is not. We should
3260// avoid parsing these modifiers as expressions. Currently
3261// recognized sequences are:
3262// |...|
3263// abs(...)
3264// neg(...)
3265// sext(...)
3266// -reg
3267// -|...|
3268// -abs(...)
3269// name:...
3270//
3271bool
3272AMDGPUAsmParser::isModifier() {
3273
3274 AsmToken Tok = getToken();
3275 AsmToken NextToken[2];
3276 peekTokens(NextToken);
3277
3278 return isOperandModifier(Tok, NextToken[0]) ||
3279 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3280 isOpcodeModifierWithVal(Tok, NextToken[0]);
3281}
3282
3283// Check if the current token is an SP3 'neg' modifier.
3284// Currently this modifier is allowed in the following context:
3285//
3286// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3287// 2. Before an 'abs' modifier: -abs(...)
3288// 3. Before an SP3 'abs' modifier: -|...|
3289//
3290// In all other cases "-" is handled as a part
3291// of an expression that follows the sign.
3292//
3293// Note: When "-" is followed by an integer literal,
3294// this is interpreted as integer negation rather
3295// than a floating-point NEG modifier applied to N.
3296// Beside being contr-intuitive, such use of floating-point
3297// NEG modifier would have resulted in different meaning
3298// of integer literals used with VOP1/2/C and VOP3,
3299// for example:
3300// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3301// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3302// Negative fp literals with preceding "-" are
3303// handled likewise for uniformity
3304//
3305bool
3306AMDGPUAsmParser::parseSP3NegModifier() {
3307
3308 AsmToken NextToken[2];
3309 peekTokens(NextToken);
3310
3311 if (isToken(AsmToken::Minus) &&
3312 (isRegister(NextToken[0], NextToken[1]) ||
3313 NextToken[0].is(AsmToken::Pipe) ||
3314 isId(NextToken[0], "abs"))) {
3315 lex();
3316 return true;
3317 }
3318
3319 return false;
3320}
3321
3323AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3324 bool AllowImm) {
3325 bool Neg, SP3Neg;
3326 bool Abs, SP3Abs;
3327 bool Lit;
3328 SMLoc Loc;
3329
3330 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3331 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3332 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3333
3334 SP3Neg = parseSP3NegModifier();
3335
3336 Loc = getLoc();
3337 Neg = trySkipId("neg");
3338 if (Neg && SP3Neg)
3339 return Error(Loc, "expected register or immediate");
3340 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3341 return ParseStatus::Failure;
3342
3343 Abs = trySkipId("abs");
3344 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3345 return ParseStatus::Failure;
3346
3347 Lit = trySkipId("lit");
3348 if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3349 return ParseStatus::Failure;
3350
3351 Loc = getLoc();
3352 SP3Abs = trySkipToken(AsmToken::Pipe);
3353 if (Abs && SP3Abs)
3354 return Error(Loc, "expected register or immediate");
3355
3356 ParseStatus Res;
3357 if (AllowImm) {
3358 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3359 } else {
3360 Res = parseReg(Operands);
3361 }
3362 if (!Res.isSuccess())
3363 return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
3364
3365 if (Lit && !Operands.back()->isImm())
3366 Error(Loc, "expected immediate with lit modifier");
3367
3368 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3369 return ParseStatus::Failure;
3370 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3371 return ParseStatus::Failure;
3372 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3373 return ParseStatus::Failure;
3374 if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3375 return ParseStatus::Failure;
3376
3377 AMDGPUOperand::Modifiers Mods;
3378 Mods.Abs = Abs || SP3Abs;
3379 Mods.Neg = Neg || SP3Neg;
3380 Mods.Lit = Lit;
3381
3382 if (Mods.hasFPModifiers() || Lit) {
3383 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3384 if (Op.isExpr())
3385 return Error(Op.getStartLoc(), "expected an absolute expression");
3386 Op.setModifiers(Mods);
3387 }
3388 return ParseStatus::Success;
3389}
3390
3392AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3393 bool AllowImm) {
3394 bool Sext = trySkipId("sext");
3395 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3396 return ParseStatus::Failure;
3397
3398 ParseStatus Res;
3399 if (AllowImm) {
3400 Res = parseRegOrImm(Operands);
3401 } else {
3402 Res = parseReg(Operands);
3403 }
3404 if (!Res.isSuccess())
3405 return Sext ? ParseStatus::Failure : Res;
3406
3407 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3408 return ParseStatus::Failure;
3409
3410 AMDGPUOperand::Modifiers Mods;
3411 Mods.Sext = Sext;
3412
3413 if (Mods.hasIntModifiers()) {
3414 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3415 if (Op.isExpr())
3416 return Error(Op.getStartLoc(), "expected an absolute expression");
3417 Op.setModifiers(Mods);
3418 }
3419
3420 return ParseStatus::Success;
3421}
3422
3423ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3424 return parseRegOrImmWithFPInputMods(Operands, false);
3425}
3426
3427ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3428 return parseRegOrImmWithIntInputMods(Operands, false);
3429}
3430
3431ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3432 auto Loc = getLoc();
3433 if (trySkipId("off")) {
3434 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3435 AMDGPUOperand::ImmTyOff, false));
3436 return ParseStatus::Success;
3437 }
3438
3439 if (!isRegister())
3440 return ParseStatus::NoMatch;
3441
3442 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3443 if (Reg) {
3444 Operands.push_back(std::move(Reg));
3445 return ParseStatus::Success;
3446 }
3447
3448 return ParseStatus::Failure;
3449}
3450
3451unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3452 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3453
3454 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3455 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3456 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3457 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3458 return Match_InvalidOperand;
3459
3460 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3461 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3462 // v_mac_f32/16 allow only dst_sel == DWORD;
3463 auto OpNum =
3464 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3465 const auto &Op = Inst.getOperand(OpNum);
3466 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3467 return Match_InvalidOperand;
3468 }
3469 }
3470
3471 return Match_Success;
3472}
3473
3475 static const unsigned Variants[] = {
3479 };
3480
3481 return ArrayRef(Variants);
3482}
3483
3484// What asm variants we should check
3485ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3486 if (isForcedDPP() && isForcedVOP3()) {
3487 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3488 return ArrayRef(Variants);
3489 }
3490 if (getForcedEncodingSize() == 32) {
3491 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3492 return ArrayRef(Variants);
3493 }
3494
3495 if (isForcedVOP3()) {
3496 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3497 return ArrayRef(Variants);
3498 }
3499
3500 if (isForcedSDWA()) {
3501 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3503 return ArrayRef(Variants);
3504 }
3505
3506 if (isForcedDPP()) {
3507 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3508 return ArrayRef(Variants);
3509 }
3510
3511 return getAllVariants();
3512}
3513
3514StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3515 if (isForcedDPP() && isForcedVOP3())
3516 return "e64_dpp";
3517
3518 if (getForcedEncodingSize() == 32)
3519 return "e32";
3520
3521 if (isForcedVOP3())
3522 return "e64";
3523
3524 if (isForcedSDWA())
3525 return "sdwa";
3526
3527 if (isForcedDPP())
3528 return "dpp";
3529
3530 return "";
3531}
3532
3533unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3534 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3535 for (MCPhysReg Reg : Desc.implicit_uses()) {
3536 switch (Reg) {
3537 case AMDGPU::FLAT_SCR:
3538 case AMDGPU::VCC:
3539 case AMDGPU::VCC_LO:
3540 case AMDGPU::VCC_HI:
3541 case AMDGPU::M0:
3542 return Reg;
3543 default:
3544 break;
3545 }
3546 }
3547 return AMDGPU::NoRegister;
3548}
3549
3550// NB: This code is correct only when used to check constant
3551// bus limitations because GFX7 support no f16 inline constants.
3552// Note that there are no cases when a GFX7 opcode violates
3553// constant bus limitations due to the use of an f16 constant.
3554bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3555 unsigned OpIdx) const {
3556 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3557
3558 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3559 AMDGPU::isKImmOperand(Desc, OpIdx)) {
3560 return false;
3561 }
3562
3563 const MCOperand &MO = Inst.getOperand(OpIdx);
3564
3565 int64_t Val = MO.getImm();
3566 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3567
3568 switch (OpSize) { // expected operand size
3569 case 8:
3570 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3571 case 4:
3572 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3573 case 2: {
3574 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3578 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3579
3584
3589
3594
3599 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3600
3605 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3606
3607 llvm_unreachable("invalid operand type");
3608 }
3609 default:
3610 llvm_unreachable("invalid operand size");
3611 }
3612}
3613
3614unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3615 if (!isGFX10Plus())
3616 return 1;
3617
3618 switch (Opcode) {
3619 // 64-bit shift instructions can use only one scalar value input
3620 case AMDGPU::V_LSHLREV_B64_e64:
3621 case AMDGPU::V_LSHLREV_B64_gfx10:
3622 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3623 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3624 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3625 case AMDGPU::V_LSHRREV_B64_e64:
3626 case AMDGPU::V_LSHRREV_B64_gfx10:
3627 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3628 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3629 case AMDGPU::V_ASHRREV_I64_e64:
3630 case AMDGPU::V_ASHRREV_I64_gfx10:
3631 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3632 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3633 case AMDGPU::V_LSHL_B64_e64:
3634 case AMDGPU::V_LSHR_B64_e64:
3635 case AMDGPU::V_ASHR_I64_e64:
3636 return 1;
3637 default:
3638 return 2;
3639 }
3640}
3641
3642constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3644
3645// Get regular operand indices in the same order as specified
3646// in the instruction (but append mandatory literals to the end).
3648 bool AddMandatoryLiterals = false) {
3649
3650 int16_t ImmIdx =
3651 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3652
3653 if (isVOPD(Opcode)) {
3654 int16_t ImmDeferredIdx =
3655 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3656 : -1;
3657
3658 return {getNamedOperandIdx(Opcode, OpName::src0X),
3659 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3660 getNamedOperandIdx(Opcode, OpName::src0Y),
3661 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3662 ImmDeferredIdx,
3663 ImmIdx};
3664 }
3665
3666 return {getNamedOperandIdx(Opcode, OpName::src0),
3667 getNamedOperandIdx(Opcode, OpName::src1),
3668 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3669}
3670
3671bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3672 const MCOperand &MO = Inst.getOperand(OpIdx);
3673 if (MO.isImm()) {
3674 return !isInlineConstant(Inst, OpIdx);
3675 } else if (MO.isReg()) {
3676 auto Reg = MO.getReg();
3677 if (!Reg) {
3678 return false;
3679 }
3680 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3681 auto PReg = mc2PseudoReg(Reg);
3682 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3683 } else {
3684 return true;
3685 }
3686}
3687
3688// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3689// Writelane is special in that it can use SGPR and M0 (which would normally
3690// count as using the constant bus twice - but in this case it is allowed since
3691// the lane selector doesn't count as a use of the constant bus). However, it is
3692// still required to abide by the 1 SGPR rule.
3693static bool checkWriteLane(const MCInst &Inst) {
3694 const unsigned Opcode = Inst.getOpcode();
3695 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3696 return false;
3697 const MCOperand &LaneSelOp = Inst.getOperand(2);
3698 if (!LaneSelOp.isReg())
3699 return false;
3700 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3701 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3702}
3703
3704bool AMDGPUAsmParser::validateConstantBusLimitations(
3705 const MCInst &Inst, const OperandVector &Operands) {
3706 const unsigned Opcode = Inst.getOpcode();
3707 const MCInstrDesc &Desc = MII.get(Opcode);
3708 unsigned LastSGPR = AMDGPU::NoRegister;
3709 unsigned ConstantBusUseCount = 0;
3710 unsigned NumLiterals = 0;
3711 unsigned LiteralSize;
3712
3713 if (!(Desc.TSFlags &
3716 !isVOPD(Opcode))
3717 return true;
3718
3719 if (checkWriteLane(Inst))
3720 return true;
3721
3722 // Check special imm operands (used by madmk, etc)
3723 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3724 ++NumLiterals;
3725 LiteralSize = 4;
3726 }
3727
3728 SmallDenseSet<unsigned> SGPRsUsed;
3729 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3730 if (SGPRUsed != AMDGPU::NoRegister) {
3731 SGPRsUsed.insert(SGPRUsed);
3732 ++ConstantBusUseCount;
3733 }
3734
3735 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3736
3737 for (int OpIdx : OpIndices) {
3738 if (OpIdx == -1)
3739 continue;
3740
3741 const MCOperand &MO = Inst.getOperand(OpIdx);
3742 if (usesConstantBus(Inst, OpIdx)) {
3743 if (MO.isReg()) {
3744 LastSGPR = mc2PseudoReg(MO.getReg());
3745 // Pairs of registers with a partial intersections like these
3746 // s0, s[0:1]
3747 // flat_scratch_lo, flat_scratch
3748 // flat_scratch_lo, flat_scratch_hi
3749 // are theoretically valid but they are disabled anyway.
3750 // Note that this code mimics SIInstrInfo::verifyInstruction
3751 if (SGPRsUsed.insert(LastSGPR).second) {
3752 ++ConstantBusUseCount;
3753 }
3754 } else { // Expression or a literal
3755
3756 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3757 continue; // special operand like VINTERP attr_chan
3758
3759 // An instruction may use only one literal.
3760 // This has been validated on the previous step.
3761 // See validateVOPLiteral.
3762 // This literal may be used as more than one operand.
3763 // If all these operands are of the same size,
3764 // this literal counts as one scalar value.
3765 // Otherwise it counts as 2 scalar values.
3766 // See "GFX10 Shader Programming", section 3.6.2.3.
3767
3768 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3769 if (Size < 4)
3770 Size = 4;
3771
3772 if (NumLiterals == 0) {
3773 NumLiterals = 1;
3774 LiteralSize = Size;
3775 } else if (LiteralSize != Size) {
3776 NumLiterals = 2;
3777 }
3778 }
3779 }
3780 }
3781 ConstantBusUseCount += NumLiterals;
3782
3783 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3784 return true;
3785
3786 SMLoc LitLoc = getLitLoc(Operands);
3787 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3788 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3789 Error(Loc, "invalid operand (violates constant bus restrictions)");
3790 return false;
3791}
3792
3793bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3794 const MCInst &Inst, const OperandVector &Operands) {
3795
3796 const unsigned Opcode = Inst.getOpcode();
3797 if (!isVOPD(Opcode))
3798 return true;
3799
3800 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3801
3802 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3803 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3804 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3805 ? Opr.getReg()
3807 };
3808
3809 // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache.
3810 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3811
3812 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3813 auto InvalidCompOprIdx =
3814 InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3815 if (!InvalidCompOprIdx)
3816 return true;
3817
3818 auto CompOprIdx = *InvalidCompOprIdx;
3819 auto ParsedIdx =
3820 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3821 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3822 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3823
3824 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3825 if (CompOprIdx == VOPD::Component::DST) {
3826 Error(Loc, "one dst register must be even and the other odd");
3827 } else {
3828 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3829 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3830 " operands must use different VGPR banks");
3831 }
3832
3833 return false;
3834}
3835
3836bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3837
3838 const unsigned Opc = Inst.getOpcode();
3839 const MCInstrDesc &Desc = MII.get(Opc);
3840
3841 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3842 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3843 assert(ClampIdx != -1);
3844 return Inst.getOperand(ClampIdx).getImm() == 0;
3845 }
3846
3847 return true;
3848}
3849
3852
3853bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3854 const SMLoc &IDLoc) {
3855
3856 const unsigned Opc = Inst.getOpcode();
3857 const MCInstrDesc &Desc = MII.get(Opc);
3858
3859 if ((Desc.TSFlags & MIMGFlags) == 0)
3860 return true;
3861
3862 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3863 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3864 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3865
3866 assert(VDataIdx != -1);
3867
3868 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3869 return true;
3870
3871 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3872 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3873 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3874 if (DMask == 0)
3875 DMask = 1;
3876
3877 bool IsPackedD16 = false;
3878 unsigned DataSize =
3879 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3880 if (hasPackedD16()) {
3881 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3882 IsPackedD16 = D16Idx >= 0;
3883 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3884 DataSize = (DataSize + 1) / 2;
3885 }
3886
3887 if ((VDataSize / 4) == DataSize + TFESize)
3888 return true;
3889
3890 StringRef Modifiers;
3891 if (isGFX90A())
3892 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3893 else
3894 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3895
3896 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3897 return false;
3898}
3899
3900bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3901 const SMLoc &IDLoc) {
3902 const unsigned Opc = Inst.getOpcode();
3903 const MCInstrDesc &Desc = MII.get(Opc);
3904
3905 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
3906 return true;
3907
3909
3910 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3912 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3913 int RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc
3914 : AMDGPU::OpName::rsrc;
3915 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
3916 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3917 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3918
3919 assert(VAddr0Idx != -1);
3920 assert(SrsrcIdx != -1);
3921 assert(SrsrcIdx > VAddr0Idx);
3922
3923 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3924 if (BaseOpcode->BVH) {
3925 if (IsA16 == BaseOpcode->A16)
3926 return true;
3927 Error(IDLoc, "image address size does not match a16");
3928 return false;
3929 }
3930
3931 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3933 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3934 unsigned ActualAddrSize =
3935 IsNSA ? SrsrcIdx - VAddr0Idx
3936 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3937
3938 unsigned ExpectedAddrSize =
3939 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3940
3941 if (IsNSA) {
3942 if (hasPartialNSAEncoding() &&
3943 ExpectedAddrSize >
3945 int VAddrLastIdx = SrsrcIdx - 1;
3946 unsigned VAddrLastSize =
3947 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3948
3949 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3950 }
3951 } else {
3952 if (ExpectedAddrSize > 12)
3953 ExpectedAddrSize = 16;
3954
3955 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3956 // This provides backward compatibility for assembly created
3957 // before 160b/192b/224b types were directly supported.
3958 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3959 return true;
3960 }
3961
3962 if (ActualAddrSize == ExpectedAddrSize)
3963 return true;
3964
3965 Error(IDLoc, "image address size does not match dim and a16");
3966 return false;
3967}
3968
3969bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3970
3971 const unsigned Opc = Inst.getOpcode();
3972 const MCInstrDesc &Desc = MII.get(Opc);
3973
3974 if ((Desc.TSFlags & MIMGFlags) == 0)
3975 return true;
3976 if (!Desc.mayLoad() || !Desc.mayStore())
3977 return true; // Not atomic
3978
3979 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3980 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3981
3982 // This is an incomplete check because image_atomic_cmpswap
3983 // may only use 0x3 and 0xf while other atomic operations
3984 // may use 0x1 and 0x3. However these limitations are
3985 // verified when we check that dmask matches dst size.
3986 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3987}
3988
3989bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3990
3991 const unsigned Opc = Inst.getOpcode();
3992 const MCInstrDesc &Desc = MII.get(Opc);
3993
3994 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3995 return true;
3996
3997 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3998 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3999
4000 // GATHER4 instructions use dmask in a different fashion compared to
4001 // other MIMG instructions. The only useful DMASK values are
4002 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4003 // (red,red,red,red) etc.) The ISA document doesn't mention
4004 // this.
4005 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4006}
4007
4008bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4009 const unsigned Opc = Inst.getOpcode();
4010 const MCInstrDesc &Desc = MII.get(Opc);
4011
4012 if ((Desc.TSFlags & MIMGFlags) == 0)
4013 return true;
4014
4016 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4018
4019 if (!BaseOpcode->MSAA)
4020 return true;
4021
4022 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4023 assert(DimIdx != -1);
4024
4025 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4027
4028 return DimInfo->MSAA;
4029}
4030
4031static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4032{
4033 switch (Opcode) {
4034 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4035 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4036 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4037 return true;
4038 default:
4039 return false;
4040 }
4041}
4042
4043// movrels* opcodes should only allow VGPRS as src0.
4044// This is specified in .td description for vop1/vop3,
4045// but sdwa is handled differently. See isSDWAOperand.
4046bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4047 const OperandVector &Operands) {
4048
4049 const unsigned Opc = Inst.getOpcode();
4050 const MCInstrDesc &Desc = MII.get(Opc);
4051
4052 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4053 return true;
4054
4055 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4056 assert(Src0Idx != -1);
4057
4058 SMLoc ErrLoc;
4059 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4060 if (Src0.isReg()) {
4061 auto Reg = mc2PseudoReg(Src0.getReg());
4062 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4063 if (!isSGPR(Reg, TRI))
4064 return true;
4065 ErrLoc = getRegLoc(Reg, Operands);
4066 } else {
4067 ErrLoc = getConstLoc(Operands);
4068 }
4069
4070 Error(ErrLoc, "source operand must be a VGPR");
4071 return false;
4072}
4073
4074bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4075 const OperandVector &Operands) {
4076
4077 const unsigned Opc = Inst.getOpcode();
4078
4079 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4080 return true;
4081
4082 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4083 assert(Src0Idx != -1);
4084
4085 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4086 if (!Src0.isReg())
4087 return true;
4088
4089 auto Reg = mc2PseudoReg(Src0.getReg());
4090 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4091 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4092 Error(getRegLoc(Reg, Operands),
4093 "source operand must be either a VGPR or an inline constant");
4094 return false;
4095 }
4096
4097 return true;
4098}
4099
4100bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4101 const OperandVector &Operands) {
4102 unsigned Opcode = Inst.getOpcode();
4103 const MCInstrDesc &Desc = MII.get(Opcode);
4104
4105 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4106 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4107 return true;
4108
4109 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4110 if (Src2Idx == -1)
4111 return true;
4112
4113 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4114 Error(getConstLoc(Operands),
4115 "inline constants are not allowed for this operand");
4116 return false;
4117 }
4118
4119 return true;
4120}
4121
4122bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4123 const OperandVector &Operands) {
4124 const unsigned Opc = Inst.getOpcode();
4125 const MCInstrDesc &Desc = MII.get(Opc);
4126
4127 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4128 return true;
4129
4130 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4131 if (Src2Idx == -1)
4132 return true;
4133
4134 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4135 if (!Src2.isReg())
4136 return true;
4137
4138 MCRegister Src2Reg = Src2.getReg();
4139 MCRegister DstReg = Inst.getOperand(0).getReg();
4140 if (Src2Reg == DstReg)
4141 return true;
4142
4143 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4144 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4145 return true;
4146
4147 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4148 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
4149 "source 2 operand must not partially overlap with dst");
4150 return false;
4151 }
4152
4153 return true;
4154}
4155
4156bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4157 switch (Inst.getOpcode()) {
4158 default:
4159 return true;
4160 case V_DIV_SCALE_F32_gfx6_gfx7:
4161 case V_DIV_SCALE_F32_vi:
4162 case V_DIV_SCALE_F32_gfx10:
4163 case V_DIV_SCALE_F64_gfx6_gfx7:
4164 case V_DIV_SCALE_F64_vi:
4165 case V_DIV_SCALE_F64_gfx10:
4166 break;
4167 }
4168
4169 // TODO: Check that src0 = src1 or src2.
4170
4171 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4172 AMDGPU::OpName::src2_modifiers,
4173 AMDGPU::OpName::src2_modifiers}) {
4175 .getImm() &
4177 return false;
4178 }
4179 }
4180
4181 return true;
4182}
4183
4184bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4185
4186 const unsigned Opc = Inst.getOpcode();
4187 const MCInstrDesc &Desc = MII.get(Opc);
4188
4189 if ((Desc.TSFlags & MIMGFlags) == 0)
4190 return true;
4191
4192 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4193 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4194 if (isCI() || isSI())
4195 return false;
4196 }
4197
4198 return true;
4199}
4200
4201static bool IsRevOpcode(const unsigned Opcode)
4202{
4203 switch (Opcode) {
4204 case AMDGPU::V_SUBREV_F32_e32:
4205 case AMDGPU::V_SUBREV_F32_e64:
4206 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4207 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4208 case AMDGPU::V_SUBREV_F32_e32_vi:
4209 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4210 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4211 case AMDGPU::V_SUBREV_F32_e64_vi:
4212
4213 case AMDGPU::V_SUBREV_CO_U32_e32:
4214 case AMDGPU::V_SUBREV_CO_U32_e64:
4215 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4216 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4217
4218 case AMDGPU::V_SUBBREV_U32_e32:
4219 case AMDGPU::V_SUBBREV_U32_e64:
4220 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4221 case AMDGPU::V_SUBBREV_U32_e32_vi:
4222 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4223 case AMDGPU::V_SUBBREV_U32_e64_vi:
4224
4225 case AMDGPU::V_SUBREV_U32_e32:
4226 case AMDGPU::V_SUBREV_U32_e64:
4227 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4228 case AMDGPU::V_SUBREV_U32_e32_vi:
4229 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4230 case AMDGPU::V_SUBREV_U32_e64_vi:
4231
4232 case AMDGPU::V_SUBREV_F16_e32:
4233 case AMDGPU::V_SUBREV_F16_e64:
4234 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4235 case AMDGPU::V_SUBREV_F16_e32_vi:
4236 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4237 case AMDGPU::V_SUBREV_F16_e64_vi:
4238
4239 case AMDGPU::V_SUBREV_U16_e32:
4240 case AMDGPU::V_SUBREV_U16_e64:
4241 case AMDGPU::V_SUBREV_U16_e32_vi:
4242 case AMDGPU::V_SUBREV_U16_e64_vi:
4243
4244 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4245 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4246 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4247
4248 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4249 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4250
4251 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4252 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4253
4254 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4255 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4256
4257 case AMDGPU::V_LSHRREV_B32_e32:
4258 case AMDGPU::V_LSHRREV_B32_e64:
4259 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4260 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4261 case AMDGPU::V_LSHRREV_B32_e32_vi:
4262 case AMDGPU::V_LSHRREV_B32_e64_vi:
4263 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4264 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4265
4266 case AMDGPU::V_ASHRREV_I32_e32:
4267 case AMDGPU::V_ASHRREV_I32_e64:
4268 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4269 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4270 case AMDGPU::V_ASHRREV_I32_e32_vi:
4271 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4272 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4273 case AMDGPU::V_ASHRREV_I32_e64_vi:
4274
4275 case AMDGPU::V_LSHLREV_B32_e32:
4276 case AMDGPU::V_LSHLREV_B32_e64:
4277 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4278 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4279 case AMDGPU::V_LSHLREV_B32_e32_vi:
4280 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4281 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4282 case AMDGPU::V_LSHLREV_B32_e64_vi:
4283
4284 case AMDGPU::V_LSHLREV_B16_e32:
4285 case AMDGPU::V_LSHLREV_B16_e64:
4286 case AMDGPU::V_LSHLREV_B16_e32_vi:
4287 case AMDGPU::V_LSHLREV_B16_e64_vi:
4288 case AMDGPU::V_LSHLREV_B16_gfx10:
4289
4290 case AMDGPU::V_LSHRREV_B16_e32:
4291 case AMDGPU::V_LSHRREV_B16_e64:
4292 case AMDGPU::V_LSHRREV_B16_e32_vi:
4293 case AMDGPU::V_LSHRREV_B16_e64_vi:
4294 case AMDGPU::V_LSHRREV_B16_gfx10:
4295
4296 case AMDGPU::V_ASHRREV_I16_e32:
4297 case AMDGPU::V_ASHRREV_I16_e64:
4298 case AMDGPU::V_ASHRREV_I16_e32_vi:
4299 case AMDGPU::V_ASHRREV_I16_e64_vi:
4300 case AMDGPU::V_ASHRREV_I16_gfx10:
4301
4302 case AMDGPU::V_LSHLREV_B64_e64:
4303 case AMDGPU::V_LSHLREV_B64_gfx10:
4304 case AMDGPU::V_LSHLREV_B64_vi:
4305
4306 case AMDGPU::V_LSHRREV_B64_e64:
4307 case AMDGPU::V_LSHRREV_B64_gfx10:
4308 case AMDGPU::V_LSHRREV_B64_vi:
4309
4310 case AMDGPU::V_ASHRREV_I64_e64:
4311 case AMDGPU::V_ASHRREV_I64_gfx10:
4312 case AMDGPU::V_ASHRREV_I64_vi:
4313
4314 case AMDGPU::V_PK_LSHLREV_B16:
4315 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4316 case AMDGPU::V_PK_LSHLREV_B16_vi:
4317
4318 case AMDGPU::V_PK_LSHRREV_B16:
4319 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4320 case AMDGPU::V_PK_LSHRREV_B16_vi:
4321 case AMDGPU::V_PK_ASHRREV_I16:
4322 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4323 case AMDGPU::V_PK_ASHRREV_I16_vi:
4324 return true;
4325 default:
4326 return false;
4327 }
4328}
4329
4330std::optional<StringRef>
4331AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4332
4333 using namespace SIInstrFlags;
4334 const unsigned Opcode = Inst.getOpcode();
4335 const MCInstrDesc &Desc = MII.get(Opcode);
4336
4337 // lds_direct register is defined so that it can be used
4338 // with 9-bit operands only. Ignore encodings which do not accept these.
4339 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4340 if ((Desc.TSFlags & Enc) == 0)
4341 return std::nullopt;
4342
4343 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4344 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4345 if (SrcIdx == -1)
4346 break;
4347 const auto &Src = Inst.getOperand(SrcIdx);
4348 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4349
4350 if (isGFX90A() || isGFX11Plus())
4351 return StringRef("lds_direct is not supported on this GPU");
4352
4353 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4354 return StringRef("lds_direct cannot be used with this instruction");
4355
4356 if (SrcName != OpName::src0)
4357 return StringRef("lds_direct may be used as src0 only");
4358 }
4359 }
4360
4361 return std::nullopt;
4362}
4363
4364SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4365 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4366 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4367 if (Op.isFlatOffset())
4368 return Op.getStartLoc();
4369 }
4370 return getLoc();
4371}
4372
4373bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4374 const OperandVector &Operands) {
4375 auto Opcode = Inst.getOpcode();
4376 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4377 if (OpNum == -1)
4378 return true;
4379
4380 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4381 if ((TSFlags & SIInstrFlags::FLAT))
4382 return validateFlatOffset(Inst, Operands);
4383
4384 if ((TSFlags & SIInstrFlags::SMRD))
4385 return validateSMEMOffset(Inst, Operands);
4386
4387 const auto &Op = Inst.getOperand(OpNum);
4388 if (isGFX12Plus() &&
4389 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4390 const unsigned OffsetSize = 24;
4391 if (!isIntN(OffsetSize, Op.getImm())) {
4392 Error(getFlatOffsetLoc(Operands),
4393 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4394 return false;
4395 }
4396 } else {
4397 const unsigned OffsetSize = 16;
4398 if (!isUIntN(OffsetSize, Op.getImm())) {
4399 Error(getFlatOffsetLoc(Operands),
4400 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4401 return false;
4402 }
4403 }
4404 return true;
4405}
4406
4407bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4408 const OperandVector &Operands) {
4409 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4410 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4411 return true;
4412
4413 auto Opcode = Inst.getOpcode();
4414 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4415 assert(OpNum != -1);
4416
4417 const auto &Op = Inst.getOperand(OpNum);
4418 if (!hasFlatOffsets() && Op.getImm() != 0) {
4419 Error(getFlatOffsetLoc(Operands),
4420 "flat offset modifier is not supported on this GPU");
4421 return false;
4422 }
4423
4424 // For pre-GFX12 FLAT instructions the offset must be positive;
4425 // MSB is ignored and forced to zero.
4426 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4427 bool AllowNegative =
4429 isGFX12Plus();
4430 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4431 Error(getFlatOffsetLoc(Operands),
4432 Twine("expected a ") +
4433 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4434 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4435 return false;
4436 }
4437
4438 return true;
4439}
4440
4441SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4442 // Start with second operand because SMEM Offset cannot be dst or src0.
4443 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4444 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4445 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4446 return Op.getStartLoc();
4447 }
4448 return getLoc();
4449}
4450
4451bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4452 const OperandVector &Operands) {
4453 if (isCI() || isSI())
4454 return true;
4455
4456 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4457 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4458 return true;
4459
4460 auto Opcode = Inst.getOpcode();
4461 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4462 if (OpNum == -1)
4463 return true;
4464
4465 const auto &Op = Inst.getOperand(OpNum);
4466 if (!Op.isImm())
4467 return true;
4468
4469 uint64_t Offset = Op.getImm();
4470 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4473 return true;
4474
4475 Error(getSMEMOffsetLoc(Operands),
4476 isGFX12Plus() ? "expected a 24-bit signed offset"
4477 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4478 : "expected a 21-bit signed offset");
4479
4480 return false;
4481}
4482
4483bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4484 unsigned Opcode = Inst.getOpcode();
4485 const MCInstrDesc &Desc = MII.get(Opcode);
4486 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4487 return true;
4488
4489 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4490 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4491
4492 const int OpIndices[] = { Src0Idx, Src1Idx };
4493
4494 unsigned NumExprs = 0;
4495 unsigned NumLiterals = 0;
4497
4498 for (int OpIdx : OpIndices) {
4499 if (OpIdx == -1) break;
4500
4501 const MCOperand &MO = Inst.getOperand(OpIdx);
4502 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4503 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4504 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4505 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4506 if (NumLiterals == 0 || LiteralValue != Value) {
4508 ++NumLiterals;
4509 }
4510 } else if (MO.isExpr()) {
4511 ++NumExprs;
4512 }
4513 }
4514 }
4515
4516 return NumLiterals + NumExprs <= 1;
4517}
4518
4519bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4520 const unsigned Opc = Inst.getOpcode();
4521 if (isPermlane16(Opc)) {
4522 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4523 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4524
4525 if (OpSel & ~3)
4526 return false;
4527 }
4528
4529 uint64_t TSFlags = MII.get(Opc).TSFlags;
4530
4531 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4532 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4533 if (OpSelIdx != -1) {
4534 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4535 return false;
4536 }
4537 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4538 if (OpSelHiIdx != -1) {
4539 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4540 return false;
4541 }
4542 }
4543
4544 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4545 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4546 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4547 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4548 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4549 if (OpSel & 3)
4550 return false;
4551 }
4552
4553 return true;
4554}
4555
4556bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) {
4557 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4558
4559 const unsigned Opc = Inst.getOpcode();
4560 uint64_t TSFlags = MII.get(Opc).TSFlags;
4561
4562 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4563 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4564 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4565 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4566 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4567 !(TSFlags & SIInstrFlags::IsSWMMAC))
4568 return true;
4569
4570 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4571 if (NegIdx == -1)
4572 return true;
4573
4574 unsigned Neg = Inst.getOperand(NegIdx).getImm();
4575
4576 // Instructions that have neg_lo or neg_hi operand but neg modifier is allowed
4577 // on some src operands but not allowed on other.
4578 // It is convenient that such instructions don't have src_modifiers operand
4579 // for src operands that don't allow neg because they also don't allow opsel.
4580
4581 int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4582 AMDGPU::OpName::src1_modifiers,
4583 AMDGPU::OpName::src2_modifiers};
4584
4585 for (unsigned i = 0; i < 3; ++i) {
4586 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4587 if (Neg & (1 << i))
4588 return false;
4589 }
4590 }
4591
4592 return true;
4593}
4594
4595bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4596 const OperandVector &Operands) {
4597 const unsigned Opc = Inst.getOpcode();
4598 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4599 if (DppCtrlIdx >= 0) {
4600 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4601
4602 if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
4603 AMDGPU::isDPALU_DPP(MII.get(Opc))) {
4604 // DP ALU DPP is supported for row_newbcast only on GFX9*
4605 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4606 Error(S, "DP ALU dpp only supports row_newbcast");
4607 return false;
4608 }
4609 }
4610
4611 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
4612 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4613
4614 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
4615 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4616 if (Src1Idx >= 0) {
4617 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4618 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4619 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
4620 auto Reg = mc2PseudoReg(Inst.getOperand(Src1Idx).getReg());
4621 SMLoc S = getRegLoc(Reg, Operands);
4622 Error(S, "invalid operand for instruction");
4623 return false;
4624 }
4625 if (Src1.isImm()) {
4626 Error(getInstLoc(Operands),
4627 "src1 immediate operand invalid for instruction");
4628 return false;
4629 }
4630 }
4631 }
4632
4633 return true;
4634}
4635
4636// Check if VCC register matches wavefront size
4637bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4638 auto FB = getFeatureBits();
4639 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4640 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4641}
4642
4643// One unique literal can be used. VOP3 literal is only allowed in GFX10+
4644bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4645 const OperandVector &Operands) {
4646 unsigned Opcode = Inst.getOpcode();
4647 const MCInstrDesc &Desc = MII.get(Opcode);
4648 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4649 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4650 !HasMandatoryLiteral && !isVOPD(Opcode))
4651 return true;
4652
4653 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4654
4655 unsigned NumExprs = 0;
4656 unsigned NumLiterals = 0;
4658
4659 for (int OpIdx : OpIndices) {
4660 if (OpIdx == -1)
4661 continue;
4662
4663 const MCOperand &MO = Inst.getOperand(OpIdx);
4664 if (!MO.isImm() && !MO.isExpr())
4665 continue;
4666 if (!isSISrcOperand(Desc, OpIdx))
4667 continue;
4668
4669 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4670 uint64_t Value = static_cast<uint64_t>(MO.getImm());
4671 bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
4672 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
4673 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
4674
4675 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
4676 Error(getLitLoc(Operands), "invalid operand for instruction");
4677 return false;
4678 }
4679
4680 if (IsFP64 && IsValid32Op)
4681 Value = Hi_32(Value);
4682
4683 if (NumLiterals == 0 || LiteralValue != Value) {
4685 ++NumLiterals;
4686 }
4687 } else if (MO.isExpr()) {
4688 ++NumExprs;
4689 }
4690 }
4691 NumLiterals += NumExprs;
4692
4693 if (!NumLiterals)
4694 return true;
4695
4696 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4697 Error(getLitLoc(Operands), "literal operands are not supported");
4698 return false;
4699 }
4700
4701 if (NumLiterals > 1) {
4702 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4703 return false;
4704 }
4705
4706 return true;
4707}
4708
4709// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4710static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4711 const MCRegisterInfo *MRI) {
4712 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4713 if (OpIdx < 0)
4714 return -1;
4715
4716 const MCOperand &Op = Inst.getOperand(OpIdx);
4717 if (!Op.isReg())
4718 return -1;
4719
4720 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4721 auto Reg = Sub ? Sub : Op.getReg();
4722 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4723 return AGPR32.contains(Reg) ? 1 : 0;
4724}
4725
4726bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4727 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4728 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4730 SIInstrFlags::DS)) == 0)
4731 return true;
4732
4733 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4734 : AMDGPU::OpName::vdata;
4735
4736 const MCRegisterInfo *MRI = getMRI();
4737 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4738 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4739
4740 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4741 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4742 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4743 return false;
4744 }
4745
4746 auto FB = getFeatureBits();
4747 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4748 if (DataAreg < 0 || DstAreg < 0)
4749 return true;
4750 return DstAreg == DataAreg;
4751 }
4752
4753 return DstAreg < 1 && DataAreg < 1;
4754}
4755
4756bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4757 auto FB = getFeatureBits();
4758 if (!FB[AMDGPU::FeatureGFX90AInsts])
4759 return true;
4760
4761 const MCRegisterInfo *MRI = getMRI();
4762 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4763 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4764 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4765 const MCOperand &Op = Inst.getOperand(I);
4766 if (!Op.isReg())
4767 continue;
4768
4769 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4770 if (!Sub)
4771 continue;
4772
4773 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4774 return false;
4775 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4776 return false;
4777 }
4778
4779 return true;
4780}
4781
4782SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4783 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4784 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4785 if (Op.isBLGP())
4786 return Op.getStartLoc();
4787 }
4788 return SMLoc();
4789}
4790
4791bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4792 const OperandVector &Operands) {
4793 unsigned Opc = Inst.getOpcode();
4794 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4795 if (BlgpIdx == -1)
4796 return true;
4797 SMLoc BLGPLoc = getBLGPLoc(Operands);
4798 if (!BLGPLoc.isValid())
4799 return true;
4800 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
4801 auto FB = getFeatureBits();
4802 bool UsesNeg = false;
4803 if (FB[AMDGPU::FeatureGFX940Insts]) {
4804 switch (Opc) {
4805 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4806 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4807 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4808 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4809 UsesNeg = true;
4810 }
4811 }
4812
4813 if (IsNeg == UsesNeg)
4814 return true;
4815
4816 Error(BLGPLoc,
4817 UsesNeg ? "invalid modifier: blgp is not supported"
4818 : "invalid modifier: neg is not supported");
4819
4820 return false;
4821}
4822
4823bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4824 const OperandVector &Operands) {
4825 if (!isGFX11Plus())
4826 return true;
4827
4828 unsigned Opc = Inst.getOpcode();
4829 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4830 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4831 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4832 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4833 return true;
4834
4835 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4836 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4837 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4838 if (Reg == AMDGPU::SGPR_NULL)
4839 return true;
4840
4841 SMLoc RegLoc = getRegLoc(Reg, Operands);
4842 Error(RegLoc, "src0 must be null");
4843 return false;
4844}
4845
4846bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
4847 const OperandVector &Operands) {
4848 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4849 if ((TSFlags & SIInstrFlags::DS) == 0)
4850 return true;
4851 if (TSFlags & SIInstrFlags::GWS)
4852 return validateGWS(Inst, Operands);
4853 // Only validate GDS for non-GWS instructions.
4854 if (hasGDS())
4855 return true;
4856 int GDSIdx =
4857 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
4858 if (GDSIdx < 0)
4859 return true;
4860 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
4861 if (GDS) {
4862 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
4863 Error(S, "gds modifier is not supported on this GPU");
4864 return false;
4865 }
4866 return true;
4867}
4868
4869// gfx90a has an undocumented limitation:
4870// DS_GWS opcodes must use even aligned registers.
4871bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4872 const OperandVector &Operands) {
4873 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4874 return true;
4875
4876 int Opc = Inst.getOpcode();
4877 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4878 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4879 return true;
4880
4881 const MCRegisterInfo *MRI = getMRI();
4882 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4883 int Data0Pos =
4884 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4885 assert(Data0Pos != -1);
4886 auto Reg = Inst.getOperand(Data0Pos).getReg();
4887 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4888 if (RegIdx & 1) {
4889 SMLoc RegLoc = getRegLoc(Reg, Operands);
4890 Error(RegLoc, "vgpr must be even aligned");
4891 return false;
4892 }
4893
4894 return true;
4895}
4896
4897bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4898 const OperandVector &Operands,
4899 const SMLoc &IDLoc) {
4900 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4901 AMDGPU::OpName::cpol);
4902 if (CPolPos == -1)
4903 return true;
4904
4905 unsigned CPol = Inst.getOperand(CPolPos).getImm();
4906
4907 if (isGFX12Plus())
4908 return validateTHAndScopeBits(Inst, Operands, CPol);
4909
4910 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4911 if (TSFlags & SIInstrFlags::SMRD) {
4912 if (CPol && (isSI() || isCI())) {
4913 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4914 Error(S, "cache policy is not supported for SMRD instructions");
4915 return false;
4916 }
4917 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4918 Error(IDLoc, "invalid cache policy for SMEM instruction");
4919 return false;
4920 }
4921 }
4922
4923 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4924 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
4927 if (!(TSFlags & AllowSCCModifier)) {
4928 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4929 StringRef CStr(S.getPointer());
4930 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4931 Error(S,
4932 "scc modifier is not supported for this instruction on this GPU");
4933 return false;
4934 }
4935 }
4936
4938 return true;
4939
4940 if (TSFlags & SIInstrFlags::IsAtomicRet) {
4941 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4942 Error(IDLoc, isGFX940() ? "instruction must use sc0"
4943 : "instruction must use glc");
4944 return false;
4945 }
4946 } else {
4947 if (CPol & CPol::GLC) {
4948 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4949 StringRef CStr(S.getPointer());
4951 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4952 Error(S, isGFX940() ? "instruction must not use sc0"
4953 : "instruction must not use glc");
4954 return false;
4955 }
4956 }
4957
4958 return true;
4959}
4960
4961bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
4962 const OperandVector &Operands,
4963 const unsigned CPol) {
4964 const unsigned TH = CPol & AMDGPU::CPol::TH;
4965 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
4966
4967 const unsigned Opcode = Inst.getOpcode();
4968 const MCInstrDesc &TID = MII.get(Opcode);
4969
4970 auto PrintError = [&](StringRef Msg) {
4971 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4972 Error(S, Msg);
4973 return false;
4974 };
4975
4976 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
4979 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
4980
4981 if (TH == 0)
4982 return true;
4983
4984 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
4985 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
4986 (TH == AMDGPU::CPol::TH_NT_HT)))
4987 return PrintError("invalid th value for SMEM instruction");
4988
4989 if (TH == AMDGPU::CPol::TH_BYPASS) {
4990 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
4992 (Scope == AMDGPU::CPol::SCOPE_SYS &&
4994 return PrintError("scope and th combination is not valid");
4995 }
4996
4997 bool IsStore = TID.mayStore();
4998 bool IsAtomic =
5000
5001 if (IsAtomic) {
5002 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5003 return PrintError("invalid th value for atomic instructions");
5004 } else if (IsStore) {
5005 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5006 return PrintError("invalid th value for store instructions");
5007 } else {
5008 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5009 return PrintError("invalid th value for load instructions");
5010 }
5011
5012 return true;
5013}
5014
5015bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
5016 if (!isGFX11Plus())
5017 return true;
5018 for (auto &Operand : Operands) {
5019 if (!Operand->isReg())
5020 continue;
5021 unsigned Reg = Operand->getReg();
5022 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
5023 Error(getRegLoc(Reg, Operands),
5024 "execz and vccz are not supported on this GPU");
5025 return false;
5026 }
5027 }
5028 return true;
5029}
5030
5031bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5032 const OperandVector &Operands) {
5033 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5034 if (Desc.mayStore() &&
5036 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5037 if (Loc != getInstLoc(Operands)) {
5038 Error(Loc, "TFE modifier has no meaning for store instructions");
5039 return false;
5040 }
5041 }
5042
5043 return true;
5044}
5045
5046bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
5047 const SMLoc &IDLoc,
5048 const OperandVector &Operands) {
5049 if (auto ErrMsg = validateLdsDirect(Inst)) {
5050 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
5051 return false;
5052 }
5053 if (!validateSOPLiteral(Inst)) {
5054 Error(getLitLoc(Operands),
5055 "only one unique literal operand is allowed");
5056 return false;
5057 }
5058 if (!validateVOPLiteral(Inst, Operands)) {
5059 return false;
5060 }
5061 if (!validateConstantBusLimitations(Inst, Operands)) {
5062 return false;
5063 }
5064 if (!validateVOPDRegBankConstraints(Inst, Operands)) {
5065 return false;
5066 }
5067 if (!validateIntClampSupported(Inst)) {
5068 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
5069 "integer clamping is not supported on this GPU");
5070 return false;
5071 }
5072 if (!validateOpSel(Inst)) {
5073 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5074 "invalid op_sel operand");
5075 return false;
5076 }
5077 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5078 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5079 "invalid neg_lo operand");
5080 return false;
5081 }
5082 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5083 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5084 "invalid neg_hi operand");
5085 return false;
5086 }
5087 if (!validateDPP(Inst, Operands)) {
5088 return false;
5089 }
5090 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5091 if (!validateMIMGD16(Inst)) {
5092 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5093 "d16 modifier is not supported on this GPU");
5094 return false;
5095 }
5096 if (!validateMIMGMSAA(Inst)) {
5097 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5098 "invalid dim; must be MSAA type");
5099 return false;
5100 }
5101 if (!validateMIMGDataSize(Inst, IDLoc)) {
5102 return false;
5103 }
5104 if (!validateMIMGAddrSize(Inst, IDLoc))
5105 return false;
5106 if (!validateMIMGAtomicDMask(Inst)) {
5107 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5108 "invalid atomic image dmask");
5109 return false;
5110 }
5111 if (!validateMIMGGatherDMask(Inst)) {
5112 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5113 "invalid image_gather dmask: only one bit must be set");
5114 return false;
5115 }
5116 if (!validateMovrels(Inst, Operands)) {
5117 return false;
5118 }
5119 if (!validateOffset(Inst, Operands)) {
5120 return false;
5121 }
5122 if (!validateMAIAccWrite(Inst, Operands)) {
5123 return false;
5124 }
5125 if (!validateMAISrc2(Inst, Operands)) {
5126 return false;
5127 }
5128 if (!validateMFMA(Inst, Operands)) {
5129 return false;
5130 }
5131 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5132 return false;
5133 }
5134
5135 if (!validateAGPRLdSt(Inst)) {
5136 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5137 ? "invalid register class: data and dst should be all VGPR or AGPR"
5138 : "invalid register class: agpr loads and stores not supported on this GPU"
5139 );
5140 return false;
5141 }
5142 if (!validateVGPRAlign(Inst)) {
5143 Error(IDLoc,
5144 "invalid register class: vgpr tuples must be 64 bit aligned");
5145 return false;
5146 }
5147 if (!validateDS(Inst, Operands)) {
5148 return false;
5149 }
5150
5151 if (!validateBLGP(Inst, Operands)) {
5152 return false;
5153 }
5154
5155 if (!validateDivScale(Inst)) {
5156 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5157 return false;
5158 }
5159 if (!validateWaitCnt(Inst, Operands)) {
5160 return false;
5161 }
5162 if (!validateExeczVcczOperands(Operands)) {
5163 return false;
5164 }
5165 if (!validateTFE(Inst, Operands)) {
5166 return false;
5167 }
5168
5169 return true;
5170}
5171
5173 const FeatureBitset &FBS,
5174 unsigned VariantID = 0);
5175
5176static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5177 const FeatureBitset &AvailableFeatures,
5178 unsigned VariantID);
5179
5180bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5181 const FeatureBitset &FBS) {
5182 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5183}
5184
5185bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5186 const FeatureBitset &FBS,
5187 ArrayRef<unsigned> Variants) {
5188 for (auto Variant : Variants) {
5189 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5190 return true;
5191 }
5192
5193 return false;
5194}
5195
5196bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5197 const SMLoc &IDLoc) {
5198 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5199
5200 // Check if requested instruction variant is supported.
5201 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5202 return false;
5203
5204 // This instruction is not supported.
5205 // Clear any other pending errors because they are no longer relevant.
5206 getParser().clearPendingErrors();
5207
5208 // Requested instruction variant is not supported.
5209 // Check if any other variants are supported.
5210 StringRef VariantName = getMatchedVariantName();
5211 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5212 return Error(IDLoc,
5213 Twine(VariantName,
5214 " variant of this instruction is not supported"));
5215 }
5216
5217 // Check if this instruction may be used with a different wavesize.
5218 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5219 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5220
5221 FeatureBitset FeaturesWS32 = getFeatureBits();
5222 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5223 .flip(AMDGPU::FeatureWavefrontSize32);
5224 FeatureBitset AvailableFeaturesWS32 =
5225 ComputeAvailableFeatures(FeaturesWS32);
5226
5227 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5228 return Error(IDLoc, "instruction requires wavesize=32");
5229 }
5230
5231 // Finally check if this instruction is supported on any other GPU.
5232 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5233 return Error(IDLoc, "instruction not supported on this GPU");
5234 }
5235
5236 // Instruction not supported on any GPU. Probably a typo.
5237 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5238 return Error(IDLoc, "invalid instruction" + Suggestion);
5239}
5240
5242 uint64_t InvalidOprIdx) {
5243 assert(InvalidOprIdx < Operands.size());
5244 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5245 if (Op.isToken() && InvalidOprIdx > 1) {
5246 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5247 return PrevOp.isToken() && PrevOp.getToken() == "::";
5248 }
5249 return false;
5250}
5251
5252bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5254 MCStreamer &Out,
5256 bool MatchingInlineAsm) {
5257 MCInst Inst;
5258 unsigned Result = Match_Success;
5259 for (auto Variant : getMatchedVariants()) {
5260 uint64_t EI;
5261 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5262 Variant);
5263 // We order match statuses from least to most specific. We use most specific
5264 // status as resulting
5265 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
5266 if ((R == Match_Success) ||
5267 (R == Match_PreferE32) ||
5268 (R == Match_MissingFeature && Result != Match_PreferE32) ||
5269 (R == Match_InvalidOperand && Result != Match_MissingFeature
5270 && Result != Match_PreferE32) ||
5271 (R == Match_MnemonicFail && Result != Match_InvalidOperand
5272 && Result != Match_MissingFeature
5273 && Result != Match_PreferE32)) {
5274 Result = R;
5275 ErrorInfo = EI;
5276 }
5277 if (R == Match_Success)
5278 break;
5279 }
5280
5281 if (Result == Match_Success) {
5282 if (!validateInstruction(Inst, IDLoc, Operands)) {
5283 return true;
5284 }
5285 Inst.setLoc(IDLoc);
5286 Out.emitInstruction(Inst, getSTI());
5287 return false;
5288 }
5289
5290 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5291 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5292 return true;
5293 }
5294
5295 switch (Result) {
5296 default: break;
5297 case Match_MissingFeature:
5298 // It has been verified that the specified instruction
5299 // mnemonic is valid. A match was found but it requires
5300 // features which are not supported on this GPU.
5301 return Error(IDLoc, "operands are not valid for this GPU or mode");
5302
5303 case Match_InvalidOperand: {
5304 SMLoc ErrorLoc = IDLoc;
5305 if (ErrorInfo != ~0ULL) {
5306 if (ErrorInfo >= Operands.size()) {
5307 return Error(IDLoc, "too few operands for instruction");
5308 }
5309 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5310 if (ErrorLoc == SMLoc())
5311 ErrorLoc = IDLoc;
5312
5314 return Error(ErrorLoc, "invalid VOPDY instruction");
5315 }
5316 return Error(ErrorLoc, "invalid operand for instruction");
5317 }
5318
5319 case Match_PreferE32:
5320 return Error(IDLoc, "internal error: instruction without _e64 suffix "
5321 "should be encoded as e32");
5322 case Match_MnemonicFail:
5323 llvm_unreachable("Invalid instructions should have been handled already");
5324 }
5325 llvm_unreachable("Implement any new match types added!");
5326}
5327
5328bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5329 int64_t Tmp = -1;
5330 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5331 return true;
5332 }
5333 if (getParser().parseAbsoluteExpression(Tmp)) {
5334 return true;
5335 }
5336 Ret = static_cast<uint32_t>(Tmp);
5337 return false;
5338}
5339
5340bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5341 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5342 return TokError("directive only supported for amdgcn architecture");
5343
5344 std::string TargetIDDirective;
5345 SMLoc TargetStart = getTok().getLoc();
5346 if (getParser().parseEscapedString(TargetIDDirective))
5347 return true;
5348
5349 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5350 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5351 return getParser().Error(TargetRange.Start,
5352 (Twine(".amdgcn_target directive's target id ") +
5353 Twine(TargetIDDirective) +
5354 Twine(" does not match the specified target id ") +
5355 Twine(getTargetStreamer().getTargetID()->toString())).str());
5356
5357 return false;
5358}
5359
5360bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5361 return Error(Range.Start, "value out of range", Range);
5362}
5363
5364bool AMDGPUAsmParser::calculateGPRBlocks(
5365 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
5366 bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
5367 unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
5368 SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
5369 // TODO(scott.linder): These calculations are duplicated from
5370 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5371 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5372
5373 unsigned NumVGPRs = NextFreeVGPR;
5374 unsigned NumSGPRs = NextFreeSGPR;
5375
5376 if (Version.Major >= 10)
5377 NumSGPRs = 0;
5378 else {
5379 unsigned MaxAddressableNumSGPRs =
5381
5382 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
5383 NumSGPRs > MaxAddressableNumSGPRs)
5384 return OutOfRangeError(SGPRRange);
5385
5386 NumSGPRs +=
5387 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
5388
5389 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5390 NumSGPRs > MaxAddressableNumSGPRs)
5391 return OutOfRangeError(SGPRRange);
5392
5393 if (Features.test(FeatureSGPRInitBug))
5395 }
5396
5397 VGPRBlocks = IsaInfo::getEncodedNumVGPRBlocks(&getSTI(), NumVGPRs,
5398 EnableWavefrontSize32);
5399 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
5400
5401 return false;
5402}
5403
5404bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5405 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5406 return TokError("directive only supported for amdgcn architecture");
5407
5408 if (!isHsaAbi(getSTI()))
5409 return TokError("directive only supported for amdhsa OS");
5410
5411 StringRef KernelName;
5412 if (getParser().parseIdentifier(KernelName))
5413 return true;
5414
5417 &getSTI(), getContext());
5418
5419 StringSet<> Seen;
5420
5421 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5422
5423 SMRange VGPRRange;
5424 uint64_t NextFreeVGPR = 0;
5425 uint64_t AccumOffset = 0;
5426 uint64_t SharedVGPRCount = 0;
5427 uint64_t PreloadLength = 0;
5428 uint64_t PreloadOffset = 0;
5429 SMRange SGPRRange;
5430 uint64_t NextFreeSGPR = 0;
5431
5432 // Count the number of user SGPRs implied from the enabled feature bits.
5433 unsigned ImpliedUserSGPRCount = 0;
5434
5435 // Track if the asm explicitly contains the directive for the user SGPR
5436 // count.
5437 std::optional<unsigned> ExplicitUserSGPRCount;
5438 bool ReserveVCC = true;
5439 bool ReserveFlatScr = true;
5440 std::optional<bool> EnableWavefrontSize32;
5441
5442 while (true) {
5443 while (trySkipToken(AsmToken::EndOfStatement));
5444
5445 StringRef ID;
5446 SMRange IDRange = getTok().getLocRange();
5447 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5448 return true;
5449
5450 if (ID == ".end_amdhsa_kernel")
5451 break;
5452
5453 if (!Seen.insert(ID).second)
5454 return TokError(".amdhsa_ directives cannot be repeated");
5455
5456 SMLoc ValStart = getLoc();
5457 const MCExpr *ExprVal;
5458 if (getParser().parseExpression(ExprVal))
5459 return true;
5460 SMLoc ValEnd = getLoc();
5461 SMRange ValRange = SMRange(ValStart, ValEnd);
5462
5463 int64_t IVal = 0;
5464 uint64_t Val = IVal;
5465 bool EvaluatableExpr;
5466 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
5467 if (IVal < 0)
5468 return OutOfRangeError(ValRange);
5469 Val = IVal;
5470 }
5471
5472#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5473 if (!isUInt<ENTRY##_WIDTH>(Val)) \
5474 return OutOfRangeError(RANGE); \
5475 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
5476 getContext());
5477
5478// Some fields use the parsed value immediately which requires the expression to
5479// be solvable.
5480#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
5481 if (!(RESOLVED)) \
5482 return Error(IDRange.Start, "directive should have resolvable expression", \
5483 IDRange);
5484
5485 if (ID == ".amdhsa_group_segment_fixed_size") {
5487 CHAR_BIT>(Val))
5488 return OutOfRangeError(ValRange);
5489 KD.group_segment_fixed_size = ExprVal;
5490 } else if (ID == ".amdhsa_private_segment_fixed_size") {
5492 CHAR_BIT>(Val))
5493 return OutOfRangeError(ValRange);
5494 KD.private_segment_fixed_size = ExprVal;
5495 } else if (ID == ".amdhsa_kernarg_size") {
5496 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
5497 return OutOfRangeError(ValRange);
5498 KD.kernarg_size = ExprVal;
5499 } else if (ID == ".amdhsa_user_sgpr_count") {
5500 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5501 ExplicitUserSGPRCount = Val;
5502 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5503 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5505 return Error(IDRange.Start,
5506 "directive is not supported with architected flat scratch",
5507 IDRange);
5509 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5510 ExprVal, ValRange);
5511 if (Val)
5512 ImpliedUserSGPRCount += 4;
5513 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
5514 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5515 if (!hasKernargPreload())
5516 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5517
5518 if (Val > getMaxNumUserSGPRs())
5519 return OutOfRangeError(ValRange);
5520 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
5521 ValRange);
5522 if (Val) {
5523 ImpliedUserSGPRCount += Val;
5524 PreloadLength = Val;
5525 }
5526 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
5527 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5528 if (!hasKernargPreload())
5529 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5530
5531 if (Val >= 1024)
5532 return OutOfRangeError(ValRange);
5533 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
5534 ValRange);
5535 if (Val)
5536 PreloadOffset = Val;
5537 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5538 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5540 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
5541 ValRange);
5542 if (Val)
5543 ImpliedUserSGPRCount += 2;
5544 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5545 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5547 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
5548 ValRange);
5549 if (Val)
5550 ImpliedUserSGPRCount += 2;
5551 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5552 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5554 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5555 ExprVal, ValRange);
5556 if (Val)
5557 ImpliedUserSGPRCount += 2;
5558 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5559 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5561 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
5562 ValRange);
5563 if (Val)
5564 ImpliedUserSGPRCount += 2;
5565 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5567 return Error(IDRange.Start,
5568 "directive is not supported with architected flat scratch",
5569 IDRange);
5570 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5572 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
5573 ExprVal, ValRange);
5574 if (Val)
5575 ImpliedUserSGPRCount += 2;
5576 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5577 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5579 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5580 ExprVal, ValRange);
5581 if (Val)
5582 ImpliedUserSGPRCount += 1;
5583 } else if (ID == ".amdhsa_wavefront_size32") {
5584 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5585 if (IVersion.Major < 10)
5586 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5587 EnableWavefrontSize32 = Val;
5589 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
5590 ValRange);
5591 } else if (ID == ".amdhsa_uses_dynamic_stack") {
5593 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
5594 ValRange);
5595 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5597 return Error(IDRange.Start,
5598 "directive is not supported with architected flat scratch",
5599 IDRange);
5601 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5602 ValRange);
5603 } else if (ID == ".amdhsa_enable_private_segment") {
5605 return Error(
5606 IDRange.Start,
5607 "directive is not supported without architected flat scratch",
5608 IDRange);
5610 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5611 ValRange);
5612 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5614 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
5615 ValRange);
5616 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5618 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
5619 ValRange);
5620 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5622 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
5623 ValRange);
5624 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5626 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
5627 ValRange);
5628 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5630 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
5631 ValRange);
5632 } else if (ID == ".amdhsa_next_free_vgpr") {
5633 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5634 VGPRRange = ValRange;
5635 NextFreeVGPR = Val;
5636 } else if (ID == ".amdhsa_next_free_sgpr") {
5637 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5638 SGPRRange = ValRange;
5639 NextFreeSGPR = Val;
5640 } else if (ID == ".amdhsa_accum_offset") {
5641 if (!isGFX90A())
5642 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5643 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5644 AccumOffset = Val;
5645 } else if (ID == ".amdhsa_reserve_vcc") {
5646 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5647 if (!isUInt<1>(Val))
5648 return OutOfRangeError(ValRange);
5649 ReserveVCC = Val;
5650 } else if (ID == ".amdhsa_reserve_flat_scratch") {
5651 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5652 if (IVersion.Major < 7)
5653 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5655 return Error(IDRange.Start,
5656 "directive is not supported with architected flat scratch",
5657 IDRange);
5658 if (!isUInt<1>(Val))
5659 return OutOfRangeError(ValRange);
5660 ReserveFlatScr = Val;
5661 } else if (ID == ".amdhsa_reserve_xnack_mask") {
5662 if (IVersion.Major < 8)
5663 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5664 if (!isUInt<1>(Val))
5665 return OutOfRangeError(ValRange);
5666 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5667 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5668 IDRange);
5669 } else if (ID == ".amdhsa_float_round_mode_32") {
5671 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
5672 ValRange);
5673 } else if (ID == ".amdhsa_float_round_mode_16_64") {
5675 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
5676 ValRange);
5677 } else if (ID == ".amdhsa_float_denorm_mode_32") {
5679 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
5680 ValRange);
5681 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5683 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
5684 ValRange);
5685 } else if (ID == ".amdhsa_dx10_clamp") {
5686 if (IVersion.Major >= 12)
5687 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5689 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
5690 ValRange);
5691 } else if (ID == ".amdhsa_ieee_mode") {
5692 if (IVersion.Major >= 12)
5693 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5695 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
5696 ValRange);
5697 } else if (ID == ".amdhsa_fp16_overflow") {
5698 if (IVersion.Major < 9)
5699 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5701 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
5702 ValRange);
5703 } else if (ID == ".amdhsa_tg_split") {
5704 if (!isGFX90A())
5705 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5706 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
5707 ExprVal, ValRange);
5708 } else if (ID == ".amdhsa_workgroup_processor_mode") {
5709 if (IVersion.Major < 10)
5710 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5712 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
5713 ValRange);
5714 } else if (ID == ".amdhsa_memory_ordered") {
5715 if (IVersion.Major < 10)
5716 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5718 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
5719 ValRange);
5720 } else if (ID == ".amdhsa_forward_progress") {
5721 if (IVersion.Major < 10)
5722 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5724 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
5725 ValRange);
5726 } else if (ID == ".amdhsa_shared_vgpr_count") {
5727 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5728 if (IVersion.Major < 10 || IVersion.Major >= 12)
5729 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
5730 IDRange);
5731 SharedVGPRCount = Val;
5733 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
5734 ValRange);
5735 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5738 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
5739 ExprVal, ValRange);
5740 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5742 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5743 ExprVal, ValRange);
5744 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5747 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
5748 ExprVal, ValRange);
5749 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5751 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5752 ExprVal, ValRange);
5753 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5755 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5756 ExprVal, ValRange);
5757 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5759 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5760 ExprVal, ValRange);
5761 } else if (ID == ".amdhsa_exception_int_div_zero") {
5763 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5764 ExprVal, ValRange);
5765 } else if (ID == ".amdhsa_round_robin_scheduling") {
5766 if (IVersion.Major < 12)
5767 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
5769 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
5770 ValRange);
5771 } else {
5772 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5773 }
5774
5775#undef PARSE_BITS_ENTRY
5776 }
5777
5778 if (!Seen.contains(".amdhsa_next_free_vgpr"))
5779 return TokError(".amdhsa_next_free_vgpr directive is required");
5780
5781 if (!Seen.contains(".amdhsa_next_free_sgpr"))
5782 return TokError(".amdhsa_next_free_sgpr directive is required");
5783
5784 unsigned VGPRBlocks;
5785 unsigned SGPRBlocks;
5786 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5787 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5788 EnableWavefrontSize32, NextFreeVGPR,
5789 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5790 SGPRBlocks))
5791 return true;
5792
5793 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5794 VGPRBlocks))
5795 return OutOfRangeError(VGPRRange);
5797 KD.compute_pgm_rsrc1, MCConstantExpr::create(VGPRBlocks, getContext()),
5798 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
5799 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
5800
5801 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5802 SGPRBlocks))
5803 return OutOfRangeError(SGPRRange);
5805 KD.compute_pgm_rsrc1, MCConstantExpr::create(SGPRBlocks, getContext()),
5806 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
5807 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
5808
5809 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5810 return TokError("amdgpu_user_sgpr_count smaller than than implied by "
5811 "enabled user SGPRs");
5812
5813 unsigned UserSGPRCount =
5814 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5815
5816 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5817 return TokError("too many user SGPRs enabled");
5819 KD.compute_pgm_rsrc2, MCConstantExpr::create(UserSGPRCount, getContext()),
5820 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
5821 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
5822
5823 int64_t IVal = 0;
5824 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
5825 return TokError("Kernarg size should be resolvable");
5826 uint64_t kernarg_size = IVal;
5827 if (PreloadLength && kernarg_size &&
5828 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
5829 return TokError("Kernarg preload length + offset is larger than the "
5830 "kernarg segment size");
5831
5832 if (isGFX90A()) {
5833 if (!Seen.contains(".amdhsa_accum_offset"))
5834 return TokError(".amdhsa_accum_offset directive is required");
5835 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5836 return TokError("accum_offset should be in range [4..256] in "
5837 "increments of 4");
5838 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5839 return TokError("accum_offset exceeds total VGPR allocation");
5842 MCConstantExpr::create(AccumOffset / 4 - 1, getContext()),
5843 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
5844 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
5845 }
5846
5847 if (IVersion.Major >= 10 && IVersion.Major < 12) {
5848 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
5849 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5850 return TokError("shared_vgpr_count directive not valid on "
5851 "wavefront size 32");
5852 }
5853 if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5854 return TokError("shared_vgpr_count*2 + "
5855 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5856 "exceed 63\n");
5857 }
5858 }
5859
5860 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
5861 NextFreeVGPR, NextFreeSGPR,
5862 ReserveVCC, ReserveFlatScr);
5863 return false;
5864}
5865
5866bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
5868 if (ParseAsAbsoluteExpression(Version))
5869 return true;
5870
5871 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
5872 return false;
5873}
5874
5875bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5876 amd_kernel_code_t &Header) {
5877 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5878 // assembly for backwards compatibility.
5879 if (ID == "max_scratch_backing_memory_byte_size") {
5880 Parser.eatToEndOfStatement();
5881 return false;
5882 }
5883
5884 SmallString<40> ErrStr;
5885 raw_svector_ostream Err(ErrStr);
5886 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5887 return TokError(Err.str());
5888 }
5889 Lex();
5890
5891 if (ID == "enable_dx10_clamp") {
5892 if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) &&
5893 isGFX12Plus())
5894 return TokError("enable_dx10_clamp=1 is not allowed on GFX12+");
5895 }
5896
5897 if (ID == "enable_ieee_mode") {
5898 if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) &&
5899 isGFX12Plus())
5900 return TokError("enable_ieee_mode=1 is not allowed on GFX12+");
5901 }
5902
5903 if (ID == "enable_wavefront_size32") {
5904 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5905 if (!isGFX10Plus())
5906 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5907 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5908 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5909 } else {
5910 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5911 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5912 }
5913 }
5914
5915 if (ID == "wavefront_size") {
5916 if (Header.wavefront_size == 5) {
5917 if (!isGFX10Plus())
5918 return TokError("wavefront_size=5 is only allowed on GFX10+");
5919 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5920 return TokError("wavefront_size=5 requires +WavefrontSize32");
5921 } else if (Header.wavefront_size == 6) {
5922 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5923 return TokError("wavefront_size=6 requires +WavefrontSize64");
5924 }
5925 }
5926
5927 if (ID == "enable_wgp_mode") {
5928 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5929 !isGFX10Plus())
5930 return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5931 }
5932
5933 if (ID == "enable_mem_ordered") {
5934 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5935 !isGFX10Plus())
5936 return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5937 }
5938
5939 if (ID == "enable_fwd_progress") {
5940 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5941 !isGFX10Plus())
5942 return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5943 }
5944
5945 return false;
5946}
5947
5948bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5949 amd_kernel_code_t Header;
5950 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5951
5952 while (true) {
5953 // Lex EndOfStatement. This is in a while loop, because lexing a comment
5954 // will set the current token to EndOfStatement.
5955 while(trySkipToken(AsmToken::EndOfStatement));
5956
5957 StringRef ID;
5958 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5959 return true;
5960
5961 if (ID == ".end_amd_kernel_code_t")
5962 break;
5963
5964 if (ParseAMDKernelCodeTValue(ID, Header))
5965 return true;
5966 }
5967
5968 getTargetStreamer().EmitAMDKernelCodeT(Header);
5969
5970 return false;
5971}
5972
5973bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5974 StringRef KernelName;
5975 if (!parseId(KernelName, "expected symbol name"))
5976 return true;
5977
5978 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5980
5981 KernelScope.initialize(getContext());
5982 return false;
5983}
5984
5985bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5986 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5987 return Error(getLoc(),
5988 ".amd_amdgpu_isa directive is not available on non-amdgcn "
5989 "architectures");
5990 }
5991
5992 auto TargetIDDirective = getLexer().getTok().getStringContents();
5993 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5994 return Error(getParser().getTok().getLoc(), "target id must match options");
5995
5996 getTargetStreamer().EmitISAVersion();
5997 Lex();
5998
5999 return false;
6000}
6001
6002bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6003 assert(isHsaAbi(getSTI()));
6004
6005 std::string HSAMetadataString;
6006 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6007 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6008 return true;
6009
6010 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6011 return Error(getLoc(), "invalid HSA metadata");
6012
6013 return false;
6014}
6015
6016/// Common code to parse out a block of text (typically YAML) between start and
6017/// end directives.
6018bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6019 const char *AssemblerDirectiveEnd,
6020 std::string &CollectString) {
6021
6022 raw_string_ostream CollectStream(CollectString);
6023
6024 getLexer().setSkipSpace(false);
6025
6026 bool FoundEnd = false;
6027 while (!isToken(AsmToken::Eof)) {
6028 while (isToken(AsmToken::Space)) {
6029 CollectStream << getTokenStr();
6030 Lex();
6031 }
6032
6033 if (trySkipId(AssemblerDirectiveEnd)) {
6034 FoundEnd = true;
6035 break;
6036 }
6037
6038 CollectStream << Parser.parseStringToEndOfStatement()
6039 << getContext().getAsmInfo()->getSeparatorString();
6040
6041 Parser.eatToEndOfStatement();
6042 }
6043
6044 getLexer().setSkipSpace(true);
6045
6046 if (isToken(AsmToken::Eof) && !FoundEnd) {
6047 return TokError(Twine("expected directive ") +
6048 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6049 }
6050
6051 CollectStream.flush();
6052 return false;
6053}
6054
6055/// Parse the assembler directive for new MsgPack-format PAL metadata.
6056bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6057 std::string String;
6058 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6060 return true;
6061
6062 auto PALMetadata = getTargetStreamer().getPALMetadata();
6063 if (!PALMetadata->setFromString(String))
6064 return Error(getLoc(), "invalid PAL metadata");
6065 return false;
6066}
6067
6068/// Parse the assembler directive for old linear-format PAL metadata.
6069bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6070 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6071 return Error(getLoc(),
6072 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6073 "not available on non-amdpal OSes")).str());
6074 }
6075
6076 auto PALMetadata = getTargetStreamer().getPALMetadata();
6077 PALMetadata->setLegacy();
6078 for (;;) {
6080 if (ParseAsAbsoluteExpression(Key)) {
6081 return TokError(Twine("invalid value in ") +
6083 }
6084 if (!trySkipToken(AsmToken::Comma)) {
6085 return TokError(Twine("expected an even number of values in ") +
6087 }
6088 if (ParseAsAbsoluteExpression(Value)) {
6089 return TokError(Twine("invalid value in ") +
6091 }
6092 PALMetadata->setRegister(Key, Value);
6093 if (!trySkipToken(AsmToken::Comma))
6094 break;
6095 }
6096 return false;
6097}
6098
6099/// ParseDirectiveAMDGPULDS
6100/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
6101bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6102 if (getParser().checkForValidSection())
6103 return true;
6104
6106 SMLoc NameLoc = getLoc();
6107 if (getParser().parseIdentifier(Name))
6108 return TokError("expected identifier in directive");
6109
6110 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6111 if (getParser().parseComma())
6112 return true;
6113
6114 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6115
6116 int64_t Size;
6117 SMLoc SizeLoc = getLoc();
6118 if (getParser().parseAbsoluteExpression(Size))
6119 return true;
6120 if (Size < 0)
6121 return Error(SizeLoc, "size must be non-negative");
6122 if (Size > LocalMemorySize)
6123 return Error(SizeLoc, "size is too large");
6124
6125 int64_t Alignment = 4;
6126 if (trySkipToken(AsmToken::Comma)) {
6127 SMLoc AlignLoc = getLoc();
6128 if (getParser().parseAbsoluteExpression(Alignment))
6129 return true;
6130 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6131 return Error(AlignLoc, "alignment must be a power of two");
6132
6133 // Alignment larger than the size of LDS is possible in theory, as long
6134 // as the linker manages to place to symbol at address 0, but we do want
6135 // to make sure the alignment fits nicely into a 32-bit integer.
6136 if (Alignment >= 1u << 31)
6137 return Error(AlignLoc, "alignment is too large");
6138 }
6139
6140 if (parseEOL())
6141 return true;
6142
6143 Symbol->redefineIfPossible();
6144 if (!Symbol->isUndefined())
6145 return Error(NameLoc, "invalid symbol redefinition");
6146
6147 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6148 return false;
6149}
6150
6151bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6152 StringRef IDVal = DirectiveID.getString();
6153
6154 if (isHsaAbi(getSTI())) {
6155 if (IDVal == ".amdhsa_kernel")
6156 return ParseDirectiveAMDHSAKernel();
6157
6158 if (IDVal == ".amdhsa_code_object_version")
6159 return ParseDirectiveAMDHSACodeObjectVersion();
6160
6161 // TODO: Restructure/combine with PAL metadata directive.
6163 return ParseDirectiveHSAMetadata();
6164 } else {
6165 if (IDVal == ".amd_kernel_code_t")
6166 return ParseDirectiveAMDKernelCodeT();
6167
6168 if (IDVal == ".amdgpu_hsa_kernel")
6169 return ParseDirectiveAMDGPUHsaKernel();
6170
6171 if (IDVal == ".amd_amdgpu_isa")
6172 return ParseDirectiveISAVersion();
6173
6175 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6176 Twine(" directive is "
6177 "not available on non-amdhsa OSes"))
6178 .str());
6179 }
6180 }
6181
6182 if (IDVal == ".amdgcn_target")
6183 return ParseDirectiveAMDGCNTarget();
6184
6185 if (IDVal == ".amdgpu_lds")
6186 return ParseDirectiveAMDGPULDS();
6187
6188 if (IDVal == PALMD::AssemblerDirectiveBegin)
6189 return ParseDirectivePALMetadataBegin();
6190
6191 if (IDVal == PALMD::AssemblerDirective)
6192 return ParseDirectivePALMetadata();
6193
6194 return true;
6195}
6196
6197bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6198 unsigned RegNo) {
6199
6200 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
6201 return isGFX9Plus();
6202
6203 // GFX10+ has 2 more SGPRs 104 and 105.
6204 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
6205 return hasSGPR104_SGPR105();
6206
6207 switch (RegNo) {
6208 case AMDGPU::SRC_SHARED_BASE_LO:
6209 case AMDGPU::SRC_SHARED_BASE:
6210 case AMDGPU::SRC_SHARED_LIMIT_LO:
6211 case AMDGPU::SRC_SHARED_LIMIT:
6212 case AMDGPU::SRC_PRIVATE_BASE_LO:
6213 case AMDGPU::SRC_PRIVATE_BASE:
6214 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
6215 case AMDGPU::SRC_PRIVATE_LIMIT:
6216 return isGFX9Plus();
6217 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
6218 return isGFX9Plus() && !isGFX11Plus();
6219 case AMDGPU::TBA:
6220 case AMDGPU::TBA_LO:
6221 case AMDGPU::TBA_HI:
6222 case AMDGPU::TMA:
6223 case AMDGPU::TMA_LO:
6224 case AMDGPU::TMA_HI:
6225 return !isGFX9Plus();
6226 case AMDGPU::XNACK_MASK:
6227 case AMDGPU::XNACK_MASK_LO:
6228 case AMDGPU::XNACK_MASK_HI:
6229 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6230 case AMDGPU::SGPR_NULL:
6231 return isGFX10Plus();
6232 default:
6233 break;
6234 }
6235
6236 if (isCI())
6237 return true;
6238
6239 if (isSI() || isGFX10Plus()) {
6240 // No flat_scr on SI.
6241 // On GFX10Plus flat scratch is not a valid register operand and can only be
6242 // accessed with s_setreg/s_getreg.
6243 switch (RegNo) {
6244 case AMDGPU::FLAT_SCR:
6245 case AMDGPU::FLAT_SCR_LO:
6246 case AMDGPU::FLAT_SCR_HI:
6247 return false;
6248 default:
6249 return true;
6250 }
6251 }
6252
6253 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6254 // SI/CI have.
6255 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
6256 return hasSGPR102_SGPR103();
6257
6258 return true;
6259}
6260
6261ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6262 StringRef Mnemonic,
6263 OperandMode Mode) {
6264 ParseStatus Res = parseVOPD(Operands);
6265 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6266 return Res;
6267
6268 // Try to parse with a custom parser
6269 Res = MatchOperandParserImpl(Operands, Mnemonic);
6270
6271 // If we successfully parsed the operand or if there as an error parsing,
6272 // we are done.
6273 //
6274 // If we are parsing after we reach EndOfStatement then this means we
6275 // are appending default values to the Operands list. This is only done
6276 // by custom parser, so we shouldn't continue on to the generic parsing.
6277 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6278 return Res;
6279
6280 SMLoc RBraceLoc;
6281 SMLoc LBraceLoc = getLoc();
6282 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6283 unsigned Prefix = Operands.size();
6284
6285 for (;;) {
6286 auto Loc = getLoc();
6287 Res = parseReg(Operands);
6288 if (Res.isNoMatch())
6289 Error(Loc, "expected a register");
6290 if (!Res.isSuccess())
6291 return ParseStatus::Failure;
6292
6293 RBraceLoc = getLoc();
6294 if (trySkipToken(AsmToken::RBrac))
6295 break;
6296
6297 if (!skipToken(AsmToken::Comma,
6298 "expected a comma or a closing square bracket"))
6299 return ParseStatus::Failure;
6300 }
6301
6302 if (Operands.size() - Prefix > 1) {
6303 Operands.insert(Operands.begin() + Prefix,
6304 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6305 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6306 }
6307
6308 return ParseStatus::Success;
6309 }
6310
6311 return parseRegOrImm(Operands);
6312}
6313
6314StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6315 // Clear any forced encodings from the previous instruction.
6316 setForcedEncodingSize(0);
6317 setForcedDPP(false);
6318 setForcedSDWA(false);
6319
6320 if (Name.ends_with("_e64_dpp")) {
6321 setForcedDPP(true);
6322 setForcedEncodingSize(64);
6323 return Name.substr(0, Name.size() - 8);
6324 } else if (Name.ends_with("_e64")) {
6325 setForcedEncodingSize(64);
6326 return Name.substr(0, Name.size() - 4);
6327 } else if (Name.ends_with("_e32")) {
6328 setForcedEncodingSize(32);
6329 return Name.substr(0, Name.size() - 4);
6330 } else if (Name.ends_with("_dpp")) {
6331 setForcedDPP(true);
6332 return Name.substr(0, Name.size() - 4);
6333 } else if (Name.ends_with("_sdwa")) {
6334 setForcedSDWA(true);
6335 return Name.substr(0, Name.size() - 5);
6336 }
6337 return Name;
6338}
6339
6340static void applyMnemonicAliases(StringRef &Mnemonic,
6341 const FeatureBitset &Features,
6342 unsigned VariantID);
6343
6344bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
6346 SMLoc NameLoc, OperandVector &Operands) {
6347 // Add the instruction mnemonic
6348 Name = parseMnemonicSuffix(Name);
6349
6350 // If the target architecture uses MnemonicAlias, call it here to parse
6351 // operands correctly.
6352 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6353
6354 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6355
6356 bool IsMIMG = Name.starts_with("image_");
6357
6358 while (!trySkipToken(AsmToken::EndOfStatement)) {
6359 OperandMode Mode = OperandMode_Default;
6360 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6361 Mode = OperandMode_NSA;
6362 ParseStatus Res = parseOperand(Operands, Name, Mode);
6363
6364 if (!Res.isSuccess()) {
6365 checkUnsupportedInstruction(Name, NameLoc);
6366 if (!Parser.hasPendingError()) {
6367 // FIXME: use real operand location rather than the current location.
6368 StringRef Msg = Res.isFailure() ? "failed parsing operand."
6369 : "not a valid operand.";
6370 Error(getLoc(), Msg);
6371 }
6372 while (!trySkipToken(AsmToken::EndOfStatement)) {
6373 lex();
6374 }
6375 return true;
6376 }
6377
6378 // Eat the comma or space if there is one.
6379 trySkipToken(AsmToken::Comma);
6380 }
6381
6382 return false;
6383}
6384
6385//===----------------------------------------------------------------------===//
6386// Utility functions
6387//===----------------------------------------------------------------------===//
6388
6389ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6391 SMLoc S = getLoc();
6392 if (!trySkipId(Name))
6393 return ParseStatus::NoMatch;
6394
6395 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6396 return ParseStatus::Success;
6397}
6398
6399ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6400 int64_t &IntVal) {
6401
6402 if (!trySkipId(Prefix, AsmToken::Colon))
6403 return ParseStatus::NoMatch;
6404
6405 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6406}
6407
6408ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6409 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6410 std::function<bool(int64_t &)> ConvertResult) {
6411 SMLoc S = getLoc();
6412 int64_t Value = 0;
6413
6414 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6415 if (!Res.isSuccess())
6416 return Res;
6417
6418 if (ConvertResult && !ConvertResult(Value)) {
6419 Error(S, "invalid " + StringRef(Prefix) + " value.");
6420 }
6421
6422 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6423 return ParseStatus::Success;
6424}
6425
6426ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6427 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6428 bool (*ConvertResult)(int64_t &)) {
6429 SMLoc S = getLoc();
6430 if (!trySkipId(Prefix, AsmToken::Colon))
6431 return ParseStatus::NoMatch;
6432
6433 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
6434 return ParseStatus::Failure;
6435
6436 unsigned Val = 0;
6437 const unsigned MaxSize = 4;
6438
6439 // FIXME: How to verify the number of elements matches the number of src
6440 // operands?
6441 for (int I = 0; ; ++I) {
6442 int64_t Op;
6443 SMLoc Loc = getLoc();
6444 if (!parseExpr(Op))
6445 return ParseStatus::Failure;
6446
6447 if (Op != 0 && Op != 1)
6448 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
6449
6450 Val |= (Op << I);
6451
6452 if (trySkipToken(AsmToken::RBrac))
6453 break;
6454
6455 if (I + 1 == MaxSize)
6456 return Error(getLoc(), "expected a closing square bracket");
6457
6458 if (!skipToken(AsmToken::Comma, "expected a comma"))
6459 return ParseStatus::Failure;
6460 }
6461
6462 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
6463 return ParseStatus::Success;
6464}
6465
6466ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
6468 AMDGPUOperand::ImmTy ImmTy) {
6469 int64_t Bit;
6470 SMLoc S = getLoc();
6471
6472 if (trySkipId(Name)) {
6473 Bit = 1;
6474 } else if (trySkipId("no", Name)) {
6475 Bit = 0;
6476 } else {
6477 return ParseStatus::NoMatch;
6478 }
6479
6480 if (Name == "r128" && !hasMIMG_R128())
6481 return Error(S, "r128 modifier is not supported on this GPU");
6482 if (Name == "a16" && !hasA16())
6483 return Error(S, "a16 modifier is not supported on this GPU");
6484
6485 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6486 ImmTy = AMDGPUOperand::ImmTyR128A16;
6487
6488 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
6489 return ParseStatus::Success;
6490}
6491
6492unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
6493 bool &Disabling) const {
6494 Disabling = Id.consume_front("no");
6495
6496 if (isGFX940() && !Mnemo.starts_with("s_")) {
6497 return StringSwitch<unsigned>(Id)
6498 .Case("nt", AMDGPU::CPol::NT)
6499 .Case("sc0", AMDGPU::CPol::SC0)
6500 .Case("sc1", AMDGPU::CPol::SC1)
6501 .Default(0);
6502 }
6503
6504 return StringSwitch<unsigned>(Id)
6505 .Case("dlc", AMDGPU::CPol::DLC)
6506 .Case("glc", AMDGPU::CPol::GLC)
6507 .Case("scc", AMDGPU::CPol::SCC)
6508 .Case("slc", AMDGPU::CPol::SLC)
6509 .Default(0);
6510}
6511
6512ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
6513 if (isGFX12Plus()) {
6514 SMLoc StringLoc = getLoc();
6515
6516 int64_t CPolVal = 0;
6519
6520 for (;;) {
6521 if (ResTH.isNoMatch()) {
6522 int64_t TH;
6523 ResTH = parseTH(Operands, TH);
6524 if (ResTH.isFailure())
6525 return ResTH;
6526 if (ResTH.isSuccess()) {
6527 CPolVal |= TH;
6528 continue;
6529 }
6530 }
6531
6532 if (ResScope.isNoMatch()) {
6533 int64_t Scope;
6534 ResScope = parseScope(Operands, Scope);
6535 if (ResScope.isFailure())
6536 return ResScope;
6537 if (ResScope.isSuccess()) {
6538 CPolVal |= Scope;
6539 continue;
6540 }
6541 }
6542
6543 break;
6544 }
6545
6546 if (ResTH.isNoMatch() && ResScope.isNoMatch())
6547 return ParseStatus::NoMatch;
6548
6549 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
6550 AMDGPUOperand::ImmTyCPol));
6551 return ParseStatus::Success;
6552 }
6553
6554 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
6555 SMLoc OpLoc = getLoc();
6556 unsigned Enabled = 0, Seen = 0;
6557 for (;;) {
6558 SMLoc S = getLoc();
6559 bool Disabling;
6560 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
6561 if (!CPol)
6562 break;
6563
6564 lex();
6565
6566 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
6567 return Error(S, "dlc modifier is not supported on this GPU");
6568
6569 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
6570 return Error(S, "scc modifier is not supported on this GPU");
6571
6572 if (Seen & CPol)
6573 return Error(S, "duplicate cache policy modifier");
6574
6575 if (!Disabling)
6576 Enabled |= CPol;
6577
6578 Seen |= CPol;
6579 }
6580
6581 if (!Seen)
6582 return ParseStatus::NoMatch;
6583
6584 Operands.push_back(
6585 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
6586 return ParseStatus::Success;
6587}
6588
6589ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
6590 int64_t &Scope) {
6591 Scope = AMDGPU::CPol::SCOPE_CU; // default;
6592
6594 SMLoc StringLoc;
6595 ParseStatus Res;
6596
6597 Res = parseStringWithPrefix("scope", Value, StringLoc);
6598 if (!Res.isSuccess())
6599 return Res;
6600
6602 .Case("SCOPE_CU", AMDGPU::CPol::SCOPE_CU)
6603 .Case("SCOPE_SE", AMDGPU::CPol::SCOPE_SE)
6604 .Case("SCOPE_DEV", AMDGPU::CPol::SCOPE_DEV)
6605 .Case("SCOPE_SYS", AMDGPU::CPol::SCOPE_SYS)
6606 .Default(0xffffffff);
6607
6608 if (Scope == 0xffffffff)
6609 return Error(StringLoc, "invalid scope value");
6610
6611 return ParseStatus::Success;
6612}
6613
6614ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
6615 TH = AMDGPU::CPol::TH_RT; // default
6616
6618 SMLoc StringLoc;
6619 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
6620 if (!Res.isSuccess())
6621 return Res;
6622
6623 if (Value == "TH_DEFAULT")
6625 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" ||
6626 Value == "TH_LOAD_NT_WB") {
6627 return Error(StringLoc, "invalid th value");
6628 } else if (Value.consume_front("TH_ATOMIC_")) {
6630 } else if (Value.consume_front("TH_LOAD_")) {
6632 } else if (Value.consume_front("TH_STORE_")) {
6634 } else {
6635 return Error(StringLoc, "invalid th value");
6636 }
6637
6638 if (Value == "BYPASS")
6640
6641 if (TH != 0) {
6648 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
6651 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
6653 .Default(0xffffffff);
6654 else
6660 .Case("RT_WB", AMDGPU::CPol::TH_RT_WB)
6661 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
6662 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
6663 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
6664 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
6665 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
6666 .Default(0xffffffff);
6667 }
6668
6669 if (TH == 0xffffffff)
6670 return Error(StringLoc, "invalid th value");
6671
6672 return ParseStatus::Success;
6673}
6674
6676 MCInst& Inst, const OperandVector& Operands,
6677 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6678 AMDGPUOperand::ImmTy ImmT,
6679 int64_t Default = 0) {
6680 auto i = OptionalIdx.find(ImmT);
6681 if (i != OptionalIdx.end()) {
6682 unsigned Idx = i->second;
6683 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
6684 } else {
6686 }
6687}
6688
6689ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6691 SMLoc &StringLoc) {
6692 if (!trySkipId(Prefix, AsmToken::Colon))
6693 return ParseStatus::NoMatch;
6694
6695 StringLoc = getLoc();
6696 return parseId(Value, "expected an identifier") ? ParseStatus::Success
6698}
6699
6700//===----------------------------------------------------------------------===//
6701// MTBUF format
6702//===----------------------------------------------------------------------===//
6703
6704bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6705 int64_t MaxVal,
6706 int64_t &Fmt) {
6707 int64_t Val;
6708 SMLoc Loc = getLoc();
6709
6710 auto Res = parseIntWithPrefix(Pref, Val);
6711 if (Res.isFailure())
6712 return false;
6713 if (Res.isNoMatch())
6714 return true;
6715
6716 if (Val < 0 || Val > MaxVal) {
6717 Error(Loc, Twine("out of range ", StringRef(Pref)));
6718 return false;
6719 }
6720
6721 Fmt = Val;
6722 return true;
6723}
6724
6725ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
6726 AMDGPUOperand::ImmTy ImmTy) {
6727 const char *Pref = "index_key";
6728 int64_t ImmVal = 0;
6729 SMLoc Loc = getLoc();
6730 auto Res = parseIntWithPrefix(Pref, ImmVal);
6731 if (!Res.isSuccess())
6732 return Res;
6733
6734 if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
6735 return Error(Loc, Twine("out of range ", StringRef(Pref)));
6736
6737 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
6738 return Error(Loc, Twine("out of range ", StringRef(Pref)));
6739
6740 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
6741 return ParseStatus::Success;
6742}
6743
6744ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
6745 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
6746}
6747
6748ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
6749 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
6750}
6751
6752// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6753// values to live in a joint format operand in the MCInst encoding.
6754ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6755 using namespace llvm::AMDGPU::MTBUFFormat;
6756
6757 int64_t Dfmt = DFMT_UNDEF;
6758 int64_t Nfmt = NFMT_UNDEF;
6759
6760 // dfmt and nfmt can appear in either order, and each is optional.
6761 for (int I = 0; I < 2; ++I) {
6762 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6763 return ParseStatus::Failure;
6764
6765 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
6766 return ParseStatus::Failure;
6767
6768 // Skip optional comma between dfmt/nfmt
6769 // but guard against 2 commas following each other.
6770 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6771 !peekToken().is(AsmToken::Comma)) {
6772 trySkipToken(AsmToken::Comma);
6773 }
6774 }
6775
6776 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6777 return ParseStatus::NoMatch;
6778
6779 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6780 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6781
6782 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6783 return ParseStatus::Success;
6784}
6785
6786ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6787 using namespace llvm::AMDGPU::MTBUFFormat;
6788
6789 int64_t Fmt = UFMT_UNDEF;
6790
6791 if (!tryParseFmt("format", UFMT_MAX, Fmt))
6792 return ParseStatus::Failure;
6793
6794 if (Fmt == UFMT_UNDEF)
6795 return ParseStatus::NoMatch;
6796
6797 Format = Fmt;
6798 return ParseStatus::Success;
6799}
6800
6801bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6802 int64_t &Nfmt,
6803 StringRef FormatStr,
6804 SMLoc Loc) {
6805 using namespace llvm::AMDGPU::MTBUFFormat;
6806 int64_t Format;
6807
6808 Format = getDfmt(FormatStr);
6809 if (Format != DFMT_UNDEF) {
6810 Dfmt = Format;
6811 return true;
6812 }
6813
6814 Format = getNfmt(FormatStr, getSTI());
6815 if (Format != NFMT_UNDEF) {
6816 Nfmt = Format;
6817 return true;
6818 }
6819
6820 Error(Loc, "unsupported format");
6821 return false;
6822}
6823
6824ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6825 SMLoc FormatLoc,
6826 int64_t &Format) {
6827 using namespace llvm::AMDGPU::MTBUFFormat;
6828
6829 int64_t Dfmt = DFMT_UNDEF;
6830 int64_t Nfmt = NFMT_UNDEF;
6831 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6832 return ParseStatus::Failure;
6833
6834 if (trySkipToken(AsmToken::Comma)) {
6835 StringRef Str;
6836 SMLoc Loc = getLoc();
6837 if (!parseId(Str, "expected a format string") ||
6838 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
6839 return ParseStatus::Failure;
6840 if (Dfmt == DFMT_UNDEF)
6841 return Error(Loc, "duplicate numeric format");
6842 if (Nfmt == NFMT_UNDEF)
6843 return Error(Loc, "duplicate data format");
6844 }
6845
6846 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6847 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6848
6849 if (isGFX10Plus()) {
6850 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6851 if (Ufmt == UFMT_UNDEF)
6852 return Error(FormatLoc, "unsupported format");
6853 Format = Ufmt;
6854 } else {
6855 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6856 }
6857
6858 return ParseStatus::Success;
6859}
6860
6861ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6862 SMLoc Loc,
6863 int64_t &Format) {
6864 using namespace llvm::AMDGPU::MTBUFFormat;
6865
6866 auto Id = getUnifiedFormat(FormatStr, getSTI());
6867 if (Id == UFMT_UNDEF)
6868 return ParseStatus::NoMatch;
6869
6870 if (!isGFX10Plus())
6871 return Error(Loc, "unified format is not supported on this GPU");
6872
6873 Format = Id;
6874 return ParseStatus::Success;
6875}
6876
6877ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6878 using namespace llvm::AMDGPU::MTBUFFormat;
6879 SMLoc Loc = getLoc();
6880
6881 if (!parseExpr(Format))
6882 return ParseStatus::Failure;
6883 if (!isValidFormatEncoding(Format, getSTI()))
6884 return Error(Loc, "out of range format");
6885
6886 return ParseStatus::Success;
6887}
6888
6889ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6890 using namespace llvm::AMDGPU::MTBUFFormat;
6891
6892 if (!trySkipId("format", AsmToken::Colon))
6893 return ParseStatus::NoMatch;
6894
6895 if (trySkipToken(AsmToken::LBrac)) {
6896 StringRef FormatStr;
6897 SMLoc Loc = getLoc();
6898 if (!parseId(FormatStr, "expected a format string"))
6899 return ParseStatus::Failure;
6900
6901 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6902 if (Res.isNoMatch())
6903 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6904 if (!Res.isSuccess())
6905 return Res;
6906
6907 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6908 return ParseStatus::Failure;
6909
6910 return ParseStatus::Success;
6911 }
6912
6913 return parseNumericFormat(Format);
6914}
6915
6916ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6917 using namespace llvm::AMDGPU::MTBUFFormat;
6918
6919 int64_t Format = getDefaultFormatEncoding(getSTI());
6920 ParseStatus Res;
6921 SMLoc Loc = getLoc();
6922
6923 // Parse legacy format syntax.
6924 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6925 if (Res.isFailure())
6926 return Res;
6927
6928 bool FormatFound = Res.isSuccess();
6929
6930 Operands.push_back(
6931 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6932
6933 if (FormatFound)
6934 trySkipToken(AsmToken::Comma);
6935
6936 if (isToken(AsmToken::EndOfStatement)) {
6937 // We are expecting an soffset operand,
6938 // but let matcher handle the error.
6939 return ParseStatus::Success;
6940 }
6941
6942 // Parse soffset.
6943 Res = parseRegOrImm(Operands);
6944 if (!Res.isSuccess())
6945 return Res;
6946
6947 trySkipToken(AsmToken::Comma);
6948
6949 if (!FormatFound) {
6950 Res = parseSymbolicOrNumericFormat(Format);
6951 if (Res.isFailure())
6952 return Res;
6953 if (Res.isSuccess()) {
6954 auto Size = Operands.size();
6955 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6956 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6957 Op.setImm(Format);
6958 }
6959 return ParseStatus::Success;
6960 }
6961
6962 if (isId("format") && peekToken().is(AsmToken::Colon))
6963 return Error(getLoc(), "duplicate format");
6964 return ParseStatus::Success;
6965}
6966
6967ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
6968 ParseStatus Res =
6969 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
6970 if (Res.isNoMatch()) {
6971 Res = parseIntWithPrefix("inst_offset", Operands,
6972 AMDGPUOperand::ImmTyInstOffset);
6973 }
6974 return Res;
6975}
6976
6977ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
6978 ParseStatus Res =
6979 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
6980 if (Res.isNoMatch())
6981 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
6982 return Res;
6983}
6984
6985ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
6986 ParseStatus Res =
6987 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
6988 if (Res.isNoMatch()) {
6989 Res =
6990 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
6991 }
6992 return Res;
6993}
6994
6995//===----------------------------------------------------------------------===//
6996// Exp
6997//===----------------------------------------------------------------------===//
6998
6999void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7000 OptionalImmIndexMap OptionalIdx;
7001
7002 unsigned OperandIdx[4];
7003 unsigned EnMask = 0;
7004 int SrcIdx = 0;
7005
7006 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7007 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7008
7009 // Add the register arguments
7010 if (Op.isReg()) {
7011 assert(SrcIdx < 4);
7012 OperandIdx[SrcIdx] = Inst.size();
7013 Op.addRegOperands(Inst, 1);
7014 ++SrcIdx;
7015 continue;
7016 }
7017
7018 if (Op.isOff()) {
7019 assert(SrcIdx < 4);
7020 OperandIdx[SrcIdx] = Inst.size();
7021 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
7022 ++SrcIdx;
7023 continue;
7024 }
7025
7026 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7027 Op.addImmOperands(Inst, 1);
7028 continue;
7029 }
7030
7031 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7032 continue;
7033
7034 // Handle optional arguments
7035 OptionalIdx[Op.getImmTy()] = i;
7036 }
7037
7038 assert(SrcIdx == 4);
7039
7040 bool Compr = false;
7041 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7042 Compr = true;
7043 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7044 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
7045 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
7046 }
7047
7048 for (auto i = 0; i < SrcIdx; ++i) {
7049 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
7050 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7051 }
7052 }
7053
7054 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7055 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7056
7057 Inst.addOperand(MCOperand::createImm(EnMask));
7058}
7059
7060//===----------------------------------------------------------------------===//
7061// s_waitcnt
7062//===----------------------------------------------------------------------===//
7063
7064static bool
7066 const AMDGPU::IsaVersion ISA,
7067 int64_t &IntVal,
7068 int64_t CntVal,
7069 bool Saturate,
7070 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7071 unsigned (*decode)(const IsaVersion &Version, unsigned))
7072{
7073 bool Failed = false;
7074
7075 IntVal = encode(ISA, IntVal, CntVal);
7076 if (CntVal != decode(ISA, IntVal)) {
7077 if (Saturate) {
7078 IntVal = encode(ISA, IntVal, -1);
7079 } else {
7080 Failed = true;
7081 }
7082 }
7083 return Failed;
7084}
7085
7086bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7087
7088 SMLoc CntLoc = getLoc();
7089 StringRef CntName = getTokenStr();
7090
7091 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7092 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7093 return false;
7094
7095 int64_t CntVal;
7096 SMLoc ValLoc = getLoc();
7097 if (!parseExpr(CntVal))
7098 return false;
7099
7101
7102 bool Failed = true;
7103 bool Sat = CntName.ends_with("_sat");
7104
7105 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7106 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7107 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7108 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7109 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7110 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7111 } else {
7112 Error(CntLoc, "invalid counter name " + CntName);
7113 return false;
7114 }
7115
7116 if (Failed) {
7117 Error(ValLoc, "too large value for " + CntName);
7118 return false;
7119 }
7120
7121 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7122 return false;
7123
7124 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7125 if (isToken(AsmToken::EndOfStatement)) {
7126 Error(getLoc(), "expected a counter name");
7127 return false;
7128 }
7129 }
7130
7131 return true;
7132}
7133
7134ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7136 int64_t Waitcnt = getWaitcntBitMask(ISA);
7137 SMLoc S = getLoc();
7138
7139 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7140 while (!isToken(AsmToken::EndOfStatement)) {
7141 if (!parseCnt(Waitcnt))
7142 return ParseStatus::Failure;
7143 }
7144 } else {
7145 if (!parseExpr(Waitcnt))
7146 return ParseStatus::Failure;
7147 }
7148
7149 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7150 return ParseStatus::Success;
7151}
7152
7153bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7154 SMLoc FieldLoc = getLoc();
7155 StringRef FieldName = getTokenStr();
7156 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7157 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7158 return false;
7159
7160 SMLoc ValueLoc = getLoc();
7161 StringRef ValueName = getTokenStr();
7162 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7163 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7164 return false;
7165
7166 unsigned Shift;
7167 if (FieldName == "instid0") {
7168 Shift = 0;
7169 } else if (FieldName == "instskip") {
7170 Shift = 4;
7171 } else if (FieldName == "instid1") {
7172 Shift = 7;
7173 } else {
7174 Error(FieldLoc, "invalid field name " + FieldName);
7175 return false;
7176 }
7177
7178 int Value;
7179 if (Shift == 4) {
7180 // Parse values for instskip.
7182 .Case("SAME", 0)
7183 .Case("NEXT", 1)
7184 .Case("SKIP_1", 2)
7185 .Case("SKIP_2", 3)
7186 .Case("SKIP_3", 4)
7187 .Case("SKIP_4", 5)
7188 .Default(-1);
7189 } else {
7190 // Parse values for instid0 and instid1.
7192 .Case("NO_DEP", 0)
7193 .Case("VALU_DEP_1", 1)
7194 .Case("VALU_DEP_2", 2)
7195 .Case("VALU_DEP_3", 3)
7196 .Case("VALU_DEP_4", 4)
7197 .Case("TRANS32_DEP_1", 5)
7198 .Case("TRANS32_DEP_2", 6)
7199 .Case("TRANS32_DEP_3", 7)
7200 .Case("FMA_ACCUM_CYCLE_1", 8)
7201 .Case("SALU_CYCLE_1", 9)
7202 .Case("SALU_CYCLE_2", 10)
7203 .Case("SALU_CYCLE_3", 11)
7204 .Default(-1);
7205 }
7206 if (Value < 0) {
7207 Error(ValueLoc, "invalid value name " + ValueName);
7208 return false;
7209 }
7210
7211 Delay |= Value << Shift;
7212 return true;
7213}
7214
7215ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7216 int64_t Delay = 0;
7217 SMLoc S = getLoc();
7218
7219 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7220 do {
7221 if (!parseDelay(Delay))
7222 return ParseStatus::Failure;
7223 } while (trySkipToken(AsmToken::Pipe));
7224 } else {
7225 if (!parseExpr(Delay))
7226 return ParseStatus::Failure;
7227 }
7228
7229 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7230 return ParseStatus::Success;
7231}
7232
7233bool
7234AMDGPUOperand::isSWaitCnt() const {
7235 return isImm();
7236}
7237
7238bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7239
7240//===----------------------------------------------------------------------===//
7241// DepCtr
7242//===----------------------------------------------------------------------===//
7243
7244void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7245 StringRef DepCtrName) {
7246 switch (ErrorId) {
7247 case OPR_ID_UNKNOWN:
7248 Error(Loc, Twine("invalid counter name ", DepCtrName));
7249 return;
7250 case OPR_ID_UNSUPPORTED:
7251 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7252 return;
7253 case OPR_ID_DUPLICATE:
7254 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7255 return;
7256 case OPR_VAL_INVALID:
7257 Error(Loc, Twine("invalid value for ", DepCtrName));
7258 return;
7259 default:
7260 assert(false);
7261 }
7262}
7263
7264bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7265
7266 using namespace llvm::AMDGPU::DepCtr;
7267
7268 SMLoc DepCtrLoc = getLoc();
7269 StringRef DepCtrName = getTokenStr();
7270
7271 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7272 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7273 return false;
7274
7275 int64_t ExprVal;
7276 if (!parseExpr(ExprVal))
7277 return false;
7278
7279 unsigned PrevOprMask = UsedOprMask;
7280 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7281
7282 if (CntVal < 0) {
7283 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7284 return false;
7285 }
7286
7287 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7288 return false;
7289
7290 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7291 if (isToken(AsmToken::EndOfStatement)) {
7292 Error(getLoc(), "expected a counter name");
7293 return false;
7294 }
7295 }
7296
7297 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7298 DepCtr = (DepCtr & ~CntValMask) | CntVal;
7299 return true;
7300}
7301
7302ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7303 using namespace llvm::AMDGPU::DepCtr;
7304
7305 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
7306 SMLoc Loc = getLoc();
7307
7308 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7309 unsigned UsedOprMask = 0;
7310 while (!isToken(AsmToken::EndOfStatement)) {
7311 if (!parseDepCtr(DepCtr, UsedOprMask))
7312 return ParseStatus::Failure;
7313 }
7314 } else {
7315 if (!parseExpr(DepCtr))
7316 return ParseStatus::Failure;
7317 }
7318
7319 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
7320 return ParseStatus::Success;
7321}
7322
7323bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
7324
7325//===----------------------------------------------------------------------===//
7326// hwreg
7327//===----------------------------------------------------------------------===//
7328
7329ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
7330 OperandInfoTy &Offset,
7331 OperandInfoTy &Width) {
7332 using namespace llvm::AMDGPU::Hwreg;
7333
7334 if (!trySkipId("hwreg", AsmToken::LParen))
7335 return ParseStatus::NoMatch;
7336
7337 // The register may be specified by name or using a numeric code
7338 HwReg.Loc = getLoc();
7339 if (isToken(AsmToken::Identifier) &&
7340 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7341 HwReg.IsSymbolic = true;
7342 lex(); // skip register name
7343 } else if (!parseExpr(HwReg.Val, "a register name")) {
7344 return ParseStatus::Failure;
7345 }
7346
7347 if (trySkipToken(AsmToken::RParen))
7348 return ParseStatus::Success;
7349
7350 // parse optional params
7351 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
7352 return ParseStatus::Failure;
7353
7354 Offset.Loc = getLoc();
7355 if (!parseExpr(Offset.Val))
7356 return ParseStatus::Failure;
7357
7358 if (!skipToken(AsmToken::Comma, "expected a comma"))
7359 return ParseStatus::Failure;
7360
7361 Width.Loc = getLoc();
7362 if (!parseExpr(Width.Val) ||
7363 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7364 return ParseStatus::Failure;
7365
7366 return ParseStatus::Success;
7367}
7368
7369ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
7370 using namespace llvm::AMDGPU::Hwreg;
7371
7372 int64_t ImmVal = 0;
7373 SMLoc Loc = getLoc();
7374
7375 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
7376 HwregId::Default);
7377 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
7378 HwregOffset::Default);
7379 struct : StructuredOpField {
7380 using StructuredOpField::StructuredOpField;
7381 bool validate(AMDGPUAsmParser &Parser) const override {
7382 if (!isUIntN(Width, Val - 1))
7383 return Error(Parser, "only values from 1 to 32 are legal");
7384 return true;
7385 }
7386 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
7387 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
7388
7389 if (Res.isNoMatch())
7390 Res = parseHwregFunc(HwReg, Offset, Width);
7391
7392 if (Res.isSuccess()) {
7393 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
7394 return ParseStatus::Failure;
7395 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
7396 }
7397
7398 if (Res.isNoMatch() &&
7399 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
7401
7402 if (!Res.isSuccess())
7403 return ParseStatus::Failure;
7404
7405 if (!isUInt<16>(ImmVal))
7406 return Error(Loc, "invalid immediate: only 16-bit values are legal");
7407 Operands.push_back(
7408 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
7409 return ParseStatus::Success;
7410}
7411
7412bool AMDGPUOperand::isHwreg() const {
7413 return isImmTy(ImmTyHwreg);
7414}
7415
7416//===----------------------------------------------------------------------===//
7417// sendmsg
7418//===----------------------------------------------------------------------===//
7419
7420bool
7421AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
7422 OperandInfoTy &Op,
7423 OperandInfoTy &Stream) {
7424 using namespace llvm::AMDGPU::SendMsg;
7425
7426 Msg.Loc = getLoc();
7427 if (isToken(AsmToken::Identifier) &&
7428 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7429 Msg.IsSymbolic = true;
7430 lex(); // skip message name
7431 } else if (!parseExpr(Msg.Val, "a message name")) {
7432 return false;
7433 }
7434
7435 if (trySkipToken(AsmToken::Comma)) {
7436 Op.IsDefined = true;
7437 Op.Loc = getLoc();
7438 if (isToken(AsmToken::Identifier) &&
7439 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
7441 lex(); // skip operation name
7442 } else if (!parseExpr(Op.Val, "an operation name")) {
7443 return false;
7444 }
7445
7446 if (trySkipToken(AsmToken::Comma)) {
7447 Stream.IsDefined = true;
7448 Stream.Loc = getLoc();
7449 if (!parseExpr(Stream.Val))
7450 return false;
7451 }
7452 }
7453
7454 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
7455}
7456
7457bool
7458AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
7459 const OperandInfoTy &Op,
7460 const OperandInfoTy &Stream) {
7461 using namespace llvm::AMDGPU::SendMsg;
7462
7463 // Validation strictness depends on whether message is specified
7464 // in a symbolic or in a numeric form. In the latter case
7465 // only encoding possibility is checked.
7466 bool Strict = Msg.IsSymbolic;
7467
7468 if (Strict) {
7469 if (Msg.Val == OPR_ID_UNSUPPORTED) {
7470 Error(Msg.Loc, "specified message id is not supported on this GPU");
7471 return false;
7472 }
7473 } else {
7474 if (!isValidMsgId(Msg.Val, getSTI())) {
7475 Error(Msg.Loc, "invalid message id");
7476 return false;
7477 }
7478 }
7479 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
7480 if (Op.IsDefined) {
7481 Error(Op.Loc, "message does not support operations");
7482 } else {
7483 Error(Msg.Loc, "missing message operation");
7484 }
7485 return false;
7486 }
7487 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
7488 if (Op.Val == OPR_ID_UNSUPPORTED)
7489 Error(Op.Loc, "specified operation id is not supported on this GPU");
7490 else
7491 Error(Op.Loc, "invalid operation id");
7492 return false;
7493 }
7494 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
7495 Stream.IsDefined) {
7496 Error(Stream.Loc, "message operation does not support streams");
7497 return false;
7498 }
7499 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
7500 Error(Stream.Loc, "invalid message stream id");
7501 return false;
7502 }
7503 return true;
7504}
7505
7506ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
7507 using namespace llvm::AMDGPU::SendMsg;
7508
7509 int64_t ImmVal = 0;
7510 SMLoc Loc = getLoc();
7511
7512 if (trySkipId("sendmsg", AsmToken::LParen)) {
7513 OperandInfoTy Msg(OPR_ID_UNKNOWN);
7514 OperandInfoTy Op(OP_NONE_);
7515 OperandInfoTy Stream(STREAM_ID_NONE_);
7516 if (parseSendMsgBody(Msg, Op, Stream) &&
7517 validateSendMsg(Msg, Op, Stream)) {
7518 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
7519 } else {
7520 return ParseStatus::Failure;
7521 }
7522 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
7523 if (ImmVal < 0 || !isUInt<16>(ImmVal))
7524 return Error(Loc, "invalid immediate: only 16-bit values are legal");
7525 } else {
7526 return ParseStatus::Failure;
7527 }
7528
7529 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
7530 return ParseStatus::Success;
7531}
7532
7533bool AMDGPUOperand::isSendMsg() const {
7534 return isImmTy(ImmTySendMsg);
7535}
7536
7537//===----------------------------------------------------------------------===//
7538// v_interp
7539//===----------------------------------------------------------------------===//
7540
7541ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
7542 StringRef Str;
7543 SMLoc S = getLoc();
7544
7545 if (!parseId(Str))
7546 return ParseStatus::NoMatch;
7547
7548 int Slot = StringSwitch<int>(Str)
7549 .Case("p10", 0)
7550 .Case("p20", 1)
7551 .Case("p0", 2)
7552 .Default(-1);
7553
7554 if (Slot == -1)
7555 return Error(S, "invalid interpolation slot");
7556
7557 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
7558 AMDGPUOperand::ImmTyInterpSlot));
7559 return ParseStatus::Success;
7560}
7561
7562ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
7563 StringRef Str;
7564 SMLoc S = getLoc();
7565
7566 if (!parseId(Str))
7567 return ParseStatus::NoMatch;
7568
7569 if (!Str.starts_with("attr"))
7570 return Error(S, "invalid interpolation attribute");
7571
7572 StringRef Chan = Str.take_back(2);
7573 int AttrChan = StringSwitch<int>(Chan)
7574 .Case(".x", 0)
7575 .Case(".y", 1)
7576 .Case(".z", 2)
7577 .Case(".w", 3)
7578 .Default(-1);
7579 if (AttrChan == -1)
7580 return Error(S, "invalid or missing interpolation attribute channel");
7581
7582 Str = Str.drop_back(2).drop_front(4);
7583
7584 uint8_t Attr;
7585 if (Str.getAsInteger(10, Attr))
7586 return Error(S, "invalid or missing interpolation attribute number");
7587
7588 if (Attr > 32)
7589 return Error(S, "out of bounds interpolation attribute number");
7590
7591 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
7592
7593 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
7594 AMDGPUOperand::ImmTyInterpAttr));
7595 Operands.push_back(AMDGPUOperand::CreateImm(
7596 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
7597 return ParseStatus::Success;
7598}
7599
7600//===----------------------------------------------------------------------===//
7601// exp
7602//===----------------------------------------------------------------------===//
7603
7604ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
7605 using namespace llvm::AMDGPU::Exp;
7606
7607 StringRef Str;
7608 SMLoc S = getLoc();
7609
7610 if (!parseId(Str))
7611 return ParseStatus::NoMatch;
7612
7613 unsigned Id = getTgtId(Str);
7614 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
7615 return Error(S, (Id == ET_INVALID)
7616 ? "invalid exp target"
7617 : "exp target is not supported on this GPU");
7618
7619 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
7620 AMDGPUOperand::ImmTyExpTgt));
7621 return ParseStatus::Success;
7622}
7623
7624//===----------------------------------------------------------------------===//
7625// parser helpers
7626//===----------------------------------------------------------------------===//
7627
7628bool
7629AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
7630 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
7631}
7632
7633bool
7634AMDGPUAsmParser::isId(const StringRef Id) const {
7635 return isId(getToken(), Id);
7636}
7637
7638bool
7639AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
7640 return getTokenKind() == Kind;
7641}
7642
7643StringRef AMDGPUAsmParser::getId() const {
7644 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
7645}
7646
7647bool
7648AMDGPUAsmParser::trySkipId(const StringRef Id) {
7649 if (isId(Id)) {
7650 lex();
7651 return true;
7652 }
7653 return false;
7654}
7655
7656bool
7657AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7658 if (isToken(AsmToken::Identifier)) {
7659 StringRef Tok = getTokenStr();
7660 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
7661 lex();
7662 return true;
7663 }
7664 }
7665 return false;
7666}
7667
7668bool
7669AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7670 if (isId(Id) && peekToken().is(Kind)) {
7671 lex();
7672 lex();
7673 return true;
7674 }
7675 return false;
7676}
7677
7678bool
7679AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7680 if (isToken(Kind)) {
7681 lex();
7682 return true;
7683 }
7684 return false;
7685}
7686
7687bool
7688AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7689 const StringRef ErrMsg) {
7690 if (!trySkipToken(Kind)) {
7691 Error(getLoc(), ErrMsg);
7692 return false;
7693 }
7694 return true;
7695}
7696
7697bool
7698AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7699 SMLoc S = getLoc();
7700
7701 const MCExpr *Expr;
7702 if (Parser.parseExpression(Expr))
7703 return false;
7704
7705 if (Expr->evaluateAsAbsolute(Imm))
7706 return true;
7707
7708 if (Expected.empty()) {
7709 Error(S, "expected absolute expression");
7710 } else {
7711 Error(S, Twine("expected ", Expected) +
7712 Twine(" or an absolute expression"));
7713 }
7714 return false;
7715}
7716
7717bool
7718AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7719 SMLoc S = getLoc();
7720
7721 const MCExpr *Expr;
7722 if (Parser.parseExpression(Expr))
7723 return false;
7724
7725 int64_t IntVal;
7726 if (Expr->evaluateAsAbsolute(IntVal)) {
7727 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7728 } else {
7729 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7730 }
7731 return true;
7732}
7733
7734bool
7735AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7736 if (isToken(AsmToken::String)) {
7737 Val = getToken().getStringContents();
7738 lex();
7739 return true;
7740 } else {
7741 Error(getLoc(), ErrMsg);
7742 return false;
7743 }
7744}
7745
7746bool
7747AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7748 if (isToken(AsmToken::Identifier)) {
7749 Val = getTokenStr();
7750 lex();
7751 return true;
7752 } else {
7753 if (!ErrMsg.empty())
7754 Error(getLoc(), ErrMsg);
7755 return false;
7756 }
7757}
7758
7760AMDGPUAsmParser::getToken() const {
7761 return Parser.getTok();
7762}
7763
7764AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7765 return isToken(AsmToken::EndOfStatement)
7766 ? getToken()
7767 : getLexer().peekTok(ShouldSkipSpace);
7768}
7769
7770void
7771AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7772 auto TokCount = getLexer().peekTokens(Tokens);
7773
7774 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7775 Tokens[Idx] = AsmToken(AsmToken::Error, "");
7776}
7777
7779AMDGPUAsmParser::getTokenKind() const {
7780 return getLexer().getKind();
7781}
7782
7783SMLoc
7784AMDGPUAsmParser::getLoc() const {
7785 return getToken().getLoc();
7786}
7787
7789AMDGPUAsmParser::getTokenStr() const {
7790 return getToken().getString();
7791}
7792
7793void
7794AMDGPUAsmParser::lex() {
7795 Parser.Lex();
7796}
7797
7798SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
7799 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7800}
7801
7802SMLoc
7803AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7804 const OperandVector &Operands) const {
7805 for (unsigned i = Operands.size() - 1; i > 0; --i) {
7806 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7807 if (Test(Op))
7808 return Op.getStartLoc();
7809 }
7810 return getInstLoc(Operands);
7811}
7812
7813SMLoc
7814AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7815 const OperandVector &Operands) const {
7816 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7817 return getOperandLoc(Test, Operands);
7818}
7819
7820SMLoc
7821AMDGPUAsmParser::getRegLoc(unsigned Reg,
7822 const OperandVector &Operands) const {
7823 auto Test = [=](const AMDGPUOperand& Op) {
7824 return Op.isRegKind() && Op.getReg() == Reg;
7825 };
7826 return getOperandLoc(Test, Operands);
7827}
7828
7829SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
7830 bool SearchMandatoryLiterals) const {
7831 auto Test = [](const AMDGPUOperand& Op) {
7832 return Op.IsImmKindLiteral() || Op.isExpr();
7833 };
7834 SMLoc Loc = getOperandLoc(Test, Operands);
7835 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
7836 Loc = getMandatoryLitLoc(Operands);
7837 return Loc;
7838}
7839
7840SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
7841 auto Test = [](const AMDGPUOperand &Op) {
7842 return Op.IsImmKindMandatoryLiteral();
7843 };
7844 return getOperandLoc(Test, Operands);
7845}
7846
7847SMLoc
7848AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7849 auto Test = [](const AMDGPUOperand& Op) {
7850 return Op.isImmKindConst();
7851 };
7852 return getOperandLoc(Test, Operands);
7853}
7854
7856AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
7857 if (!trySkipToken(AsmToken::LCurly))
7858 return ParseStatus::NoMatch;
7859
7860 bool First = true;
7861 while (!trySkipToken(AsmToken::RCurly)) {
7862 if (!First &&
7863 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
7864 return ParseStatus::Failure;
7865
7866 StringRef Id = getTokenStr();
7867 SMLoc IdLoc = getLoc();
7868 if (!skipToken(AsmToken::Identifier, "field name expected") ||
7869 !skipToken(AsmToken::Colon, "colon expected"))
7870 return ParseStatus::Failure;
7871
7872 auto I =
7873 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
7874 if (I == Fields.end())
7875 return Error(IdLoc, "unknown field");
7876 if ((*I)->IsDefined)
7877 return Error(IdLoc, "duplicate field");
7878
7879 // TODO: Support symbolic values.
7880 (*I)->Loc = getLoc();
7881 if (!parseExpr((*I)->Val))
7882 return ParseStatus::Failure;
7883 (*I)->IsDefined = true;
7884
7885 First = false;
7886 }
7887 return ParseStatus::Success;
7888}
7889
7890bool AMDGPUAsmParser::validateStructuredOpFields(
7892 return all_of(Fields, [this](const StructuredOpField *F) {
7893 return F->validate(*this);
7894 });
7895}
7896
7897//===----------------------------------------------------------------------===//
7898// swizzle
7899//===----------------------------------------------------------------------===//
7900
7902static unsigned
7903encodeBitmaskPerm(const unsigned AndMask,
7904 const unsigned OrMask,
7905 const unsigned XorMask) {
7906 using namespace llvm::AMDGPU::Swizzle;
7907
7908 return BITMASK_PERM_ENC |
7909 (AndMask << BITMASK_AND_SHIFT) |
7910 (OrMask << BITMASK_OR_SHIFT) |
7911 (XorMask << BITMASK_XOR_SHIFT);
7912}
7913
7914bool
7915AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7916 const unsigned MinVal,
7917 const unsigned MaxVal,
7918 const StringRef ErrMsg,
7919 SMLoc &Loc) {
7920 if (!skipToken(AsmToken::Comma, "expected a comma")) {
7921 return false;
7922 }
7923 Loc = getLoc();
7924 if (!parseExpr(Op)) {
7925 return false;
7926 }
7927 if (Op < MinVal || Op > MaxVal) {
7928 Error(Loc, ErrMsg);
7929 return false;
7930 }
7931
7932 return true;
7933}
7934
7935bool
7936AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7937 const unsigned MinVal,
7938 const unsigned MaxVal,
7939 const StringRef ErrMsg) {
7940 SMLoc Loc;
7941 for (unsigned i = 0; i < OpNum; ++i) {
7942 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7943 return false;
7944 }
7945
7946 return true;
7947}
7948
7949bool
7950AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7951 using namespace llvm::AMDGPU::Swizzle;
7952
7953 int64_t Lane[LANE_NUM];
7954 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7955 "expected a 2-bit lane id")) {
7957 for (unsigned I = 0; I < LANE_NUM; ++I) {
7958 Imm |= Lane[I] << (LANE_SHIFT * I);
7959 }
7960 return true;
7961 }
7962 return false;
7963}
7964
7965bool
7966AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7967 using namespace llvm::AMDGPU::Swizzle;
7968
7969 SMLoc Loc;
7970 int64_t GroupSize;
7971 int64_t LaneIdx;
7972
7973 if (!parseSwizzleOperand(GroupSize,
7974 2, 32,
7975 "group size must be in the interval [2,32]",
7976 Loc)) {
7977 return false;
7978 }
7979 if (!isPowerOf2_64(GroupSize)) {
7980 Error(Loc, "group size must be a power of two");
7981 return false;
7982 }
7983 if (parseSwizzleOperand(LaneIdx,
7984 0, GroupSize - 1,
7985 "lane id must be in the interval [0,group size - 1]",
7986 Loc)) {
7987 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7988 return true;
7989 }
7990 return false;
7991}
7992
7993bool
7994AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7995 using namespace llvm::AMDGPU::Swizzle;
7996
7997 SMLoc Loc;
7998 int64_t GroupSize;
7999
8000 if (!parseSwizzleOperand(GroupSize,
8001 2, 32,
8002 "group size must be in the interval [2,32]",
8003 Loc)) {
8004 return false;
8005 }
8006 if (!isPowerOf2_64(GroupSize)) {
8007 Error(Loc, "group size must be a power of two");
8008 return false;
8009 }
8010
8011 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8012 return true;
8013}
8014
8015bool
8016AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8017 using namespace llvm::AMDGPU::Swizzle;
8018
8019 SMLoc Loc;
8020 int64_t GroupSize;
8021
8022 if (!parseSwizzleOperand(GroupSize,
8023 1, 16,
8024 "group size must be in the interval [1,16]",
8025 Loc)) {
8026 return false;
8027 }
8028 if (!isPowerOf2_64(GroupSize)) {
8029 Error(Loc, "group size must be a power of two");
8030 return false;
8031 }
8032
8033 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8034 return true;
8035}
8036
8037bool
8038AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8039 using namespace llvm::AMDGPU::Swizzle;
8040
8041 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8042 return false;
8043 }
8044
8045 StringRef Ctl;
8046 SMLoc StrLoc = getLoc();
8047 if (!parseString(Ctl)) {
8048 return false;
8049 }
8050 if (Ctl.size() != BITMASK_WIDTH) {
8051 Error(StrLoc, "expected a 5-character mask");
8052 return false;
8053 }
8054
8055 unsigned AndMask = 0;
8056 unsigned OrMask = 0;
8057 unsigned XorMask = 0;
8058
8059 for (size_t i = 0; i < Ctl.size(); ++i) {
8060 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8061 switch(Ctl[i]) {
8062 default:
8063 Error(StrLoc, "invalid mask");
8064 return false;
8065 case '0':
8066 break;
8067 case '1':
8068 OrMask |= Mask;
8069 break;
8070 case 'p':
8071 AndMask |= Mask;
8072 break;
8073 case 'i':
8074 AndMask |= Mask;
8075 XorMask |= Mask;
8076 break;
8077 }
8078 }
8079
8080 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8081 return true;
8082}
8083
8084bool
8085AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8086
8087 SMLoc OffsetLoc = getLoc();
8088
8089 if (!parseExpr(Imm, "a swizzle macro")) {
8090 return false;
8091 }
8092 if (!isUInt<16>(Imm)) {
8093 Error(OffsetLoc, "expected a 16-bit offset");
8094 return false;
8095 }
8096 return true;
8097}
8098
8099bool
8100AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8101 using namespace llvm::AMDGPU::Swizzle;
8102
8103 if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
8104
8105 SMLoc ModeLoc = getLoc();
8106 bool Ok = false;
8107
8108 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8109 Ok = parseSwizzleQuadPerm(Imm);
8110 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8111 Ok = parseSwizzleBitmaskPerm(Imm);
8112 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8113 Ok = parseSwizzleBroadcast(Imm);
8114 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8115 Ok = parseSwizzleSwap(Imm);
8116 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8117 Ok = parseSwizzleReverse(Imm);
8118 } else {
8119 Error(ModeLoc, "expected a swizzle mode");
8120 }
8121
8122 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
8123 }
8124
8125 return false;
8126}
8127
8128ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8129 SMLoc S = getLoc();
8130 int64_t Imm = 0;
8131
8132 if (trySkipId("offset")) {
8133
8134 bool Ok = false;
8135 if (skipToken(AsmToken::Colon, "expected a colon")) {
8136 if (trySkipId("swizzle")) {
8137 Ok = parseSwizzleMacro(Imm);
8138 } else {
8139 Ok = parseSwizzleOffset(Imm);
8140 }
8141 }
8142
8143 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8144
8146 }
8147 return ParseStatus::NoMatch;
8148}
8149
8150bool
8151AMDGPUOperand::isSwizzle() const {
8152 return isImmTy(ImmTySwizzle);
8153}
8154
8155//===----------------------------------------------------------------------===//
8156// VGPR Index Mode
8157//===----------------------------------------------------------------------===//
8158
8159int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8160
8161 using namespace llvm::AMDGPU::VGPRIndexMode;
8162
8163 if (trySkipToken(AsmToken::RParen)) {
8164 return OFF;
8165 }
8166
8167 int64_t Imm = 0;
8168
8169 while (true) {
8170 unsigned Mode = 0;
8171 SMLoc S = getLoc();
8172
8173 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8174 if (trySkipId(IdSymbolic[ModeId])) {
8175 Mode = 1 << ModeId;
8176 break;
8177 }
8178 }
8179
8180 if (Mode == 0) {
8181 Error(S, (Imm == 0)?
8182 "expected a VGPR index mode or a closing parenthesis" :
8183 "expected a VGPR index mode");
8184 return UNDEF;
8185 }
8186
8187 if (Imm & Mode) {
8188 Error(S, "duplicate VGPR index mode");
8189 return UNDEF;
8190 }
8191 Imm |= Mode;
8192
8193 if (trySkipToken(AsmToken::RParen))
8194 break;
8195 if (!skipToken(AsmToken::Comma,
8196 "expected a comma or a closing parenthesis"))
8197 return UNDEF;
8198 }
8199
8200 return Imm;
8201}
8202
8203ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8204
8205 using namespace llvm::AMDGPU::VGPRIndexMode;
8206
8207 int64_t Imm = 0;
8208 SMLoc S = getLoc();
8209
8210 if (trySkipId("gpr_idx", AsmToken::LParen)) {
8211 Imm = parseGPRIdxMacro();
8212 if (Imm == UNDEF)
8213 return ParseStatus::Failure;
8214 } else {
8215 if (getParser().parseAbsoluteExpression(Imm))
8216 return ParseStatus::Failure;
8217 if (Imm < 0 || !isUInt<4>(Imm))
8218 return Error(S, "invalid immediate: only 4-bit values are legal");
8219 }
8220
8221 Operands.push_back(
8222 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8223 return ParseStatus::Success;
8224}
8225
8226bool AMDGPUOperand::isGPRIdxMode() const {
8227 return isImmTy(ImmTyGprIdxMode);
8228}
8229
8230//===----------------------------------------------------------------------===//
8231// sopp branch targets
8232//===----------------------------------------------------------------------===//
8233
8234ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
8235
8236 // Make sure we are not parsing something
8237 // that looks like a label or an expression but is not.
8238 // This will improve error messages.
8239 if (isRegister() || isModifier())
8240 return ParseStatus::NoMatch;
8241
8242 if (!parseExpr(Operands))
8243 return ParseStatus::Failure;
8244
8245 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
8246 assert(Opr.isImm() || Opr.isExpr());
8247 SMLoc Loc = Opr.getStartLoc();
8248
8249 // Currently we do not support arbitrary expressions as branch targets.
8250 // Only labels and absolute expressions are accepted.
8251 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8252 Error(Loc, "expected an absolute expression or a label");
8253 } else if (Opr.isImm() && !Opr.isS16Imm()) {
8254 Error(Loc, "expected a 16-bit signed jump offset");
8255 }
8256
8257 return ParseStatus::Success;
8258}
8259
8260//===----------------------------------------------------------------------===//
8261// Boolean holding registers
8262//===----------------------------------------------------------------------===//
8263
8264ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
8265 return parseReg(Operands);
8266}
8267
8268//===----------------------------------------------------------------------===//
8269// mubuf
8270//===----------------------------------------------------------------------===//
8271
8272void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
8273 const OperandVector &Operands,
8274 bool IsAtomic) {
8275 OptionalImmIndexMap OptionalIdx;
8276 unsigned FirstOperandIdx = 1;
8277 bool IsAtomicReturn = false;
8278
8279 if (IsAtomic) {
8280 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
8282 }
8283
8284 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
8285 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8286
8287 // Add the register arguments
8288 if (Op.isReg()) {
8289 Op.addRegOperands(Inst, 1);
8290 // Insert a tied src for atomic return dst.
8291 // This cannot be postponed as subsequent calls to
8292 // addImmOperands rely on correct number of MC operands.
8293 if (IsAtomicReturn && i == FirstOperandIdx)
8294 Op.addRegOperands(Inst, 1);
8295 continue;
8296 }
8297
8298 // Handle the case where soffset is an immediate
8299 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
8300 Op.addImmOperands(Inst, 1);
8301 continue;
8302 }
8303
8304 // Handle tokens like 'offen' which are sometimes hard-coded into the
8305 // asm string. There are no MCInst operands for these.
8306 if (Op.isToken()) {
8307 continue;
8308 }
8309 assert(Op.isImm());
8310
8311 // Handle optional arguments
8312 OptionalIdx[Op.getImmTy()] = i;
8313 }
8314
8315 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
8316 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
8317}
8318
8319//===----------------------------------------------------------------------===//
8320// smrd
8321//===----------------------------------------------------------------------===//
8322
8323bool AMDGPUOperand::isSMRDOffset8() const {
8324 return isImmLiteral() && isUInt<8>(getImm());
8325}
8326
8327bool AMDGPUOperand::isSMEMOffset() const {
8328 // Offset range is checked later by validator.
8329 return isImmLiteral();
8330}
8331
8332bool AMDGPUOperand::isSMRDLiteralOffset() const {
8333 // 32-bit literals are only supported on CI and we only want to use them
8334 // when the offset is > 8-bits.
8335 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
8336}
8337
8338//===----------------------------------------------------------------------===//
8339// vop3
8340//===----------------------------------------------------------------------===//
8341
8342static bool ConvertOmodMul(int64_t &Mul) {
8343 if (Mul != 1 && Mul != 2 && Mul != 4)
8344 return false;
8345
8346 Mul >>= 1;
8347 return true;
8348}
8349
8350static bool ConvertOmodDiv(int64_t &Div) {
8351 if (Div == 1) {
8352 Div = 0;
8353 return true;
8354 }
8355
8356 if (Div == 2) {
8357 Div = 3;
8358 return true;
8359 }
8360
8361 return false;
8362}
8363
8364// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
8365// This is intentional and ensures compatibility with sp3.
8366// See bug 35397 for details.
8367bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
8368 if (BoundCtrl == 0 || BoundCtrl == 1) {
8369 if (!isGFX11Plus())
8370 BoundCtrl = 1;
8371 return true;
8372 }
8373 return false;
8374}
8375
8376void AMDGPUAsmParser::onBeginOfFile() {
8377 if (!getParser().getStreamer().getTargetStreamer() ||
8378 getSTI().getTargetTriple().getArch() == Triple::r600)
8379 return;
8380
8381 if (!getTargetStreamer().getTargetID())
8382 getTargetStreamer().initializeTargetID(getSTI(),
8383 getSTI().getFeatureString());
8384
8385 if (isHsaAbi(getSTI()))
8386 getTargetStreamer().EmitDirectiveAMDGCNTarget();
8387}
8388
8389/// Parse AMDGPU specific expressions.
8390///
8391/// expr ::= or(expr, ...) |
8392/// max(expr, ...)
8393///
8394bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
8396
8397 if (isToken(AsmToken::Identifier)) {
8398 StringRef TokenId = getTokenStr();
8399 AGVK VK = StringSwitch<AGVK>(TokenId)
8400 .Case("max", AGVK::AGVK_Max)
8401 .Case("or", AGVK::AGVK_Or)
8402 .Default(AGVK::AGVK_None);
8403
8404 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
8406 uint64_t CommaCount = 0;
8407 lex(); // Eat 'max'/'or'
8408 lex(); // Eat '('
8409 while (true) {
8410 if (trySkipToken(AsmToken::RParen)) {
8411 if (Exprs.empty()) {
8412 Error(getToken().getLoc(),
8413 "empty " + Twine(TokenId) + " expression");
8414 return true;
8415 }
8416 if (CommaCount + 1 != Exprs.size()) {
8417 Error(getToken().getLoc(),
8418 "mismatch of commas in " + Twine(TokenId) + " expression");
8419 return true;
8420 }
8421 Res = AMDGPUVariadicMCExpr::create(VK, Exprs, getContext());
8422 return false;
8423 }
8424 const MCExpr *Expr;
8425 if (getParser().parseExpression(Expr, EndLoc))
8426 return true;
8427 Exprs.push_back(Expr);
8428 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
8429 if (LastTokenWasComma)
8430 CommaCount++;
8431 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
8432 Error(getToken().getLoc(),
8433 "unexpected token in " + Twine(TokenId) + " expression");
8434 return true;
8435 }
8436 }
8437 }
8438 }
8439 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
8440}
8441
8442ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
8443 StringRef Name = getTokenStr();
8444 if (Name == "mul") {
8445 return parseIntWithPrefix("mul", Operands,
8446 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8447 }
8448
8449 if (Name == "div") {
8450 return parseIntWithPrefix("div", Operands,
8451 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8452 }
8453
8454 return ParseStatus::NoMatch;
8455}
8456
8457// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8458// the number of src operands present, then copies that bit into src0_modifiers.
8459static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
8460 int Opc = Inst.getOpcode();
8461 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8462 if (OpSelIdx == -1)
8463 return;
8464
8465 int SrcNum;
8466 const int Ops[] = { AMDGPU::OpName::src0,
8467 AMDGPU::OpName::src1,
8468 AMDGPU::OpName::src2 };
8469 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
8470 ++SrcNum)
8471 ;
8472 assert(SrcNum > 0);
8473
8474 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8475
8476 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
8477 if (DstIdx == -1)
8478 return;
8479
8480 const MCOperand &DstOp = Inst.getOperand(DstIdx);
8481 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8482 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8483 if (DstOp.isReg() &&
8484 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
8485 if (AMDGPU::isHi(DstOp.getReg(), MRI))
8486 ModVal |= SISrcMods::DST_OP_SEL;
8487 } else {
8488 if ((OpSel & (1 << SrcNum)) != 0)
8489 ModVal |= SISrcMods::DST_OP_SEL;
8490 }
8491 Inst.getOperand(ModIdx).setImm(ModVal);
8492}
8493
8494void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
8495 const OperandVector &Operands) {
8496 cvtVOP3P(Inst, Operands);
8497 cvtVOP3DstOpSelOnly(Inst, *getMRI());
8498}
8499
8500void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
8501 OptionalImmIndexMap &OptionalIdx) {
8502 cvtVOP3P(Inst, Operands, OptionalIdx);
8503 cvtVOP3DstOpSelOnly(Inst, *getMRI());
8504}
8505
8506static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8507 return
8508 // 1. This operand is input modifiers
8509 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
8510 // 2. This is not last operand
8511 && Desc.NumOperands > (OpNum + 1)
8512 // 3. Next operand is register class
8513 && Desc.operands()[OpNum + 1].RegClass != -1
8514 // 4. Next register is not tied to any other operand
8515 && Desc.getOperandConstraint(OpNum + 1,
8516 MCOI::OperandConstraint::TIED_TO) == -1;
8517}
8518
8519void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8520{
8521 OptionalImmIndexMap OptionalIdx;
8522 unsigned Opc = Inst.getOpcode();
8523
8524 unsigned I = 1;
8525 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8526 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8527 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8528 }
8529
8530 for (unsigned E = Operands.size(); I != E; ++I) {
8531 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8533 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8534 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
8535 Op.isInterpAttrChan()) {
8536 Inst.addOperand(MCOperand::createImm(Op.getImm()));
8537 } else if (Op.isImmModifier()) {
8538 OptionalIdx[Op.getImmTy()] = I;
8539 } else {
8540 llvm_unreachable("unhandled operand type");
8541 }
8542 }
8543
8544 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
8545 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8546 AMDGPUOperand::ImmTyHigh);
8547
8548 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8549 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8550 AMDGPUOperand::ImmTyClampSI);
8551
8552 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8553 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8554 AMDGPUOperand::ImmTyOModSI);
8555}
8556
8557void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8558{
8559 OptionalImmIndexMap OptionalIdx;
8560 unsigned Opc = Inst.getOpcode();
8561
8562 unsigned I = 1;
8563 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8564 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8565 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8566 }
8567
8568 for (unsigned E = Operands.size(); I != E; ++I) {
8569 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8571 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8572 } else if (Op.isImmModifier()) {
8573 OptionalIdx[Op.getImmTy()] = I;
8574 } else {
8575 llvm_unreachable("unhandled operand type");
8576 }
8577 }
8578
8579 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8580
8581 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8582 if (OpSelIdx != -1)
8583 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8584
8585 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8586
8587 if (OpSelIdx == -1)
8588 return;
8589
8590 const int Ops[] = { AMDGPU::OpName::src0,
8591 AMDGPU::OpName::src1,
8592 AMDGPU::OpName::src2 };
8593 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8594 AMDGPU::OpName::src1_modifiers,
8595 AMDGPU::OpName::src2_modifiers };
8596
8597 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8598
8599 for (int J = 0; J < 3; ++J) {
8600 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8601 if (OpIdx == -1)
8602 break;
8603
8604 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8605 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8606
8607 if ((OpSel & (1 << J)) != 0)
8608 ModVal |= SISrcMods::OP_SEL_0;
8609 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8610 (OpSel & (1 << 3)) != 0)
8611 ModVal |= SISrcMods::DST_OP_SEL;
8612
8613 Inst.getOperand(ModIdx).setImm(ModVal);
8614 }
8615}
8616
8617void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8618 OptionalImmIndexMap &OptionalIdx) {
8619 unsigned Opc = Inst.getOpcode();
8620
8621 unsigned I = 1;
8622 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8623 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8624 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8625 }
8626
8627 for (unsigned E = Operands.size(); I != E; ++I) {
8628 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8630 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8631 } else if (Op.isImmModifier()) {
8632 OptionalIdx[Op.getImmTy()] = I;
8633 } else if (Op.isRegOrImm()) {
8634 Op.addRegOrImmOperands(Inst, 1);
8635 } else {
8636 llvm_unreachable("unhandled operand type");
8637 }
8638 }
8639
8640 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
8641 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
8642 Inst.addOperand(Inst.getOperand(0));
8643 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8644 AMDGPUOperand::ImmTyByteSel);
8645 }
8646
8647 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8648 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8649 AMDGPUOperand::ImmTyClampSI);
8650
8651 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8652 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8653 AMDGPUOperand::ImmTyOModSI);
8654
8655 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8656 // it has src2 register operand that is tied to dst operand
8657 // we don't allow modifiers for this operand in assembler so src2_modifiers
8658 // should be 0.
8659 if (isMAC(Opc)) {
8660 auto it = Inst.begin();
8661 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8662 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8663 ++it;
8664 // Copy the operand to ensure it's not invalidated when Inst grows.
8665 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8666 }
8667}
8668
8669void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8670 OptionalImmIndexMap OptionalIdx;
8671 cvtVOP3(Inst, Operands, OptionalIdx);
8672}
8673
8674void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8675 OptionalImmIndexMap &OptIdx) {
8676 const int Opc = Inst.getOpcode();
8677 const MCInstrDesc &Desc = MII.get(Opc);
8678
8679 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8680
8681 if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8682 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
8683 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
8684 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
8685 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
8686 Inst.addOperand(Inst.getOperand(0));
8687 }
8688
8689 // Adding vdst_in operand is already covered for these DPP instructions in
8690 // cvtVOP3DPP.
8691 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
8692 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
8693 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
8694 Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
8695 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12 ||
8696 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
8697 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
8698 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
8699 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) {
8700 assert(!IsPacked);
8701 Inst.addOperand(Inst.getOperand(0));
8702 }
8703
8704 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
8705 // instruction, and then figure out where to actually put the modifiers
8706
8707 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8708 if (OpSelIdx != -1) {
8709 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8710 }
8711
8712 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8713 if (OpSelHiIdx != -1) {
8714 int DefaultVal = IsPacked ? -1 : 0;
8715 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8716 DefaultVal);
8717 }
8718
8719 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8720 if (NegLoIdx != -1)
8721 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8722
8723 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8724 if (NegHiIdx != -1)
8725 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8726
8727 const int Ops[] = { AMDGPU::OpName::src0,
8728 AMDGPU::OpName::src1,
8729 AMDGPU::OpName::src2 };
8730 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8731 AMDGPU::OpName::src1_modifiers,
8732 AMDGPU::OpName::src2_modifiers };
8733
8734 unsigned OpSel = 0;
8735 unsigned OpSelHi = 0;
8736 unsigned NegLo = 0;
8737 unsigned NegHi = 0;
8738
8739 if (OpSelIdx != -1)
8740 OpSel = Inst.getOperand(OpSelIdx).getImm();
8741
8742 if (OpSelHiIdx != -1)
8743 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8744
8745 if (NegLoIdx != -1)
8746 NegLo = Inst.getOperand(NegLoIdx).getImm();
8747
8748 if (NegHiIdx != -1)
8749 NegHi = Inst.getOperand(NegHiIdx).getImm();
8750
8751 for (int J = 0; J < 3; ++J) {
8752 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8753 if (OpIdx == -1)
8754 break;
8755
8756 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8757
8758 if (ModIdx == -1)
8759 continue;
8760
8761 uint32_t ModVal = 0;
8762
8763 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
8764 if (SrcOp.isReg() && getMRI()
8765 ->getRegClass(AMDGPU::VGPR_16RegClassID)
8766 .contains(SrcOp.getReg())) {
8767 bool VGPRSuffixIsHi = AMDGPU::isHi(SrcOp.getReg(), *getMRI());
8768 if (VGPRSuffixIsHi)
8769 ModVal |= SISrcMods::OP_SEL_0;
8770 } else {
8771 if ((OpSel & (1 << J)) != 0)
8772 ModVal |= SISrcMods::OP_SEL_0;
8773 }
8774
8775 if ((OpSelHi & (1 << J)) != 0)
8776 ModVal |= SISrcMods::OP_SEL_1;
8777
8778 if ((NegLo & (1 << J)) != 0)
8779 ModVal |= SISrcMods::NEG;
8780
8781 if ((NegHi & (1 << J)) != 0)
8782 ModVal |= SISrcMods::NEG_HI;
8783
8784 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8785 }
8786}
8787
8788void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8789 OptionalImmIndexMap OptIdx;
8790 cvtVOP3(Inst, Operands, OptIdx);
8791 cvtVOP3P(Inst, Operands, OptIdx);
8792}
8793
8795 unsigned i, unsigned Opc, unsigned OpName) {
8796 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
8797 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
8798 else
8799 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
8800}
8801
8802void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
8803 unsigned Opc = Inst.getOpcode();
8804
8805 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
8806 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
8807 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
8808 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
8809 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
8810
8811 OptionalImmIndexMap OptIdx;
8812 for (unsigned i = 5; i < Operands.size(); ++i) {
8813 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8814 OptIdx[Op.getImmTy()] = i;
8815 }
8816
8817 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
8818 addOptionalImmOperand(Inst, Operands, OptIdx,
8819 AMDGPUOperand::ImmTyIndexKey8bit);
8820
8821 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
8822 addOptionalImmOperand(Inst, Operands, OptIdx,
8823 AMDGPUOperand::ImmTyIndexKey16bit);
8824
8825 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8826 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClampSI);
8827
8828 cvtVOP3P(Inst, Operands, OptIdx);
8829}
8830
8831//===----------------------------------------------------------------------===//
8832// VOPD
8833//===----------------------------------------------------------------------===//
8834
8835ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
8836 if (!hasVOPD(getSTI()))
8837 return ParseStatus::NoMatch;
8838
8839 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8840 SMLoc S = getLoc();
8841 lex();
8842 lex();
8843 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8844 SMLoc OpYLoc = getLoc();
8845 StringRef OpYName;
8846 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
8847 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
8848 return ParseStatus::Success;
8849 }
8850 return Error(OpYLoc, "expected a VOPDY instruction after ::");
8851 }
8852 return ParseStatus::NoMatch;
8853}
8854
8855// Create VOPD MCInst operands using parsed assembler operands.
8856void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
8857 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
8858 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
8859 if (Op.isReg()) {
8860 Op.addRegOperands(Inst, 1);
8861 return;
8862 }
8863 if (Op.isImm()) {
8864 Op.addImmOperands(Inst, 1);
8865 return;
8866 }
8867 llvm_unreachable("Unhandled operand type in cvtVOPD");
8868 };
8869
8870 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
8871
8872 // MCInst operands are ordered as follows:
8873 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
8874
8875 for (auto CompIdx : VOPD::COMPONENTS) {
8876 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
8877 }
8878
8879 for (auto CompIdx : VOPD::COMPONENTS) {
8880 const auto &CInfo = InstInfo[CompIdx];
8881 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
8882 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
8883 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
8884 if (CInfo.hasSrc2Acc())
8885 addOp(CInfo.getIndexOfDstInParsedOperands());
8886 }
8887}
8888
8889//===----------------------------------------------------------------------===//
8890// dpp
8891//===----------------------------------------------------------------------===//
8892
8893bool AMDGPUOperand::isDPP8() const {
8894 return isImmTy(ImmTyDPP8);
8895}
8896
8897bool AMDGPUOperand::isDPPCtrl() const {
8898 using namespace AMDGPU::DPP;
8899
8900 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8901 if (result) {
8902 int64_t Imm = getImm();
8903 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8904 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8905 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8906 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8907 (Imm == DppCtrl::WAVE_SHL1) ||
8908 (Imm == DppCtrl::WAVE_ROL1) ||
8909 (Imm == DppCtrl::WAVE_SHR1) ||
8910 (Imm == DppCtrl::WAVE_ROR1) ||
8911 (Imm == DppCtrl::ROW_MIRROR) ||
8912 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8913 (Imm == DppCtrl::BCAST15) ||
8914 (Imm == DppCtrl::BCAST31) ||
8915 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8916 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8917 }
8918 return false;
8919}
8920
8921//===----------------------------------------------------------------------===//
8922// mAI
8923//===----------------------------------------------------------------------===//
8924
8925bool AMDGPUOperand::isBLGP() const {
8926 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8927}
8928
8929bool AMDGPUOperand::isS16Imm() const {
8930 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8931}
8932
8933bool AMDGPUOperand::isU16Imm() const {
8934 return isImmLiteral() && isUInt<16>(getImm());
8935}
8936
8937//===----------------------------------------------------------------------===//
8938// dim
8939//===----------------------------------------------------------------------===//
8940
8941bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8942 // We want to allow "dim:1D" etc.,
8943 // but the initial 1 is tokenized as an integer.
8944 std::string Token;
8945 if (isToken(AsmToken::Integer)) {
8946 SMLoc Loc = getToken().getEndLoc();
8947 Token = std::string(getTokenStr());
8948 lex();
8949 if (getLoc() != Loc)
8950 return false;
8951 }
8952
8953 StringRef Suffix;
8954 if (!parseId(Suffix))
8955 return false;
8956 Token += Suffix;
8957
8958 StringRef DimId = Token;
8959 if (DimId.starts_with("SQ_RSRC_IMG_"))
8960 DimId = DimId.drop_front(12);
8961
8963 if (!DimInfo)
8964 return false;
8965
8966 Encoding = DimInfo->Encoding;
8967 return true;
8968}
8969
8970ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8971 if (!isGFX10Plus())
8972 return ParseStatus::NoMatch;
8973
8974 SMLoc S = getLoc();
8975
8976 if (!trySkipId("dim", AsmToken::Colon))
8977 return ParseStatus::NoMatch;
8978
8979 unsigned Encoding;
8980 SMLoc Loc = getLoc();
8981 if (!parseDimId(Encoding))
8982 return Error(Loc, "invalid dim value");
8983
8984 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8985 AMDGPUOperand::ImmTyDim));
8986 return ParseStatus::Success;
8987}
8988
8989//===----------------------------------------------------------------------===//
8990// dpp
8991//===----------------------------------------------------------------------===//
8992
8993ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8994 SMLoc S = getLoc();
8995
8996 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8997 return ParseStatus::NoMatch;
8998
8999 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
9000
9001 int64_t Sels[8];
9002
9003 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9004 return ParseStatus::Failure;
9005
9006 for (size_t i = 0; i < 8; ++i) {
9007 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9008 return ParseStatus::Failure;
9009
9010 SMLoc Loc = getLoc();
9011 if (getParser().parseAbsoluteExpression(Sels[i]))
9012 return ParseStatus::Failure;
9013 if (0 > Sels[i] || 7 < Sels[i])
9014 return Error(Loc, "expected a 3-bit value");
9015 }
9016
9017 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9018 return ParseStatus::Failure;
9019
9020 unsigned DPP8 = 0;
9021 for (size_t i = 0; i < 8; ++i)
9022 DPP8 |= (Sels[i] << (i * 3));
9023
9024 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9025 return ParseStatus::Success;
9026}
9027
9028bool
9029AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9030 const OperandVector &Operands) {
9031 if (Ctrl == "row_newbcast")
9032 return isGFX90A();
9033
9034 if (Ctrl == "row_share" ||
9035 Ctrl == "row_xmask")
9036 return isGFX10Plus();
9037
9038 if (Ctrl == "wave_shl" ||
9039 Ctrl == "wave_shr" ||
9040 Ctrl == "wave_rol" ||
9041 Ctrl == "wave_ror" ||
9042 Ctrl == "row_bcast")
9043 return isVI() || isGFX9();
9044
9045 return Ctrl == "row_mirror" ||
9046 Ctrl == "row_half_mirror" ||
9047 Ctrl == "quad_perm" ||
9048 Ctrl == "row_shl" ||
9049 Ctrl == "row_shr" ||
9050 Ctrl == "row_ror";
9051}
9052
9053int64_t
9054AMDGPUAsmParser::parseDPPCtrlPerm() {
9055 // quad_perm:[%d,%d,%d,%d]
9056
9057 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9058 return -1;
9059
9060 int64_t Val = 0;
9061 for (int i = 0; i < 4; ++i) {
9062 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9063 return -1;
9064
9065 int64_t Temp;
9066 SMLoc Loc = getLoc();
9067 if (getParser().parseAbsoluteExpression(Temp))
9068 return -1;
9069 if (Temp < 0 || Temp > 3) {
9070 Error(Loc, "expected a 2-bit value");
9071 return -1;
9072 }
9073
9074 Val += (Temp << i * 2);
9075 }
9076
9077 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9078 return -1;
9079
9080 return Val;
9081}
9082
9083int64_t
9084AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
9085 using namespace AMDGPU::DPP;
9086
9087 // sel:%d
9088
9089 int64_t Val;
9090 SMLoc Loc = getLoc();
9091
9092 if (getParser().parseAbsoluteExpression(Val))
9093 return -1;
9094
9095 struct DppCtrlCheck {
9096 int64_t Ctrl;
9097 int Lo;
9098 int Hi;
9099 };
9100
9101 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
9102 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
9103 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
9104 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
9105 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
9106 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
9107 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
9108 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
9109 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
9110 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
9111 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
9112 .Default({-1, 0, 0});
9113
9114 bool Valid;
9115 if (Check.Ctrl == -1) {
9116 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
9117 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
9118 } else {
9119 Valid = Check.Lo <= Val && Val <= Check.Hi;
9120 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
9121 }
9122
9123 if (!Valid) {
9124 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
9125 return -1;
9126 }
9127
9128 return Val;
9129}
9130
9131ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
9132 using namespace AMDGPU::DPP;
9133
9134 if (!isToken(AsmToken::Identifier) ||
9135 !isSupportedDPPCtrl(getTokenStr(), Operands))
9136 return ParseStatus::NoMatch;
9137
9138 SMLoc S = getLoc();
9139 int64_t Val = -1;
9141
9142 parseId(Ctrl);
9143
9144 if (Ctrl == "row_mirror") {
9145 Val = DppCtrl::ROW_MIRROR;
9146 } else if (Ctrl == "row_half_mirror") {
9147 Val = DppCtrl::ROW_HALF_MIRROR;
9148 } else {
9149 if (skipToken(AsmToken::Colon, "expected a colon")) {
9150 if (Ctrl == "quad_perm") {
9151 Val = parseDPPCtrlPerm();
9152 } else {
9153 Val = parseDPPCtrlSel(Ctrl);
9154 }
9155 }
9156 }
9157
9158 if (Val == -1)
9159 return ParseStatus::Failure;
9160
9161 Operands.push_back(
9162 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
9163 return ParseStatus::Success;
9164}
9165
9166void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
9167 bool IsDPP8) {
9168 OptionalImmIndexMap OptionalIdx;
9169 unsigned Opc = Inst.getOpcode();
9170 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9171
9172 // MAC instructions are special because they have 'old'
9173 // operand which is not tied to dst (but assumed to be).
9174 // They also have dummy unused src2_modifiers.
9175 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
9176 int Src2ModIdx =
9177 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
9178 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
9179 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
9180
9181 unsigned I = 1;
9182 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9183 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9184 }
9185
9186 int Fi = 0;
9187 for (unsigned E = Operands.size(); I != E; ++I) {
9188
9189 if (IsMAC) {
9190 int NumOperands = Inst.getNumOperands();
9191 if (OldIdx == NumOperands) {
9192 // Handle old operand
9193 constexpr int DST_IDX = 0;
9194 Inst.addOperand(Inst.getOperand(DST_IDX));
9195 } else if (Src2ModIdx == NumOperands) {
9196 // Add unused dummy src2_modifiers
9198 }
9199 }
9200
9201 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
9202 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
9203 Inst.addOperand(Inst.getOperand(0));
9204 }
9205
9206 bool IsVOP3CvtSrDpp =
9207 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9208 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9209 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9210 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
9211 if (IsVOP3CvtSrDpp) {
9212 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
9215 }
9216 }
9217
9218 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9220 if (TiedTo != -1) {
9221 assert((unsigned)TiedTo < Inst.getNumOperands());
9222 // handle tied old or src2 for MAC instructions
9223 Inst.addOperand(Inst.getOperand(TiedTo));
9224 }
9225 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9226 // Add the register arguments
9227 if (IsDPP8 && Op.isDppFI()) {
9228 Fi = Op.getImm();
9229 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9230 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9231 } else if (Op.isReg()) {
9232 Op.addRegOperands(Inst, 1);
9233 } else if (Op.isImm() &&
9234 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
9235 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
9236 Op.addImmOperands(Inst, 1);
9237 } else if (Op.isImm()) {
9238 OptionalIdx[Op.getImmTy()] = I;
9239 } else {
9240 llvm_unreachable("unhandled operand type");
9241 }
9242 }
9243
9244 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel))
9245 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9246 AMDGPUOperand::ImmTyByteSel);
9247
9248 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9249 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
9250
9251 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9252 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
9253
9254 if (Desc.TSFlags & SIInstrFlags::VOP3P)
9255 cvtVOP3P(Inst, Operands, OptionalIdx);
9256 else if (Desc.TSFlags & SIInstrFlags::VOP3)
9257 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
9258 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9259 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9260 }
9261
9262 if (IsDPP8) {
9263 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
9264 using namespace llvm::AMDGPU::DPP;
9265 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9266 } else {
9267 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
9268 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9269 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9270 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9271
9272 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
9273 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9274 AMDGPUOperand::ImmTyDppFI);
9275 }
9276}
9277
9278void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
9279 OptionalImmIndexMap OptionalIdx;
9280
9281 unsigned I = 1;
9282 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9283 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9284 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9285 }
9286
9287 int Fi = 0;
9288 for (unsigned E = Operands.size(); I != E; ++I) {
9289 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9291 if (TiedTo != -1) {
9292 assert((unsigned)TiedTo < Inst.getNumOperands());
9293 // handle tied old or src2 for MAC instructions
9294 Inst.addOperand(Inst.getOperand(TiedTo));
9295 }
9296 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9297 // Add the register arguments
9298 if (Op.isReg() && validateVccOperand(Op.getReg())) {
9299 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
9300 // Skip it.
9301 continue;
9302 }
9303
9304 if (IsDPP8) {
9305 if (Op.isDPP8()) {
9306 Op.addImmOperands(Inst, 1);
9307 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9308 Op.addRegWithFPInputModsOperands(Inst, 2);
9309 } else if (Op.isDppFI()) {
9310 Fi = Op.getImm();
9311 } else if (Op.isReg()) {
9312 Op.addRegOperands(Inst, 1);
9313 } else {
9314 llvm_unreachable("Invalid operand type");
9315 }
9316 } else {
9318 Op.addRegWithFPInputModsOperands(Inst, 2);
9319 } else if (Op.isReg()) {
9320 Op.addRegOperands(Inst, 1);
9321 } else if (Op.isDPPCtrl()) {
9322 Op.addImmOperands(Inst, 1);
9323 } else if (Op.isImm()) {
9324 // Handle optional arguments
9325 OptionalIdx[Op.getImmTy()] = I;
9326 } else {
9327 llvm_unreachable("Invalid operand type");
9328 }
9329 }
9330 }
9331
9332 if (IsDPP8) {
9333 using namespace llvm::AMDGPU::DPP;
9334 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9335 } else {
9336 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9337 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9338 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9339 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
9340 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9341 AMDGPUOperand::ImmTyDppFI);
9342 }
9343 }
9344}
9345
9346//===----------------------------------------------------------------------===//
9347// sdwa
9348//===----------------------------------------------------------------------===//
9349
9350ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
9351 StringRef Prefix,
9352 AMDGPUOperand::ImmTy Type) {
9353 using namespace llvm::AMDGPU::SDWA;
9354
9355 SMLoc S = getLoc();
9357
9358 SMLoc StringLoc;
9359 ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc);
9360 if (!Res.isSuccess())
9361 return Res;
9362
9363 int64_t Int;
9365 .Case("BYTE_0", SdwaSel::BYTE_0)
9366 .Case("BYTE_1", SdwaSel::BYTE_1)
9367 .Case("BYTE_2", SdwaSel::BYTE_2)
9368 .Case("BYTE_3", SdwaSel::BYTE_3)
9369 .Case("WORD_0", SdwaSel::WORD_0)
9370 .Case("WORD_1", SdwaSel::WORD_1)
9371 .Case("DWORD", SdwaSel::DWORD)
9372 .Default(0xffffffff);
9373
9374 if (Int == 0xffffffff)
9375 return Error(StringLoc, "invalid " + Twine(Prefix) + " value");
9376
9377 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
9378 return ParseStatus::Success;
9379}
9380
9381ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
9382 using namespace llvm::AMDGPU::SDWA;
9383
9384 SMLoc S = getLoc();
9386
9387 SMLoc StringLoc;
9388 ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc);
9389 if (!Res.isSuccess())
9390 return Res;
9391
9392 int64_t Int;
9394 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
9395 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
9396 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
9397 .Default(0xffffffff);
9398
9399 if (Int == 0xffffffff)
9400 return Error(StringLoc, "invalid dst_unused value");
9401
9402 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused));
9403 return ParseStatus::Success;
9404}
9405
9406void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
9407 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
9408}
9409
9410void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
9411 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
9412}
9413
9414void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
9415 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
9416}
9417
9418void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
9419 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
9420}
9421
9422void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
9423 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
9424}
9425
9426void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
9427 uint64_t BasicInstType,
9428 bool SkipDstVcc,
9429 bool SkipSrcVcc) {
9430 using namespace llvm::AMDGPU::SDWA;
9431
9432 OptionalImmIndexMap OptionalIdx;
9433 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
9434 bool SkippedVcc = false;
9435
9436 unsigned I = 1;
9437 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9438 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9439 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9440 }
9441
9442 for (unsigned E = Operands.size(); I != E; ++I) {
9443 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9444 if (SkipVcc && !SkippedVcc && Op.isReg() &&
9445 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
9446 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
9447 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
9448 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
9449 // Skip VCC only if we didn't skip it on previous iteration.
9450 // Note that src0 and src1 occupy 2 slots each because of modifiers.
9451 if (BasicInstType == SIInstrFlags::VOP2 &&
9452 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
9453 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
9454 SkippedVcc = true;
9455 continue;
9456 } else if (BasicInstType == SIInstrFlags::VOPC &&
9457 Inst.getNumOperands() == 0) {
9458 SkippedVcc = true;
9459 continue;
9460 }
9461 }
9463 Op.addRegOrImmWithInputModsOperands(Inst, 2);
9464 } else if (Op.isImm()) {
9465 // Handle optional arguments
9466 OptionalIdx[Op.getImmTy()] = I;
9467 } else {
9468 llvm_unreachable("Invalid operand type");
9469 }
9470 SkippedVcc = false;
9471 }
9472
9473 const unsigned Opc = Inst.getOpcode();
9474 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
9475 Opc != AMDGPU::V_NOP_sdwa_vi) {
9476 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
9477 switch (BasicInstType) {
9478 case SIInstrFlags::VOP1:
9479 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9480 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9481 AMDGPUOperand::ImmTyClampSI, 0);
9482
9483 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9484 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9485 AMDGPUOperand::ImmTyOModSI, 0);
9486
9487 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
9488 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9489 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9490
9491 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
9492 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9493 AMDGPUOperand::ImmTySDWADstUnused,
9494 DstUnused::UNUSED_PRESERVE);
9495
9496 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9497 break;
9498
9499 case SIInstrFlags::VOP2:
9500 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9501
9502 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
9503 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
9504
9505 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9506 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
9507 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9508 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9509 break;
9510
9511 case SIInstrFlags::VOPC:
9512 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
9513 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9514 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9515 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9516 break;
9517
9518 default:
9519 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9520 }
9521 }
9522
9523 // special case v_mac_{f16, f32}:
9524 // it has src2 register operand that is tied to dst operand
9525 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9526 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
9527 auto it = Inst.begin();
9528 std::advance(
9529 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
9530 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
9531 }
9532}
9533
9534/// Force static initialization.
9538}
9539
9540#define GET_REGISTER_MATCHER
9541#define GET_MATCHER_IMPLEMENTATION
9542#define GET_MNEMONIC_SPELL_CHECKER
9543#define GET_MNEMONIC_CHECKER
9544#include "AMDGPUGenAsmMatcher.inc"
9545
9546ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
9547 unsigned MCK) {
9548 switch (MCK) {
9549 case MCK_addr64:
9550 return parseTokenOp("addr64", Operands);
9551 case MCK_done:
9552 return parseTokenOp("done", Operands);
9553 case MCK_idxen:
9554 return parseTokenOp("idxen", Operands);
9555 case MCK_lds:
9556 return parseTokenOp("lds", Operands);
9557 case MCK_offen:
9558 return parseTokenOp("offen", Operands);
9559 case MCK_off:
9560 return parseTokenOp("off", Operands);
9561 case MCK_row_95_en:
9562 return parseTokenOp("row_en", Operands);
9563 case MCK_gds:
9564 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
9565 case MCK_tfe:
9566 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
9567 }
9568 return tryCustomParseOperand(Operands, MCK);
9569}
9570
9571// This function should be defined after auto-generated include so that we have
9572// MatchClassKind enum defined
9573unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
9574 unsigned Kind) {
9575 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
9576 // But MatchInstructionImpl() expects to meet token and fails to validate
9577 // operand. This method checks if we are given immediate operand but expect to
9578 // get corresponding token.
9579 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9580 switch (Kind) {
9581 case MCK_addr64:
9582 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9583 case MCK_gds:
9584 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9585 case MCK_lds:
9586 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9587 case MCK_idxen:
9588 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9589 case MCK_offen:
9590 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9591 case MCK_tfe:
9592 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
9593 case MCK_SSrc_b32:
9594 // When operands have expression values, they will return true for isToken,
9595 // because it is not possible to distinguish between a token and an
9596 // expression at parse time. MatchInstructionImpl() will always try to
9597 // match an operand as a token, when isToken returns true, and when the
9598 // name of the expression is not a valid token, the match will fail,
9599 // so we need to handle it here.
9600 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
9601 case MCK_SSrc_f32:
9602 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
9603 case MCK_SOPPBrTarget:
9604 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
9605 case MCK_VReg32OrOff:
9606 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9607 case MCK_InterpSlot:
9608 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9609 case MCK_InterpAttr:
9610 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9611 case MCK_InterpAttrChan:
9612 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
9613 case MCK_SReg_64:
9614 case MCK_SReg_64_XEXEC:
9615 // Null is defined as a 32-bit register but
9616 // it should also be enabled with 64-bit operands.
9617 // The following code enables it for SReg_64 operands
9618 // used as source and destination. Remaining source
9619 // operands are handled in isInlinableImm.
9620 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9621 default:
9622 return Match_InvalidOperand;
9623 }
9624}
9625
9626//===----------------------------------------------------------------------===//
9627// endpgm
9628//===----------------------------------------------------------------------===//
9629
9630ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
9631 SMLoc S = getLoc();
9632 int64_t Imm = 0;
9633
9634 if (!parseExpr(Imm)) {
9635 // The operand is optional, if not present default to 0
9636 Imm = 0;
9637 }
9638
9639 if (!isUInt<16>(Imm))
9640 return Error(S, "expected a 16-bit value");
9641
9642 Operands.push_back(
9643 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9644 return ParseStatus::Success;
9645}
9646
9647bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9648
9649//===----------------------------------------------------------------------===//
9650// Split Barrier
9651//===----------------------------------------------------------------------===//
9652
9653bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser()
Force static initialization.
static bool checkWriteLane(const MCInst &Inst)
static bool getRegNum(StringRef Str, unsigned &Num)
static constexpr RegInfo RegularRegisters[]
static const RegInfo * getRegularRegInfo(StringRef Str)
static ArrayRef< unsigned > getAllVariants()
static OperandIndices getSrcOperandIndices(unsigned Opcode, bool AddMandatoryLiterals=false)
static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
static const fltSemantics * getFltSemantics(unsigned Size)
static bool isRegularReg(RegisterKind Kind)
static bool ConvertOmodMul(int64_t &Mul)
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi)
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT)
constexpr uint64_t MIMGFlags
static bool AMDGPUCheckMnemonic(StringRef Mnemonic, const FeatureBitset &AvailableFeatures, unsigned VariantID)
static void applyMnemonicAliases(StringRef &Mnemonic, const FeatureBitset &Features, unsigned VariantID)
constexpr unsigned MAX_SRC_OPERANDS_NUM
#define EXPR_RESOLVE_OR_ERROR(RESOLVED)
static bool ConvertOmodDiv(int64_t &Div)
static unsigned getSpecialRegForName(StringRef RegName)
static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, unsigned i, unsigned Opc, unsigned OpName)
static bool IsRevOpcode(const unsigned Opcode)
static int getRegClass(RegisterKind Is, unsigned RegWidth)
static void addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands, AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx, AMDGPUOperand::ImmTy ImmT, int64_t Default=0)
static bool encodeCnt(const AMDGPU::IsaVersion ISA, int64_t &IntVal, int64_t CntVal, bool Saturate, unsigned(*encode)(const IsaVersion &Version, unsigned, unsigned), unsigned(*decode)(const IsaVersion &Version, unsigned))
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI)
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum)
static const fltSemantics * getOpFltSemantics(uint8_t OperandType)
static bool isInvalidVOPDY(const OperandVector &Operands, uint64_t InvalidOprIdx)
static std::string AMDGPUMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID=0)
static LLVM_READNONE unsigned encodeBitmaskPerm(const unsigned AndMask, const unsigned OrMask, const unsigned XorMask)
static bool isSafeTruncation(int64_t Val, unsigned Size)
static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, const MCRegisterInfo *MRI)
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Provides AMDGPU specific target descriptions.
AMDGPU metadata definitions and in-memory representations.
AMDHSA kernel descriptor definitions.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define LLVM_READNONE
Definition: Compiler.h:220
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:135
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
@ Default
Definition: DwarfDebug.cpp:87
std::string Name
uint64_t Size
Symbol * Sym
Definition: ELF_riscv.cpp:479
static unsigned getOperandSize(MachineInstr &MI, unsigned Idx, MachineRegisterInfo &MRI)
#define Check(C,...)
static llvm::Expected< InlineInfo > decode(DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr)
Decode an InlineInfo in Data at the specified offset.
Definition: InlineInfo.cpp:180
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
LLVMContext & Context
#define P(N)
#define G_00B848_FWD_PROGRESS(x)
Definition: SIDefines.h:1154
#define G_00B848_MEM_ORDERED(x)
Definition: SIDefines.h:1151
#define G_00B848_IEEE_MODE(x)
Definition: SIDefines.h:1145
#define G_00B848_DX10_CLAMP(x)
Definition: SIDefines.h:1136
#define G_00B848_WGP_MODE(x)
Definition: SIDefines.h:1148
Interface definition for SIInstrInfo.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Interface definition for SIRegisterInfo.
static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI)
unsigned unsigned DefaultVal
raw_pwrite_stream & OS
This file implements the SmallBitVector class.
static bool Enabled
Definition: Statistic.cpp:46
StringSet - A set-like wrapper for the StringMap.
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
BinaryOperator * Mul
support::ulittle16_t & Lo
Definition: aarch32.cpp:206
support::ulittle16_t & Hi
Definition: aarch32.cpp:205
static const AMDGPUVariadicMCExpr * create(VariadicKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:5196
Class for arbitrary precision integers.
Definition: APInt.h:76
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
Target independent representation for an assembler token.
Definition: MCAsmMacro.h:21
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition: MCAsmMacro.h:110
bool is(TokenKind K) const
Definition: MCAsmMacro.h:82
TokenKind getKind() const
Definition: MCAsmMacro.h:81
This class represents an Operation in the Expression.
Register getReg() const
Base class for user error types.
Definition: Error.h:352
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
Tagged union holding either a T or a Error.
Definition: Error.h:474
Class representing an expression and its matching format.
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
Generic assembler parser interface, for use by target specific assembly parsers.
Definition: MCAsmParser.h:123
virtual MCStreamer & getStreamer()=0
Return the output streamer for the assembler.
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition: MCExpr.cpp:194
Context object for machine code objects.
Definition: MCContext.h:81
const MCRegisterInfo * getRegisterInfo() const
Definition: MCContext.h:455
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:201
const MCSubtargetInfo * getSubtargetInfo() const
Definition: MCContext.h:459
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
unsigned getNumOperands() const
Definition: MCInst.h:208
SMLoc getLoc() const
Definition: MCInst.h:204
void setLoc(SMLoc loc)
Definition: MCInst.h:203
unsigned getOpcode() const
Definition: MCInst.h:198
iterator insert(iterator I, const MCOperand &Op)
Definition: MCInst.h:224
void addOperand(const MCOperand Op)
Definition: MCInst.h:210
iterator begin()
Definition: MCInst.h:219
size_t size() const
Definition: MCInst.h:218
const MCOperand & getOperand(unsigned i) const
Definition: MCInst.h:206
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
bool mayStore() const
Return true if this instruction could possibly modify memory.
Definition: MCInstrDesc.h:444
Interface to description of machine instruction set.
Definition: MCInstrInfo.h:26
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:36
static MCOperand createReg(unsigned Reg)
Definition: MCInst.h:134
void setImm(int64_t Val)
Definition: MCInst.h:85
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:162
void setReg(unsigned Reg)
Set the register number.
Definition: MCInst.h:75
int64_t getImm() const
Definition: MCInst.h:80
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:141
bool isImm() const
Definition: MCInst.h:62
unsigned getReg() const
Returns the register number.
Definition: MCInst.h:69
bool isReg() const
Definition: MCInst.h:61
bool isExpr() const
Definition: MCInst.h:65
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
virtual bool isReg() const =0
isReg - Is this a register operand?
virtual bool isMem() const =0
isMem - Is this a memory operand?
virtual MCRegister getReg() const =0
virtual bool isToken() const =0
isToken - Is this a token operand?
virtual bool isImm() const =0
isImm - Is this an immediate operand?
MCRegisterClass - Base class of TargetRegisterClass.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
unsigned getRegister(unsigned i) const
getRegister - Return the specified register in the class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
static constexpr unsigned NoRegister
Definition: MCRegister.h:52
Streaming machine code generation interface.
Definition: MCStreamer.h:212
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
MCTargetStreamer * getTargetStreamer()
Definition: MCStreamer.h:304
Generic base class for all target subtargets.
const FeatureBitset & getFeatureBits() const
FeatureBitset ToggleFeature(uint64_t FB)
Toggle a feature and return the re-computed feature bits.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:40
void setVariableValue(const MCExpr *Value)
Definition: MCSymbol.cpp:47
MCTargetAsmParser - Generic interface to target specific assembly parsers.
MCSubtargetInfo & copySTI()
Create a copy of STI and return a non-const reference to it.
virtual bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc)=0
virtual bool ParseDirective(AsmToken DirectiveID)
ParseDirective - Parse a target specific assembler directive This method is deprecated,...
virtual ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc)=0
tryParseRegister - parse one register if possible
const MCInstrInfo & MII
void setAvailableFeatures(const FeatureBitset &Value)
const MCSubtargetInfo & getSTI() const
virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind)
Allow a target to add special case operand matching for things that tblgen doesn't/can't handle effec...
virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands)=0
ParseInstruction - Parse one assembly instruction.
virtual unsigned checkTargetMatchPredicate(MCInst &Inst)
checkTargetMatchPredicate - Validate the instruction match against any complex target predicates not ...
virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm)=0
MatchAndEmitInstruction - Recognize a series of operands of a parsed instruction as an actual MCInst ...
Target specific streamer interface.
Definition: MCStreamer.h:93
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Represents a location in source code.
Definition: SMLoc.h:23
static SMLoc getFromPointer(const char *Ptr)
Definition: SMLoc.h:36
constexpr const char * getPointer() const
Definition: SMLoc.h:34
constexpr bool isValid() const
Definition: SMLoc.h:29
Represents a range in source code.
Definition: SMLoc.h:48
SMLoc Start
Definition: SMLoc.h:50
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition: DenseSet.h:290
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
Register getReg() const
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition: StringRef.h:845
StringMapEntry - This is used to represent one value that is inserted into a StringMap.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
Definition: StringRef.h:647
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:257
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition: StringRef.h:601
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition: StringRef.h:269
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:23
bool contains(StringRef key) const
Check if the set contains the given key.
Definition: StringSet.h:55
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition: StringSet.h:38
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM Value Representation.
Definition: Value.h:74
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:660
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:690
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char NumVGPRs[]
Key for Kernel::CodeProps::Metadata::mNumVGPRs.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
Key
PAL metadata keys.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
constexpr unsigned COMPONENTS[]
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo)
Get size of register operand.
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READNONE bool isLegalDPALU_DPPControl(unsigned DC)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const MCSubtargetInfo *STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
unsigned mc2PseudoReg(unsigned Reg)
Convert hardware register Reg to a pseudo register.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isDPALU_DPP(const MCInstrDesc &OpDesc)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
bool isGFX9(const MCSubtargetInfo &STI)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this an AMDGPU specific source operand? These include registers, inline constants,...
bool hasMAIInsts(const MCSubtargetInfo &STI)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
bool isSGPR(unsigned Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
bool isHi(unsigned Reg, const MCRegisterInfo &MRI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isInlineValue(unsigned Reg)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition: SIDefines.h:234
@ OPERAND_REG_IMM_INT64
Definition: SIDefines.h:201
@ OPERAND_REG_IMM_V2FP16
Definition: SIDefines.h:211
@ OPERAND_REG_INLINE_C_V2INT32
Definition: SIDefines.h:227
@ OPERAND_REG_INLINE_C_FP64
Definition: SIDefines.h:223
@ OPERAND_REG_INLINE_C_BF16
Definition: SIDefines.h:220
@ OPERAND_REG_INLINE_C_V2BF16
Definition: SIDefines.h:225
@ OPERAND_REG_IMM_V2INT16
Definition: SIDefines.h:212
@ OPERAND_REG_IMM_BF16
Definition: SIDefines.h:205
@ OPERAND_REG_INLINE_AC_V2FP16
Definition: SIDefines.h:246
@ OPERAND_REG_IMM_INT32
Operands with register or 32-bit immediate.
Definition: SIDefines.h:200
@ OPERAND_REG_IMM_V2BF16
Definition: SIDefines.h:210
@ OPERAND_REG_IMM_BF16_DEFERRED
Definition: SIDefines.h:207
@ OPERAND_REG_IMM_FP16
Definition: SIDefines.h:206
@ OPERAND_REG_INLINE_C_INT64
Definition: SIDefines.h:219
@ OPERAND_REG_INLINE_AC_BF16
Definition: SIDefines.h:240
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition: SIDefines.h:217
@ OPERAND_REG_INLINE_AC_INT16
Operands with an AccVGPR register or inline constant.
Definition: SIDefines.h:238
@ OPERAND_REG_IMM_FP64
Definition: SIDefines.h:204
@ OPERAND_REG_INLINE_C_V2FP16
Definition: SIDefines.h:226
@ OPERAND_REG_INLINE_AC_V2INT16
Definition: SIDefines.h:244
@ OPERAND_REG_INLINE_AC_FP16
Definition: SIDefines.h:241
@ OPERAND_REG_INLINE_AC_INT32
Definition: SIDefines.h:239
@ OPERAND_REG_INLINE_AC_FP32
Definition: SIDefines.h:242
@ OPERAND_REG_INLINE_AC_V2BF16
Definition: SIDefines.h:245
@ OPERAND_REG_IMM_V2INT32
Definition: SIDefines.h:213
@ OPERAND_REG_IMM_FP32
Definition: SIDefines.h:203
@ OPERAND_INPUT_MODS
Definition: SIDefines.h:251
@ OPERAND_REG_INLINE_C_FP32
Definition: SIDefines.h:222
@ OPERAND_REG_INLINE_C_INT32
Definition: SIDefines.h:218
@ OPERAND_REG_INLINE_C_V2INT16
Definition: SIDefines.h:224
@ OPERAND_REG_IMM_V2FP32
Definition: SIDefines.h:214
@ OPERAND_REG_INLINE_AC_FP64
Definition: SIDefines.h:243
@ OPERAND_REG_INLINE_C_FP16
Definition: SIDefines.h:221
@ OPERAND_REG_IMM_INT16
Definition: SIDefines.h:202
@ OPERAND_REG_INLINE_C_V2FP32
Definition: SIDefines.h:228
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition: SIDefines.h:231
@ OPERAND_REG_IMM_FP32_DEFERRED
Definition: SIDefines.h:209
@ OPERAND_REG_IMM_FP16_DEFERRED
Definition: SIDefines.h:208
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isVI(const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ STT_AMDGPU_HSA_KERNEL
Definition: ELF.h:1336
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:212
const uint64_t Version
Definition: InstrProf.h:1153
@ OPERAND_IMMEDIATE
Definition: MCInstrDesc.h:60
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
Definition: PPCPredicates.h:87
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
Reg
All possible values of the reg field in the ModR/M byte.
std::optional< const char * > toString(const std::optional< DWARFFormValue > &V)
Take an optional DWARFFormValue and try to extract a string value from it.
Format
The format used for serializing/deserializing remarks.
Definition: RemarkFormat.h:25
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
Definition: Error.h:1071
@ Offset
Definition: DWP.cpp:456
@ Length
Definition: DWP.cpp:456
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
Definition: Alignment.h:217
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:428
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:239
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
void PrintError(const Twine &Msg)
Definition: Error.cpp:101
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:280
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition: bit.h:342
Target & getTheR600Target()
The target for R600 GPUs.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition: MathExtras.h:138
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition: MathExtras.h:177
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition: MathExtras.h:143
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Target & getTheGCNTarget()
The target for GCN GPUs.
bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:244
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:293
unsigned M0(unsigned Val)
Definition: VE.h:375
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
bool parseAmdKernelCodeField(StringRef ID, MCAsmParser &Parser, amd_kernel_code_t &C, raw_ostream &Err)
#define N
RegisterKind Kind
StringLiteral Name
AMD Kernel Code Object (amd_kernel_code_t).
Instruction set architecture version.
Definition: TargetParser.h:125
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
Represents the counter values to wait for in an s_waitcnt instruction.
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:249
static const fltSemantics & IEEEhalf() LLVM_READNONE
Definition: APFloat.cpp:247
static const fltSemantics & BFloat() LLVM_READNONE
Definition: APFloat.cpp:248
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:246
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Description of the encoding of one expression Op.
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...