LLVM 19.0.0git
AMDGPUAsmParser.cpp
1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
14#include "SIDefines.h"
15#include "SIInstrInfo.h"
16#include "SIRegisterInfo.h"
21#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/StringSet.h"
24#include "llvm/ADT/Twine.h"
27#include "llvm/MC/MCAsmInfo.h"
28#include "llvm/MC/MCContext.h"
29#include "llvm/MC/MCExpr.h"
30#include "llvm/MC/MCInst.h"
31#include "llvm/MC/MCInstrDesc.h"
36#include "llvm/MC/MCSymbol.h"
43#include <optional>
44
45using namespace llvm;
46using namespace llvm::AMDGPU;
47using namespace llvm::amdhsa;
48
49namespace {
50
51class AMDGPUAsmParser;
52
53enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
54
55//===----------------------------------------------------------------------===//
56// Operand
57//===----------------------------------------------------------------------===//
58
59class AMDGPUOperand : public MCParsedAsmOperand {
60 enum KindTy {
61 Token,
62 Immediate,
63 Register,
64 Expression,
65 } Kind;
66
67 SMLoc StartLoc, EndLoc;
68 const AMDGPUAsmParser *AsmParser;
69
70public:
71 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
72 : Kind(Kind_), AsmParser(AsmParser_) {}
73
74 using Ptr = std::unique_ptr<AMDGPUOperand>;
75
76 struct Modifiers {
77 bool Abs = false;
78 bool Neg = false;
79 bool Sext = false;
80 bool Lit = false;
81
82 bool hasFPModifiers() const { return Abs || Neg; }
83 bool hasIntModifiers() const { return Sext; }
84 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
85
86 int64_t getFPModifiersOperand() const {
87 int64_t Operand = 0;
88 Operand |= Abs ? SISrcMods::ABS : 0u;
89 Operand |= Neg ? SISrcMods::NEG : 0u;
90 return Operand;
91 }
92
93 int64_t getIntModifiersOperand() const {
94 int64_t Operand = 0;
95 Operand |= Sext ? SISrcMods::SEXT : 0u;
96 return Operand;
97 }
98
99 int64_t getModifiersOperand() const {
100 assert(!(hasFPModifiers() && hasIntModifiers())
101 && "fp and int modifiers should not be used simultaneously");
102 if (hasFPModifiers()) {
103 return getFPModifiersOperand();
104 } else if (hasIntModifiers()) {
105 return getIntModifiersOperand();
106 } else {
107 return 0;
108 }
109 }
110
111 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
112 };
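// Illustrative example (an informal sketch of the mapping above, assuming the
// usual SP3 source-modifier syntax): an operand written as -|v0| is parsed
// with Abs = Neg = true, so getModifiersOperand() yields
// SISrcMods::ABS | SISrcMods::NEG, while sext(v0) sets Sext and yields
// SISrcMods::SEXT via getIntModifiersOperand().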
113
114 enum ImmTy {
115 ImmTyNone,
116 ImmTyGDS,
117 ImmTyLDS,
118 ImmTyOffen,
119 ImmTyIdxen,
120 ImmTyAddr64,
121 ImmTyOffset,
122 ImmTyInstOffset,
123 ImmTyOffset0,
124 ImmTyOffset1,
125 ImmTySMEMOffsetMod,
126 ImmTyCPol,
127 ImmTyTFE,
128 ImmTyD16,
129 ImmTyClampSI,
130 ImmTyOModSI,
131 ImmTySDWADstSel,
132 ImmTySDWASrc0Sel,
133 ImmTySDWASrc1Sel,
134 ImmTySDWADstUnused,
135 ImmTyDMask,
136 ImmTyDim,
137 ImmTyUNorm,
138 ImmTyDA,
139 ImmTyR128A16,
140 ImmTyA16,
141 ImmTyLWE,
142 ImmTyExpTgt,
143 ImmTyExpCompr,
144 ImmTyExpVM,
145 ImmTyFORMAT,
146 ImmTyHwreg,
147 ImmTyOff,
148 ImmTySendMsg,
149 ImmTyInterpSlot,
150 ImmTyInterpAttr,
151 ImmTyInterpAttrChan,
152 ImmTyOpSel,
153 ImmTyOpSelHi,
154 ImmTyNegLo,
155 ImmTyNegHi,
156 ImmTyIndexKey8bit,
157 ImmTyIndexKey16bit,
158 ImmTyDPP8,
159 ImmTyDppCtrl,
160 ImmTyDppRowMask,
161 ImmTyDppBankMask,
162 ImmTyDppBoundCtrl,
163 ImmTyDppFI,
164 ImmTySwizzle,
165 ImmTyGprIdxMode,
166 ImmTyHigh,
167 ImmTyBLGP,
168 ImmTyCBSZ,
169 ImmTyABID,
170 ImmTyEndpgm,
171 ImmTyWaitVDST,
172 ImmTyWaitEXP,
173 ImmTyWaitVAVDst,
174 ImmTyWaitVMVSrc,
175 ImmTyByteSel,
176 };
177
178 // Immediate operand kind.
179 // It helps to identify the location of an offending operand after an error.
180 // Note that regular literals and mandatory literals (KImm) must be handled
181 // differently. When looking for an offending operand, we should usually
182 // ignore mandatory literals because they are part of the instruction and
183 // cannot be changed. Report location of mandatory operands only for VOPD,
184 // when both OpX and OpY have a KImm and there are no other literals.
185 enum ImmKindTy {
186 ImmKindTyNone,
187 ImmKindTyLiteral,
188 ImmKindTyMandatoryLiteral,
189 ImmKindTyConst,
190 };
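// For instance (rough illustration): the trailing constant of v_madak_f32 is
// part of the instruction encoding and is classified as
// ImmKindTyMandatoryLiteral, whereas a 32-bit constant that is not an inline
// value, used as src0 of v_add_f32, is a regular ImmKindTyLiteral.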
191
192private:
193 struct TokOp {
194 const char *Data;
195 unsigned Length;
196 };
197
198 struct ImmOp {
199 int64_t Val;
200 ImmTy Type;
201 bool IsFPImm;
202 mutable ImmKindTy Kind;
203 Modifiers Mods;
204 };
205
206 struct RegOp {
207 unsigned RegNo;
208 Modifiers Mods;
209 };
210
211 union {
212 TokOp Tok;
213 ImmOp Imm;
214 RegOp Reg;
215 const MCExpr *Expr;
216 };
217
218public:
219 bool isToken() const override { return Kind == Token; }
220
221 bool isSymbolRefExpr() const {
222 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
223 }
224
225 bool isImm() const override {
226 return Kind == Immediate;
227 }
228
229 void setImmKindNone() const {
230 assert(isImm());
231 Imm.Kind = ImmKindTyNone;
232 }
233
234 void setImmKindLiteral() const {
235 assert(isImm());
236 Imm.Kind = ImmKindTyLiteral;
237 }
238
239 void setImmKindMandatoryLiteral() const {
240 assert(isImm());
241 Imm.Kind = ImmKindTyMandatoryLiteral;
242 }
243
244 void setImmKindConst() const {
245 assert(isImm());
246 Imm.Kind = ImmKindTyConst;
247 }
248
249 bool IsImmKindLiteral() const {
250 return isImm() && Imm.Kind == ImmKindTyLiteral;
251 }
252
253 bool IsImmKindMandatoryLiteral() const {
254 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
255 }
256
257 bool isImmKindConst() const {
258 return isImm() && Imm.Kind == ImmKindTyConst;
259 }
260
261 bool isInlinableImm(MVT type) const;
262 bool isLiteralImm(MVT type) const;
263
264 bool isRegKind() const {
265 return Kind == Register;
266 }
267
268 bool isReg() const override {
269 return isRegKind() && !hasModifiers();
270 }
271
272 bool isRegOrInline(unsigned RCID, MVT type) const {
273 return isRegClass(RCID) || isInlinableImm(type);
274 }
275
276 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
277 return isRegOrInline(RCID, type) || isLiteralImm(type);
278 }
279
280 bool isRegOrImmWithInt16InputMods() const {
281 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
282 }
283
284 bool isRegOrImmWithIntT16InputMods() const {
285 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
286 }
287
288 bool isRegOrImmWithInt32InputMods() const {
289 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
290 }
291
292 bool isRegOrInlineImmWithInt16InputMods() const {
293 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
294 }
295
296 bool isRegOrInlineImmWithInt32InputMods() const {
297 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
298 }
299
300 bool isRegOrImmWithInt64InputMods() const {
301 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
302 }
303
304 bool isRegOrImmWithFP16InputMods() const {
305 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
306 }
307
308 bool isRegOrImmWithFPT16InputMods() const {
309 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
310 }
311
312 bool isRegOrImmWithFP32InputMods() const {
313 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
314 }
315
316 bool isRegOrImmWithFP64InputMods() const {
317 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
318 }
319
320 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
321 return isRegOrInline(
322 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
323 }
324
325 bool isRegOrInlineImmWithFP32InputMods() const {
326 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
327 }
328
329 bool isPackedFP16InputMods() const {
330 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
331 }
332
333 bool isVReg() const {
334 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
335 isRegClass(AMDGPU::VReg_64RegClassID) ||
336 isRegClass(AMDGPU::VReg_96RegClassID) ||
337 isRegClass(AMDGPU::VReg_128RegClassID) ||
338 isRegClass(AMDGPU::VReg_160RegClassID) ||
339 isRegClass(AMDGPU::VReg_192RegClassID) ||
340 isRegClass(AMDGPU::VReg_256RegClassID) ||
341 isRegClass(AMDGPU::VReg_512RegClassID) ||
342 isRegClass(AMDGPU::VReg_1024RegClassID);
343 }
344
345 bool isVReg32() const {
346 return isRegClass(AMDGPU::VGPR_32RegClassID);
347 }
348
349 bool isVReg32OrOff() const {
350 return isOff() || isVReg32();
351 }
352
353 bool isNull() const {
354 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
355 }
356
357 bool isVRegWithInputMods() const;
358 template <bool IsFake16> bool isT16VRegWithInputMods() const;
359
360 bool isSDWAOperand(MVT type) const;
361 bool isSDWAFP16Operand() const;
362 bool isSDWAFP32Operand() const;
363 bool isSDWAInt16Operand() const;
364 bool isSDWAInt32Operand() const;
365
366 bool isImmTy(ImmTy ImmT) const {
367 return isImm() && Imm.Type == ImmT;
368 }
369
370 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
371
372 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
373
374 bool isImmModifier() const {
375 return isImm() && Imm.Type != ImmTyNone;
376 }
377
378 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
379 bool isDim() const { return isImmTy(ImmTyDim); }
380 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
381 bool isOff() const { return isImmTy(ImmTyOff); }
382 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
383 bool isOffen() const { return isImmTy(ImmTyOffen); }
384 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
385 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
386 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
387 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
388 bool isGDS() const { return isImmTy(ImmTyGDS); }
389 bool isLDS() const { return isImmTy(ImmTyLDS); }
390 bool isCPol() const { return isImmTy(ImmTyCPol); }
391 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
392 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
393 bool isTFE() const { return isImmTy(ImmTyTFE); }
394 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
395 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
396 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
397 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
398 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
399 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
400 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
401 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
402 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
403 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
404 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
405 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
406 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
407
408 bool isRegOrImm() const {
409 return isReg() || isImm();
410 }
411
412 bool isRegClass(unsigned RCID) const;
413
414 bool isInlineValue() const;
415
416 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
417 return isRegOrInline(RCID, type) && !hasModifiers();
418 }
419
420 bool isSCSrcB16() const {
421 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
422 }
423
424 bool isSCSrcV2B16() const {
425 return isSCSrcB16();
426 }
427
428 bool isSCSrc_b32() const {
429 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
430 }
431
432 bool isSCSrc_b64() const {
433 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
434 }
435
436 bool isBoolReg() const;
437
438 bool isSCSrcF16() const {
439 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
440 }
441
442 bool isSCSrcV2F16() const {
443 return isSCSrcF16();
444 }
445
446 bool isSCSrcF32() const {
447 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
448 }
449
450 bool isSCSrcF64() const {
451 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
452 }
453
454 bool isSSrc_b32() const {
455 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
456 }
457
458 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
459
460 bool isSSrcV2B16() const {
461 llvm_unreachable("cannot happen");
462 return isSSrc_b16();
463 }
464
465 bool isSSrc_b64() const {
466 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
467 // See isVSrc64().
468 return isSCSrc_b64() || isLiteralImm(MVT::i64);
469 }
470
471 bool isSSrc_f32() const {
472 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
473 }
474
475 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
476
477 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
478
479 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
480
481 bool isSSrcV2F16() const {
482 llvm_unreachable("cannot happen");
483 return isSSrc_f16();
484 }
485
486 bool isSSrcV2FP32() const {
487 llvm_unreachable("cannot happen");
488 return isSSrc_f32();
489 }
490
491 bool isSCSrcV2FP32() const {
492 llvm_unreachable("cannot happen");
493 return isSCSrcF32();
494 }
495
496 bool isSSrcV2INT32() const {
497 llvm_unreachable("cannot happen");
498 return isSSrc_b32();
499 }
500
501 bool isSCSrcV2INT32() const {
502 llvm_unreachable("cannot happen");
503 return isSCSrc_b32();
504 }
505
506 bool isSSrcOrLds_b32() const {
507 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
508 isLiteralImm(MVT::i32) || isExpr();
509 }
510
511 bool isVCSrc_b32() const {
512 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
513 }
514
515 bool isVCSrcB64() const {
516 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
517 }
518
519 bool isVCSrcTB16() const {
520 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
521 }
522
523 bool isVCSrcTB16_Lo128() const {
524 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
525 }
526
527 bool isVCSrcFake16B16_Lo128() const {
528 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
529 }
530
531 bool isVCSrc_b16() const {
532 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
533 }
534
535 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
536
537 bool isVCSrc_f32() const {
538 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
539 }
540
541 bool isVCSrcF64() const {
542 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
543 }
544
545 bool isVCSrcTBF16() const {
546 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
547 }
548
549 bool isVCSrcTF16() const {
550 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
551 }
552
553 bool isVCSrcTBF16_Lo128() const {
554 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
555 }
556
557 bool isVCSrcTF16_Lo128() const {
558 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
559 }
560
561 bool isVCSrcFake16BF16_Lo128() const {
562 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
563 }
564
565 bool isVCSrcFake16F16_Lo128() const {
566 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
567 }
568
569 bool isVCSrc_bf16() const {
570 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
571 }
572
573 bool isVCSrc_f16() const {
574 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
575 }
576
577 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
578
579 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
580
581 bool isVSrc_b32() const {
582 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
583 }
584
585 bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
586
587 bool isVSrcT_b16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
588
589 bool isVSrcT_b16_Lo128() const {
590 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
591 }
592
593 bool isVSrcFake16_b16_Lo128() const {
594 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
595 }
596
597 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
598
599 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
600
601 bool isVCSrcV2FP32() const {
602 return isVCSrcF64();
603 }
604
605 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
606
607 bool isVCSrcV2INT32() const {
608 return isVCSrcB64();
609 }
610
611 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
612
613 bool isVSrc_f32() const {
614 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
615 }
616
617 bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }
618
619 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
620
621 bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
622
623 bool isVSrcT_bf16_Lo128() const {
624 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
625 }
626
627 bool isVSrcT_f16_Lo128() const {
628 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
629 }
630
631 bool isVSrcFake16_bf16_Lo128() const {
632 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
633 }
634
635 bool isVSrcFake16_f16_Lo128() const {
636 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
637 }
638
639 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
640
641 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
642
643 bool isVSrc_v2bf16() const {
644 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
645 }
646
647 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
648
649 bool isVISrcB32() const {
650 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
651 }
652
653 bool isVISrcB16() const {
654 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
655 }
656
657 bool isVISrcV2B16() const {
658 return isVISrcB16();
659 }
660
661 bool isVISrcF32() const {
662 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
663 }
664
665 bool isVISrcF16() const {
666 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
667 }
668
669 bool isVISrcV2F16() const {
670 return isVISrcF16() || isVISrcB32();
671 }
672
673 bool isVISrc_64_bf16() const {
674 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
675 }
676
677 bool isVISrc_64_f16() const {
678 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
679 }
680
681 bool isVISrc_64_b32() const {
682 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
683 }
684
685 bool isVISrc_64B64() const {
686 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
687 }
688
689 bool isVISrc_64_f64() const {
690 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
691 }
692
693 bool isVISrc_64V2FP32() const {
694 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
695 }
696
697 bool isVISrc_64V2INT32() const {
698 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
699 }
700
701 bool isVISrc_256_b32() const {
702 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
703 }
704
705 bool isVISrc_256_f32() const {
706 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
707 }
708
709 bool isVISrc_256B64() const {
710 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
711 }
712
713 bool isVISrc_256_f64() const {
714 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
715 }
716
717 bool isVISrc_128B16() const {
718 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
719 }
720
721 bool isVISrc_128V2B16() const {
722 return isVISrc_128B16();
723 }
724
725 bool isVISrc_128_b32() const {
726 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
727 }
728
729 bool isVISrc_128_f32() const {
730 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
731 }
732
733 bool isVISrc_256V2FP32() const {
734 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
735 }
736
737 bool isVISrc_256V2INT32() const {
738 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
739 }
740
741 bool isVISrc_512_b32() const {
742 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
743 }
744
745 bool isVISrc_512B16() const {
746 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
747 }
748
749 bool isVISrc_512V2B16() const {
750 return isVISrc_512B16();
751 }
752
753 bool isVISrc_512_f32() const {
754 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
755 }
756
757 bool isVISrc_512F16() const {
758 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
759 }
760
761 bool isVISrc_512V2F16() const {
762 return isVISrc_512F16() || isVISrc_512_b32();
763 }
764
765 bool isVISrc_1024_b32() const {
766 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
767 }
768
769 bool isVISrc_1024B16() const {
770 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
771 }
772
773 bool isVISrc_1024V2B16() const {
774 return isVISrc_1024B16();
775 }
776
777 bool isVISrc_1024_f32() const {
778 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
779 }
780
781 bool isVISrc_1024F16() const {
782 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
783 }
784
785 bool isVISrc_1024V2F16() const {
786 return isVISrc_1024F16() || isVISrc_1024_b32();
787 }
788
789 bool isAISrcB32() const {
790 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
791 }
792
793 bool isAISrcB16() const {
794 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
795 }
796
797 bool isAISrcV2B16() const {
798 return isAISrcB16();
799 }
800
801 bool isAISrcF32() const {
802 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
803 }
804
805 bool isAISrcF16() const {
806 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
807 }
808
809 bool isAISrcV2F16() const {
810 return isAISrcF16() || isAISrcB32();
811 }
812
813 bool isAISrc_64B64() const {
814 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
815 }
816
817 bool isAISrc_64_f64() const {
818 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
819 }
820
821 bool isAISrc_128_b32() const {
822 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
823 }
824
825 bool isAISrc_128B16() const {
826 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
827 }
828
829 bool isAISrc_128V2B16() const {
830 return isAISrc_128B16();
831 }
832
833 bool isAISrc_128_f32() const {
834 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
835 }
836
837 bool isAISrc_128F16() const {
838 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
839 }
840
841 bool isAISrc_128V2F16() const {
842 return isAISrc_128F16() || isAISrc_128_b32();
843 }
844
845 bool isVISrc_128_bf16() const {
846 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
847 }
848
849 bool isVISrc_128_f16() const {
850 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
851 }
852
853 bool isVISrc_128V2F16() const {
854 return isVISrc_128_f16() || isVISrc_128_b32();
855 }
856
857 bool isAISrc_256B64() const {
858 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
859 }
860
861 bool isAISrc_256_f64() const {
862 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
863 }
864
865 bool isAISrc_512_b32() const {
866 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
867 }
868
869 bool isAISrc_512B16() const {
870 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
871 }
872
873 bool isAISrc_512V2B16() const {
874 return isAISrc_512B16();
875 }
876
877 bool isAISrc_512_f32() const {
878 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
879 }
880
881 bool isAISrc_512F16() const {
882 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
883 }
884
885 bool isAISrc_512V2F16() const {
886 return isAISrc_512F16() || isAISrc_512_b32();
887 }
888
889 bool isAISrc_1024_b32() const {
890 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
891 }
892
893 bool isAISrc_1024B16() const {
894 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
895 }
896
897 bool isAISrc_1024V2B16() const {
898 return isAISrc_1024B16();
899 }
900
901 bool isAISrc_1024_f32() const {
902 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
903 }
904
905 bool isAISrc_1024F16() const {
906 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
907 }
908
909 bool isAISrc_1024V2F16() const {
910 return isAISrc_1024F16() || isAISrc_1024_b32();
911 }
912
913 bool isKImmFP32() const {
914 return isLiteralImm(MVT::f32);
915 }
916
917 bool isKImmFP16() const {
918 return isLiteralImm(MVT::f16);
919 }
920
921 bool isMem() const override {
922 return false;
923 }
924
925 bool isExpr() const {
926 return Kind == Expression;
927 }
928
929 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
930
931 bool isSWaitCnt() const;
932 bool isDepCtr() const;
933 bool isSDelayALU() const;
934 bool isHwreg() const;
935 bool isSendMsg() const;
936 bool isSplitBarrier() const;
937 bool isSwizzle() const;
938 bool isSMRDOffset8() const;
939 bool isSMEMOffset() const;
940 bool isSMRDLiteralOffset() const;
941 bool isDPP8() const;
942 bool isDPPCtrl() const;
943 bool isBLGP() const;
944 bool isGPRIdxMode() const;
945 bool isS16Imm() const;
946 bool isU16Imm() const;
947 bool isEndpgm() const;
948
949 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
950 return std::bind(P, *this);
951 }
952
953 StringRef getToken() const {
954 assert(isToken());
955 return StringRef(Tok.Data, Tok.Length);
956 }
957
958 int64_t getImm() const {
959 assert(isImm());
960 return Imm.Val;
961 }
962
963 void setImm(int64_t Val) {
964 assert(isImm());
965 Imm.Val = Val;
966 }
967
968 ImmTy getImmTy() const {
969 assert(isImm());
970 return Imm.Type;
971 }
972
973 MCRegister getReg() const override {
974 assert(isRegKind());
975 return Reg.RegNo;
976 }
977
978 SMLoc getStartLoc() const override {
979 return StartLoc;
980 }
981
982 SMLoc getEndLoc() const override {
983 return EndLoc;
984 }
985
986 SMRange getLocRange() const {
987 return SMRange(StartLoc, EndLoc);
988 }
989
990 Modifiers getModifiers() const {
991 assert(isRegKind() || isImmTy(ImmTyNone));
992 return isRegKind() ? Reg.Mods : Imm.Mods;
993 }
994
995 void setModifiers(Modifiers Mods) {
996 assert(isRegKind() || isImmTy(ImmTyNone));
997 if (isRegKind())
998 Reg.Mods = Mods;
999 else
1000 Imm.Mods = Mods;
1001 }
1002
1003 bool hasModifiers() const {
1004 return getModifiers().hasModifiers();
1005 }
1006
1007 bool hasFPModifiers() const {
1008 return getModifiers().hasFPModifiers();
1009 }
1010
1011 bool hasIntModifiers() const {
1012 return getModifiers().hasIntModifiers();
1013 }
1014
1015 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1016
1017 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1018
1019 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1020
1021 void addRegOperands(MCInst &Inst, unsigned N) const;
1022
1023 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1024 if (isRegKind())
1025 addRegOperands(Inst, N);
1026 else
1027 addImmOperands(Inst, N);
1028 }
1029
1030 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1031 Modifiers Mods = getModifiers();
1032 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1033 if (isRegKind()) {
1034 addRegOperands(Inst, N);
1035 } else {
1036 addImmOperands(Inst, N, false);
1037 }
1038 }
1039
1040 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1041 assert(!hasIntModifiers());
1042 addRegOrImmWithInputModsOperands(Inst, N);
1043 }
1044
1045 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1046 assert(!hasFPModifiers());
1047 addRegOrImmWithInputModsOperands(Inst, N);
1048 }
1049
1050 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1051 Modifiers Mods = getModifiers();
1052 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1053 assert(isRegKind());
1054 addRegOperands(Inst, N);
1055 }
1056
1057 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1058 assert(!hasIntModifiers());
1059 addRegWithInputModsOperands(Inst, N);
1060 }
1061
1062 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1063 assert(!hasFPModifiers());
1064 addRegWithInputModsOperands(Inst, N);
1065 }
1066
1067 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1068 // clang-format off
1069 switch (Type) {
1070 case ImmTyNone: OS << "None"; break;
1071 case ImmTyGDS: OS << "GDS"; break;
1072 case ImmTyLDS: OS << "LDS"; break;
1073 case ImmTyOffen: OS << "Offen"; break;
1074 case ImmTyIdxen: OS << "Idxen"; break;
1075 case ImmTyAddr64: OS << "Addr64"; break;
1076 case ImmTyOffset: OS << "Offset"; break;
1077 case ImmTyInstOffset: OS << "InstOffset"; break;
1078 case ImmTyOffset0: OS << "Offset0"; break;
1079 case ImmTyOffset1: OS << "Offset1"; break;
1080 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1081 case ImmTyCPol: OS << "CPol"; break;
1082 case ImmTyIndexKey8bit: OS << "index_key"; break;
1083 case ImmTyIndexKey16bit: OS << "index_key"; break;
1084 case ImmTyTFE: OS << "TFE"; break;
1085 case ImmTyD16: OS << "D16"; break;
1086 case ImmTyFORMAT: OS << "FORMAT"; break;
1087 case ImmTyClampSI: OS << "ClampSI"; break;
1088 case ImmTyOModSI: OS << "OModSI"; break;
1089 case ImmTyDPP8: OS << "DPP8"; break;
1090 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1091 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1092 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1093 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1094 case ImmTyDppFI: OS << "DppFI"; break;
1095 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1096 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1097 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1098 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1099 case ImmTyDMask: OS << "DMask"; break;
1100 case ImmTyDim: OS << "Dim"; break;
1101 case ImmTyUNorm: OS << "UNorm"; break;
1102 case ImmTyDA: OS << "DA"; break;
1103 case ImmTyR128A16: OS << "R128A16"; break;
1104 case ImmTyA16: OS << "A16"; break;
1105 case ImmTyLWE: OS << "LWE"; break;
1106 case ImmTyOff: OS << "Off"; break;
1107 case ImmTyExpTgt: OS << "ExpTgt"; break;
1108 case ImmTyExpCompr: OS << "ExpCompr"; break;
1109 case ImmTyExpVM: OS << "ExpVM"; break;
1110 case ImmTyHwreg: OS << "Hwreg"; break;
1111 case ImmTySendMsg: OS << "SendMsg"; break;
1112 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1113 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1114 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1115 case ImmTyOpSel: OS << "OpSel"; break;
1116 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1117 case ImmTyNegLo: OS << "NegLo"; break;
1118 case ImmTyNegHi: OS << "NegHi"; break;
1119 case ImmTySwizzle: OS << "Swizzle"; break;
1120 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1121 case ImmTyHigh: OS << "High"; break;
1122 case ImmTyBLGP: OS << "BLGP"; break;
1123 case ImmTyCBSZ: OS << "CBSZ"; break;
1124 case ImmTyABID: OS << "ABID"; break;
1125 case ImmTyEndpgm: OS << "Endpgm"; break;
1126 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1127 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1128 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1129 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1130 case ImmTyByteSel: OS << "ByteSel"; break;
1131 }
1132 // clang-format on
1133 }
1134
1135 void print(raw_ostream &OS) const override {
1136 switch (Kind) {
1137 case Register:
1138 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1139 break;
1140 case Immediate:
1141 OS << '<' << getImm();
1142 if (getImmTy() != ImmTyNone) {
1143 OS << " type: "; printImmTy(OS, getImmTy());
1144 }
1145 OS << " mods: " << Imm.Mods << '>';
1146 break;
1147 case Token:
1148 OS << '\'' << getToken() << '\'';
1149 break;
1150 case Expression:
1151 OS << "<expr " << *Expr << '>';
1152 break;
1153 }
1154 }
1155
1156 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1157 int64_t Val, SMLoc Loc,
1158 ImmTy Type = ImmTyNone,
1159 bool IsFPImm = false) {
1160 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1161 Op->Imm.Val = Val;
1162 Op->Imm.IsFPImm = IsFPImm;
1163 Op->Imm.Kind = ImmKindTyNone;
1164 Op->Imm.Type = Type;
1165 Op->Imm.Mods = Modifiers();
1166 Op->StartLoc = Loc;
1167 Op->EndLoc = Loc;
1168 return Op;
1169 }
1170
1171 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1172 StringRef Str, SMLoc Loc,
1173 bool HasExplicitEncodingSize = true) {
1174 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1175 Res->Tok.Data = Str.data();
1176 Res->Tok.Length = Str.size();
1177 Res->StartLoc = Loc;
1178 Res->EndLoc = Loc;
1179 return Res;
1180 }
1181
1182 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1183 unsigned RegNo, SMLoc S,
1184 SMLoc E) {
1185 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1186 Op->Reg.RegNo = RegNo;
1187 Op->Reg.Mods = Modifiers();
1188 Op->StartLoc = S;
1189 Op->EndLoc = E;
1190 return Op;
1191 }
1192
1193 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1194 const class MCExpr *Expr, SMLoc S) {
1195 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1196 Op->Expr = Expr;
1197 Op->StartLoc = S;
1198 Op->EndLoc = S;
1199 return Op;
1200 }
1201};
1202
1203raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1204 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1205 return OS;
1206}
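// As a rough example of the format produced above, a source operand written
// as -v0 prints its modifiers as "abs:0 neg: 1 sext:0".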
1207
1208//===----------------------------------------------------------------------===//
1209// AsmParser
1210//===----------------------------------------------------------------------===//
1211
1212// Holds info related to the current kernel, e.g. count of SGPRs used.
1213// Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
1214// next .amdgpu_hsa_kernel directive or at EOF.
1215class KernelScopeInfo {
1216 int SgprIndexUnusedMin = -1;
1217 int VgprIndexUnusedMin = -1;
1218 int AgprIndexUnusedMin = -1;
1219 MCContext *Ctx = nullptr;
1220 MCSubtargetInfo const *MSTI = nullptr;
1221
1222 void usesSgprAt(int i) {
1223 if (i >= SgprIndexUnusedMin) {
1224 SgprIndexUnusedMin = ++i;
1225 if (Ctx) {
1226 MCSymbol* const Sym =
1227 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1228 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1229 }
1230 }
1231 }
1232
1233 void usesVgprAt(int i) {
1234 if (i >= VgprIndexUnusedMin) {
1235 VgprIndexUnusedMin = ++i;
1236 if (Ctx) {
1237 MCSymbol* const Sym =
1238 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1239 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1240 VgprIndexUnusedMin);
1241 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1242 }
1243 }
1244 }
1245
1246 void usesAgprAt(int i) {
1247 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1248 if (!hasMAIInsts(*MSTI))
1249 return;
1250
1251 if (i >= AgprIndexUnusedMin) {
1252 AgprIndexUnusedMin = ++i;
1253 if (Ctx) {
1254 MCSymbol* const Sym =
1255 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1256 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1257
1258 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1259 MCSymbol* const vSym =
1260 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1261 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1262 VgprIndexUnusedMin);
1263 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1264 }
1265 }
1266 }
1267
1268public:
1269 KernelScopeInfo() = default;
1270
1271 void initialize(MCContext &Context) {
1272 Ctx = &Context;
1273 MSTI = Ctx->getSubtargetInfo();
1274
1275 usesSgprAt(SgprIndexUnusedMin = -1);
1276 usesVgprAt(VgprIndexUnusedMin = -1);
1277 if (hasMAIInsts(*MSTI)) {
1278 usesAgprAt(AgprIndexUnusedMin = -1);
1279 }
1280 }
1281
1282 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1283 unsigned RegWidth) {
1284 switch (RegKind) {
1285 case IS_SGPR:
1286 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1287 break;
1288 case IS_AGPR:
1289 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1290 break;
1291 case IS_VGPR:
1292 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1293 break;
1294 default:
1295 break;
1296 }
1297 }
1298};
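// Rough usage sketch of the class above: while assembling code inside an
// .amdgpu_hsa_kernel region, every parsed register operand is reported via
// usesRegister(); e.g. a use of s[6:7] calls usesSgprAt(7), which bumps
// SgprIndexUnusedMin to 8 and updates the .kernel.sgpr_count symbol, so later
// code (or tools reading the symbol table) sees the running register
// high-water mark.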
1299
1300class AMDGPUAsmParser : public MCTargetAsmParser {
1301 MCAsmParser &Parser;
1302
1303 unsigned ForcedEncodingSize = 0;
1304 bool ForcedDPP = false;
1305 bool ForcedSDWA = false;
1306 KernelScopeInfo KernelScope;
1307
1308 /// @name Auto-generated Match Functions
1309 /// {
1310
1311#define GET_ASSEMBLER_HEADER
1312#include "AMDGPUGenAsmMatcher.inc"
1313
1314 /// }
1315
1316private:
1317 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1318 bool OutOfRangeError(SMRange Range);
1319 /// Calculate VGPR/SGPR blocks required for the given target, reserved
1320 /// registers, and user-specified NextFreeXGPR values.
1321 ///
1322 /// \param Features [in] Target features, used for bug corrections.
1323 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1324 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1325 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1326 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1327 /// descriptor field, if valid.
1328 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1329 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1330 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1331 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1332 /// \param VGPRBlocks [out] Result VGPR block count.
1333 /// \param SGPRBlocks [out] Result SGPR block count.
1334 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1335 bool FlatScrUsed, bool XNACKUsed,
1336 std::optional<bool> EnableWavefrontSize32,
1337 unsigned NextFreeVGPR, SMRange VGPRRange,
1338 unsigned NextFreeSGPR, SMRange SGPRRange,
1339 unsigned &VGPRBlocks, unsigned &SGPRBlocks);
1340 bool ParseDirectiveAMDGCNTarget();
1341 bool ParseDirectiveAMDHSACodeObjectVersion();
1342 bool ParseDirectiveAMDHSAKernel();
1343 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1344 bool ParseDirectiveAMDKernelCodeT();
1345 // TODO: Possibly make subtargetHasRegister const.
1346 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1347 bool ParseDirectiveAMDGPUHsaKernel();
1348
1349 bool ParseDirectiveISAVersion();
1350 bool ParseDirectiveHSAMetadata();
1351 bool ParseDirectivePALMetadataBegin();
1352 bool ParseDirectivePALMetadata();
1353 bool ParseDirectiveAMDGPULDS();
1354
1355 /// Common code to parse out a block of text (typically YAML) between start and
1356 /// end directives.
1357 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1358 const char *AssemblerDirectiveEnd,
1359 std::string &CollectString);
1360
1361 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1362 RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1363 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1364 unsigned &RegNum, unsigned &RegWidth,
1365 bool RestoreOnFailure = false);
1366 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1367 unsigned &RegNum, unsigned &RegWidth,
1368 SmallVectorImpl<AsmToken> &Tokens);
1369 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1370 unsigned &RegWidth,
1371 SmallVectorImpl<AsmToken> &Tokens);
1372 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1373 unsigned &RegWidth,
1374 SmallVectorImpl<AsmToken> &Tokens);
1375 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1376 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1377 bool ParseRegRange(unsigned& Num, unsigned& Width);
1378 unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg,
1379 unsigned RegWidth, SMLoc Loc);
1380
1381 bool isRegister();
1382 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1383 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1384 void initializeGprCountSymbol(RegisterKind RegKind);
1385 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1386 unsigned RegWidth);
1387 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1388 bool IsAtomic);
1389
1390public:
1391 enum OperandMode {
1392 OperandMode_Default,
1393 OperandMode_NSA,
1394 };
1395
1396 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1397
1398 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1399 const MCInstrInfo &MII,
1400 const MCTargetOptions &Options)
1401 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1403
1404 if (getFeatureBits().none()) {
1405 // Set default features.
1406 copySTI().ToggleFeature("southern-islands");
1407 }
1408
1409 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1410
1411 {
1412 // TODO: make those pre-defined variables read-only.
1413 // Currently there is no suitable machinery in the core llvm-mc for this.
1414 // MCSymbol::isRedefinable is intended for another purpose, and
1415 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1416 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1417 MCContext &Ctx = getContext();
1418 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1419 MCSymbol *Sym =
1420 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1421 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1422 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1423 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1424 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1425 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1426 } else {
1427 MCSymbol *Sym =
1428 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1429 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1430 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1431 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1432 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1433 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1434 }
1435 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1436 initializeGprCountSymbol(IS_VGPR);
1437 initializeGprCountSymbol(IS_SGPR);
1438 } else
1439 KernelScope.initialize(getContext());
1440 }
1441 }
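// As a concrete illustration (assuming an HSA target such as gfx90a), the
// constructor above predefines .amdgcn.gfx_generation_number = 9,
// .amdgcn.gfx_generation_minor = 0 and .amdgcn.gfx_generation_stepping = 10,
// and assembly code may reference these symbols like any other absolute
// symbol.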
1442
1443 bool hasMIMG_R128() const {
1444 return AMDGPU::hasMIMG_R128(getSTI());
1445 }
1446
1447 bool hasPackedD16() const {
1448 return AMDGPU::hasPackedD16(getSTI());
1449 }
1450
1451 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1452
1453 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1454
1455 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1456
1457 bool isSI() const {
1458 return AMDGPU::isSI(getSTI());
1459 }
1460
1461 bool isCI() const {
1462 return AMDGPU::isCI(getSTI());
1463 }
1464
1465 bool isVI() const {
1466 return AMDGPU::isVI(getSTI());
1467 }
1468
1469 bool isGFX9() const {
1470 return AMDGPU::isGFX9(getSTI());
1471 }
1472
1473 // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1474 bool isGFX90A() const {
1475 return AMDGPU::isGFX90A(getSTI());
1476 }
1477
1478 bool isGFX940() const {
1479 return AMDGPU::isGFX940(getSTI());
1480 }
1481
1482 bool isGFX9Plus() const {
1483 return AMDGPU::isGFX9Plus(getSTI());
1484 }
1485
1486 bool isGFX10() const {
1487 return AMDGPU::isGFX10(getSTI());
1488 }
1489
1490 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1491
1492 bool isGFX11() const {
1493 return AMDGPU::isGFX11(getSTI());
1494 }
1495
1496 bool isGFX11Plus() const {
1497 return AMDGPU::isGFX11Plus(getSTI());
1498 }
1499
1500 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1501
1502 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1503
1504 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1505
1506 bool isGFX10_BEncoding() const {
1507 return AMDGPU::isGFX10_BEncoding(getSTI());
1508 }
1509
1510 bool hasInv2PiInlineImm() const {
1511 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1512 }
1513
1514 bool hasFlatOffsets() const {
1515 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1516 }
1517
1518 bool hasArchitectedFlatScratch() const {
1519 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1520 }
1521
1522 bool hasSGPR102_SGPR103() const {
1523 return !isVI() && !isGFX9();
1524 }
1525
1526 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1527
1528 bool hasIntClamp() const {
1529 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1530 }
1531
1532 bool hasPartialNSAEncoding() const {
1533 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1534 }
1535
1536 unsigned getNSAMaxSize(bool HasSampler = false) const {
1537 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1538 }
1539
1540 unsigned getMaxNumUserSGPRs() const {
1542 }
1543
1544 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1545
1546 AMDGPUTargetStreamer &getTargetStreamer() {
1547 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1548 return static_cast<AMDGPUTargetStreamer &>(TS);
1549 }
1550
1551 const MCRegisterInfo *getMRI() const {
1552 // We need this const_cast because for some reason getContext() is not const
1553 // in MCAsmParser.
1554 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1555 }
1556
1557 const MCInstrInfo *getMII() const {
1558 return &MII;
1559 }
1560
1561 const FeatureBitset &getFeatureBits() const {
1562 return getSTI().getFeatureBits();
1563 }
1564
1565 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1566 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1567 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1568
1569 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1570 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1571 bool isForcedDPP() const { return ForcedDPP; }
1572 bool isForcedSDWA() const { return ForcedSDWA; }
1573 ArrayRef<unsigned> getMatchedVariants() const;
1574 StringRef getMatchedVariantName() const;
1575
1576 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1577 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1578 bool RestoreOnFailure);
1579 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1580 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1581 SMLoc &EndLoc) override;
1582 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1583 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1584 unsigned Kind) override;
1585 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1586 OperandVector &Operands, MCStreamer &Out,
1587 uint64_t &ErrorInfo,
1588 bool MatchingInlineAsm) override;
1589 bool ParseDirective(AsmToken DirectiveID) override;
1590 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1591 OperandMode Mode = OperandMode_Default);
1592 StringRef parseMnemonicSuffix(StringRef Name);
1593 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1594 SMLoc NameLoc, OperandVector &Operands) override;
1595 //bool ProcessInstruction(MCInst &Inst);
1596
1598
1599 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1600
1601 ParseStatus
1602 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1603 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1604 std::function<bool(int64_t &)> ConvertResult = nullptr);
1605
1606 ParseStatus parseOperandArrayWithPrefix(
1607 const char *Prefix, OperandVector &Operands,
1608 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1609 bool (*ConvertResult)(int64_t &) = nullptr);
1610
1611 ParseStatus
1612 parseNamedBit(StringRef Name, OperandVector &Operands,
1613 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1614 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1616 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1617 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1618 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1619 SMLoc &StringLoc);
1620
1621 bool isModifier();
1622 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1623 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1624 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1625 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1626 bool parseSP3NegModifier();
1627 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1628 bool HasLit = false);
1630 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1631 bool HasLit = false);
1632 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1633 bool AllowImm = true);
1634 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1635 bool AllowImm = true);
1636 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1637 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1638 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1639 ParseStatus tryParseIndexKey(OperandVector &Operands,
1640 AMDGPUOperand::ImmTy ImmTy);
1641 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1642 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1643
1644 ParseStatus parseDfmtNfmt(int64_t &Format);
1645 ParseStatus parseUfmt(int64_t &Format);
1646 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1647 int64_t &Format);
1648 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1649 int64_t &Format);
1650 ParseStatus parseFORMAT(OperandVector &Operands);
1651 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1652 ParseStatus parseNumericFormat(int64_t &Format);
1653 ParseStatus parseFlatOffset(OperandVector &Operands);
1654 ParseStatus parseR128A16(OperandVector &Operands);
1656 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1657 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1658
1659 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1660
1661 bool parseCnt(int64_t &IntVal);
1662 ParseStatus parseSWaitCnt(OperandVector &Operands);
1663
1664 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1665 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1666 ParseStatus parseDepCtr(OperandVector &Operands);
1667
1668 bool parseDelay(int64_t &Delay);
1669 ParseStatus parseSDelayALU(OperandVector &Operands);
1670
1671 ParseStatus parseHwreg(OperandVector &Operands);
1672
1673private:
1674 struct OperandInfoTy {
1675 SMLoc Loc;
1676 int64_t Val;
1677 bool IsSymbolic = false;
1678 bool IsDefined = false;
1679
1680 OperandInfoTy(int64_t Val) : Val(Val) {}
1681 };
1682
1683 struct StructuredOpField : OperandInfoTy {
1684 StringLiteral Id;
1685 StringLiteral Desc;
1686 unsigned Width;
1687 bool IsDefined = false;
1688
1689 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1690 int64_t Default)
1691 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1692 virtual ~StructuredOpField() = default;
1693
1694 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1695 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1696 return false;
1697 }
1698
1699 virtual bool validate(AMDGPUAsmParser &Parser) const {
1700 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1701 return Error(Parser, "not supported on this GPU");
1702 if (!isUIntN(Width, Val))
1703 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1704 return true;
1705 }
1706 };
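// A minimal sketch of how these fields behave: a StructuredOpField declared
// with Width = 5 accepts values in [0, 31]; validate() reports
// "invalid <Desc>: only 5-bit values are legal" for anything larger, and
// "invalid <Desc>: not supported on this GPU" when a symbolic name resolved
// to OPR_ID_UNSUPPORTED.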
1707
1708 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1709 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1710
1711 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1712 bool validateSendMsg(const OperandInfoTy &Msg,
1713 const OperandInfoTy &Op,
1714 const OperandInfoTy &Stream);
1715
1716 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1717 OperandInfoTy &Width);
1718
1719 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1720 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1721 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1722
1723 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1724 const OperandVector &Operands) const;
1725 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1726 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1727 SMLoc getLitLoc(const OperandVector &Operands,
1728 bool SearchMandatoryLiterals = false) const;
1729 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1730 SMLoc getConstLoc(const OperandVector &Operands) const;
1731 SMLoc getInstLoc(const OperandVector &Operands) const;
1732
1733 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1734 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1735 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1736 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1737 bool validateSOPLiteral(const MCInst &Inst) const;
1738 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1739 bool validateVOPDRegBankConstraints(const MCInst &Inst,
1740 const OperandVector &Operands);
1741 bool validateIntClampSupported(const MCInst &Inst);
1742 bool validateMIMGAtomicDMask(const MCInst &Inst);
1743 bool validateMIMGGatherDMask(const MCInst &Inst);
1744 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1745 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1746 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1747 bool validateMIMGD16(const MCInst &Inst);
1748 bool validateMIMGMSAA(const MCInst &Inst);
1749 bool validateOpSel(const MCInst &Inst);
1750 bool validateNeg(const MCInst &Inst, int OpName);
1751 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1752 bool validateVccOperand(unsigned Reg) const;
1753 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1754 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1755 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1756 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1757 bool validateAGPRLdSt(const MCInst &Inst) const;
1758 bool validateVGPRAlign(const MCInst &Inst) const;
1759 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1760 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1761 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1762 bool validateDivScale(const MCInst &Inst);
1763 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1764 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1765 const SMLoc &IDLoc);
1766 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1767 const unsigned CPol);
1768 bool validateExeczVcczOperands(const OperandVector &Operands);
1769 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1770 std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1771 unsigned getConstantBusLimit(unsigned Opcode) const;
1772 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1773 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1774 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1775
1776 bool isSupportedMnemo(StringRef Mnemo,
1777 const FeatureBitset &FBS);
1778 bool isSupportedMnemo(StringRef Mnemo,
1779 const FeatureBitset &FBS,
1780 ArrayRef<unsigned> Variants);
1781 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1782
1783 bool isId(const StringRef Id) const;
1784 bool isId(const AsmToken &Token, const StringRef Id) const;
1785 bool isToken(const AsmToken::TokenKind Kind) const;
1786 StringRef getId() const;
1787 bool trySkipId(const StringRef Id);
1788 bool trySkipId(const StringRef Pref, const StringRef Id);
1789 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1790 bool trySkipToken(const AsmToken::TokenKind Kind);
1791 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1792 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1793 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1794
1795 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1796 AsmToken::TokenKind getTokenKind() const;
1797 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1798 bool parseExpr(OperandVector &Operands);
1799 StringRef getTokenStr() const;
1800 AsmToken peekToken(bool ShouldSkipSpace = true);
1801 AsmToken getToken() const;
1802 SMLoc getLoc() const;
1803 void lex();
1804
1805public:
1806 void onBeginOfFile() override;
1807 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1808
1809 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1810
1811 ParseStatus parseExpTgt(OperandVector &Operands);
1812 ParseStatus parseSendMsg(OperandVector &Operands);
1813 ParseStatus parseInterpSlot(OperandVector &Operands);
1814 ParseStatus parseInterpAttr(OperandVector &Operands);
1815 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1816 ParseStatus parseBoolReg(OperandVector &Operands);
1817
1818 bool parseSwizzleOperand(int64_t &Op,
1819 const unsigned MinVal,
1820 const unsigned MaxVal,
1821 const StringRef ErrMsg,
1822 SMLoc &Loc);
1823 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1824 const unsigned MinVal,
1825 const unsigned MaxVal,
1826 const StringRef ErrMsg);
1827 ParseStatus parseSwizzle(OperandVector &Operands);
1828 bool parseSwizzleOffset(int64_t &Imm);
1829 bool parseSwizzleMacro(int64_t &Imm);
1830 bool parseSwizzleQuadPerm(int64_t &Imm);
1831 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1832 bool parseSwizzleBroadcast(int64_t &Imm);
1833 bool parseSwizzleSwap(int64_t &Imm);
1834 bool parseSwizzleReverse(int64_t &Imm);
1835
1836 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1837 int64_t parseGPRIdxMacro();
1838
1839 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1840 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1841
1842 ParseStatus parseOModSI(OperandVector &Operands);
1843
1844 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1845 OptionalImmIndexMap &OptionalIdx);
1846 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1847 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1848 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1849 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1850
1851 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1852 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1853 OptionalImmIndexMap &OptionalIdx);
1854 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1855 OptionalImmIndexMap &OptionalIdx);
1856
1857 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1858 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1859
1860 bool parseDimId(unsigned &Encoding);
1861 ParseStatus parseDim(OperandVector &Operands);
1862 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1863 ParseStatus parseDPP8(OperandVector &Operands);
1864 ParseStatus parseDPPCtrl(OperandVector &Operands);
1865 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1866 int64_t parseDPPCtrlSel(StringRef Ctrl);
1867 int64_t parseDPPCtrlPerm();
1868 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1869 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1870 cvtDPP(Inst, Operands, true);
1871 }
1872 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1873 bool IsDPP8 = false);
1874 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1875 cvtVOP3DPP(Inst, Operands, true);
1876 }
1877
1878 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1879 AMDGPUOperand::ImmTy Type);
1880 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1881 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1882 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1883 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1884 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1885 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1886 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1887 uint64_t BasicInstType,
1888 bool SkipDstVcc = false,
1889 bool SkipSrcVcc = false);
1890
1891 ParseStatus parseEndpgm(OperandVector &Operands);
1892
1894};
1895
1896} // end anonymous namespace
1897
1898// May be called with an integer type of equivalent bit width.
1899static const fltSemantics *getFltSemantics(unsigned Size) {
1900 switch (Size) {
1901 case 4:
1902 return &APFloat::IEEEsingle();
1903 case 8:
1904 return &APFloat::IEEEdouble();
1905 case 2:
1906 return &APFloat::IEEEhalf();
1907 default:
1908 llvm_unreachable("unsupported fp type");
1909 }
1910}
1911
1912static const fltSemantics *getFltSemantics(MVT VT) {
1913 return getFltSemantics(VT.getSizeInBits() / 8);
1914}
1915
1916static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1917 switch (OperandType) {
1918 // When floating-point immediate is used as operand of type i16, the 32-bit
1919 // representation of the constant truncated to the 16 LSBs should be used.
1939 return &APFloat::IEEEsingle();
1945 return &APFloat::IEEEdouble();
1954 return &APFloat::IEEEhalf();
1962 return &APFloat::BFloat();
1963 default:
1964 llvm_unreachable("unsupported fp type");
1965 }
1966}
1967
1968//===----------------------------------------------------------------------===//
1969// Operand
1970//===----------------------------------------------------------------------===//
1971
1972static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1973 bool Lost;
1974
1975 // Convert literal to single precision
1976 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1977 APFloat::rmNearestTiesToEven,
1978 &Lost);
1979 // We allow precision loss but not overflow or underflow
1980 if (Status != APFloat::opOK &&
1981 Lost &&
1982 ((Status & APFloat::opOverflow) != 0 ||
1983 (Status & APFloat::opUnderflow) != 0)) {
1984 return false;
1985 }
1986
1987 return true;
1988}
1989
1990static bool isSafeTruncation(int64_t Val, unsigned Size) {
1991 return isUIntN(Size, Val) || isIntN(Size, Val);
1992}
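// Illustrative examples (not part of the original source): isSafeTruncation
// accepts a value that fits in Size bits as either an unsigned or a signed
// integer. For Size == 16, -1 (fits signed) and 0xFFFF (fits unsigned) are
// safe truncations, while 0x10000 fits neither and is rejected.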
1993
1994static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1995 if (VT.getScalarType() == MVT::i16)
1996 return isInlinableLiteral32(Val, HasInv2Pi);
1997
1998 if (VT.getScalarType() == MVT::f16)
1999 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2000
2001 assert(VT.getScalarType() == MVT::bf16);
2002
2003 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2004}
2005
2006bool AMDGPUOperand::isInlinableImm(MVT type) const {
2007
2008 // This is a hack to enable named inline values like
2009 // shared_base with both 32-bit and 64-bit operands.
2010 // Note that these values are defined as
2011 // 32-bit operands only.
2012 if (isInlineValue()) {
2013 return true;
2014 }
2015
2016 if (!isImmTy(ImmTyNone)) {
2017 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2018 return false;
2019 }
2020 // TODO: We should avoid using host float here. It would be better to
2021 // check the float bit values which is what a few other places do.
2022 // We've had bot failures before due to weird NaN support on mips hosts.
2023
2024 APInt Literal(64, Imm.Val);
2025
2026 if (Imm.IsFPImm) { // We got fp literal token
2027 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2028 return AMDGPU::isInlinableLiteral64(Imm.Val,
2029 AsmParser->hasInv2PiInlineImm());
2030 }
2031
2032 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2033 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2034 return false;
2035
2036 if (type.getScalarSizeInBits() == 16) {
2037 bool Lost = false;
2038 switch (type.getScalarType().SimpleTy) {
2039 default:
2040 llvm_unreachable("unknown 16-bit type");
2041 case MVT::bf16:
2042 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2043 &Lost);
2044 break;
2045 case MVT::f16:
2046 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2047 &Lost);
2048 break;
2049 case MVT::i16:
2050 FPLiteral.convert(APFloatBase::IEEEsingle(),
2051 APFloat::rmNearestTiesToEven, &Lost);
2052 break;
2053 }
2054 // We need to use the 32-bit representation here because when a
2055 // floating-point inline constant is used as an i16 operand, its 32-bit
2056 // representation will be used. We need the 32-bit value to check if it
2057 // is an FP inline constant.
2058 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2059 return isInlineableLiteralOp16(ImmVal, type,
2060 AsmParser->hasInv2PiInlineImm());
2061 }
2062
2063 // Check if single precision literal is inlinable
2064 return AMDGPU::isInlinableLiteral32(
2065 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2066 AsmParser->hasInv2PiInlineImm());
2067 }
2068
2069 // We got int literal token.
2070 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2071 return AMDGPU::isInlinableLiteral64(Imm.Val,
2072 AsmParser->hasInv2PiInlineImm());
2073 }
2074
2075 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2076 return false;
2077 }
2078
2079 if (type.getScalarSizeInBits() == 16) {
2080 return isInlineableLiteralOp16(
2081 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2082 type, AsmParser->hasInv2PiInlineImm());
2083 }
2084
2085 return AMDGPU::isInlinableLiteral32(
2086 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2087 AsmParser->hasInv2PiInlineImm());
2088}
2089
2090bool AMDGPUOperand::isLiteralImm(MVT type) const {
2091 // Check that this immediate can be added as literal
2092 if (!isImmTy(ImmTyNone)) {
2093 return false;
2094 }
2095
2096 if (!Imm.IsFPImm) {
2097 // We got int literal token.
2098
2099 if (type == MVT::f64 && hasFPModifiers()) {
2100 // FP modifiers cannot be applied to an integer literal while preserving
2101 // the same semantics for VOP1/2/C and VOP3, because of integer truncation.
2102 // To avoid ambiguity, reject these cases.
2103 return false;
2104 }
2105
2106 unsigned Size = type.getSizeInBits();
2107 if (Size == 64)
2108 Size = 32;
2109
2110 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2111 // types.
2112 return isSafeTruncation(Imm.Val, Size);
2113 }
2114
2115 // We got fp literal token
2116 if (type == MVT::f64) { // Expected 64-bit fp operand
2117 // We would set the low 32 bits of the literal to zeroes, but we accept such literals
2118 return true;
2119 }
2120
2121 if (type == MVT::i64) { // Expected 64-bit int operand
2122 // We don't allow fp literals in 64-bit integer instructions. It is
2123 // unclear how we should encode them.
2124 return false;
2125 }
2126
2127 // We allow fp literals with f16x2 operands assuming that the specified
2128 // literal goes into the lower half and the upper half is zero. We also
2129 // require that the literal may be losslessly converted to f16.
2130 //
2131 // For i16x2 operands, we assume that the specified literal is encoded as a
2132 // single-precision float. This is pretty odd, but it matches SP3 and what
2133 // happens in hardware.
2134 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2135 : (type == MVT::v2i16) ? MVT::f32
2136 : (type == MVT::v2f32) ? MVT::f32
2137 : type;
2138
2139 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2140 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2141}
2142
2143bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2144 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2145}
2146
2147bool AMDGPUOperand::isVRegWithInputMods() const {
2148 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2149 // GFX90A allows DPP on 64-bit operands.
2150 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2151 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2152}
2153
2154template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2155 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2156 : AMDGPU::VGPR_16_Lo128RegClassID);
2157}
2158
2159bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2160 if (AsmParser->isVI())
2161 return isVReg32();
2162 else if (AsmParser->isGFX9Plus())
2163 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2164 else
2165 return false;
2166}
2167
2168bool AMDGPUOperand::isSDWAFP16Operand() const {
2169 return isSDWAOperand(MVT::f16);
2170}
2171
2172bool AMDGPUOperand::isSDWAFP32Operand() const {
2173 return isSDWAOperand(MVT::f32);
2174}
2175
2176bool AMDGPUOperand::isSDWAInt16Operand() const {
2177 return isSDWAOperand(MVT::i16);
2178}
2179
2180bool AMDGPUOperand::isSDWAInt32Operand() const {
2181 return isSDWAOperand(MVT::i32);
2182}
2183
2184bool AMDGPUOperand::isBoolReg() const {
2185 auto FB = AsmParser->getFeatureBits();
2186 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2187 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2188}
2189
2190uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2191{
2192 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2193 assert(Size == 2 || Size == 4 || Size == 8);
2194
2195 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2196
2197 if (Imm.Mods.Abs) {
2198 Val &= ~FpSignMask;
2199 }
2200 if (Imm.Mods.Neg) {
2201 Val ^= FpSignMask;
2202 }
2203
2204 return Val;
2205}
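// Worked example (illustrative, not from the original source): for a 32-bit
// operand, FpSignMask is 0x80000000. The 'abs' modifier clears the sign bit,
// and 'neg' flips it, so 0x3F800000 (1.0f) becomes 0xBF800000 (-1.0f).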
2206
2207void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2208 if (isExpr()) {
2209 Inst.addOperand(MCOperand::createExpr(Expr));
2210 return;
2211 }
2212
2213 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2214 Inst.getNumOperands())) {
2215 addLiteralImmOperand(Inst, Imm.Val,
2216 ApplyModifiers &
2217 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2218 } else {
2219 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2220 Inst.addOperand(MCOperand::createImm(Imm.Val));
2221 setImmKindNone();
2222 }
2223}
2224
2225void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2226 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2227 auto OpNum = Inst.getNumOperands();
2228 // Check that this operand accepts literals
2229 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2230
2231 if (ApplyModifiers) {
2232 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2233 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2234 Val = applyInputFPModifiers(Val, Size);
2235 }
2236
2237 APInt Literal(64, Val);
2238 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2239
2240 if (Imm.IsFPImm) { // We got fp literal token
2241 switch (OpTy) {
2247 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2248 AsmParser->hasInv2PiInlineImm())) {
2249 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2250 setImmKindConst();
2251 return;
2252 }
2253
2254 // Non-inlineable
2255 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2256 // For fp operands we check if low 32 bits are zeros
2257 if (Literal.getLoBits(32) != 0) {
2258 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2259 "Can't encode literal as exact 64-bit floating-point operand. "
2260 "Low 32-bits will be set to zero");
2261 Val &= 0xffffffff00000000u;
2262 }
2263
2264 Inst.addOperand(MCOperand::createImm(Val));
2265 setImmKindLiteral();
2266 return;
2267 }
2268
2269 // We don't allow fp literals in 64-bit integer instructions. It is
2270 // unclear how we should encode them. This case should be checked earlier
2271 // in predicate methods (isLiteralImm())
2272 llvm_unreachable("fp literal in 64-bit integer instruction.");
2273
2281 if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
2282 // This is the 1/(2*pi) constant that is going to be truncated to bf16 with
2283 // a loss of precision. The constant represents the idiomatic fp32 value of
2284 // 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16 bits
2285 // cleared. Prevent rounding below.
2286 Inst.addOperand(MCOperand::createImm(0x3e22));
2287 setImmKindLiteral();
2288 return;
2289 }
2290 [[fallthrough]];
2291
2319 bool lost;
2320 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2321 // Convert literal to single precision
2322 FPLiteral.convert(*getOpFltSemantics(OpTy),
2323 APFloat::rmNearestTiesToEven, &lost);
2324 // We allow precision loss but not overflow or underflow. This should be
2325 // checked earlier in isLiteralImm()
2326
2327 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2328 Inst.addOperand(MCOperand::createImm(ImmVal));
2329 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2330 setImmKindMandatoryLiteral();
2331 } else {
2332 setImmKindLiteral();
2333 }
2334 return;
2335 }
2336 default:
2337 llvm_unreachable("invalid operand size");
2338 }
2339
2340 return;
2341 }
2342
2343 // We got int literal token.
2344 // Only sign extend inline immediates.
2345 switch (OpTy) {
2361 if (isSafeTruncation(Val, 32) &&
2362 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2363 AsmParser->hasInv2PiInlineImm())) {
2364 Inst.addOperand(MCOperand::createImm(Val));
2365 setImmKindConst();
2366 return;
2367 }
2368
2369 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2370 setImmKindLiteral();
2371 return;
2372
2378 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2379 Inst.addOperand(MCOperand::createImm(Val));
2380 setImmKindConst();
2381 return;
2382 }
2383
2384 Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
2385 : Lo_32(Val);
2386
2387 Inst.addOperand(MCOperand::createImm(Val));
2388 setImmKindLiteral();
2389 return;
2390
2394 if (isSafeTruncation(Val, 16) &&
2395 AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2396 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2397 setImmKindConst();
2398 return;
2399 }
2400
2401 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2402 setImmKindLiteral();
2403 return;
2404
2409 if (isSafeTruncation(Val, 16) &&
2410 AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2411 AsmParser->hasInv2PiInlineImm())) {
2412 Inst.addOperand(MCOperand::createImm(Val));
2413 setImmKindConst();
2414 return;
2415 }
2416
2417 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2418 setImmKindLiteral();
2419 return;
2420
2425 if (isSafeTruncation(Val, 16) &&
2426 AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2427 AsmParser->hasInv2PiInlineImm())) {
2428 Inst.addOperand(MCOperand::createImm(Val));
2429 setImmKindConst();
2430 return;
2431 }
2432
2433 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2434 setImmKindLiteral();
2435 return;
2436
2439 assert(isSafeTruncation(Val, 16));
2440 assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2441 Inst.addOperand(MCOperand::createImm(Val));
2442 return;
2443 }
2446 assert(isSafeTruncation(Val, 16));
2447 assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2448 AsmParser->hasInv2PiInlineImm()));
2449
2450 Inst.addOperand(MCOperand::createImm(Val));
2451 return;
2452 }
2453
2456 assert(isSafeTruncation(Val, 16));
2457 assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2458 AsmParser->hasInv2PiInlineImm()));
2459
2460 Inst.addOperand(MCOperand::createImm(Val));
2461 return;
2462 }
2463
2465 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2466 setImmKindMandatoryLiteral();
2467 return;
2469 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2470 setImmKindMandatoryLiteral();
2471 return;
2472 default:
2473 llvm_unreachable("invalid operand size");
2474 }
2475}
2476
2477void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2478 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2479}
2480
2481bool AMDGPUOperand::isInlineValue() const {
2482 return isRegKind() && ::isInlineValue(getReg());
2483}
2484
2485//===----------------------------------------------------------------------===//
2486// AsmParser
2487//===----------------------------------------------------------------------===//
2488
2489static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2490 if (Is == IS_VGPR) {
2491 switch (RegWidth) {
2492 default: return -1;
2493 case 32:
2494 return AMDGPU::VGPR_32RegClassID;
2495 case 64:
2496 return AMDGPU::VReg_64RegClassID;
2497 case 96:
2498 return AMDGPU::VReg_96RegClassID;
2499 case 128:
2500 return AMDGPU::VReg_128RegClassID;
2501 case 160:
2502 return AMDGPU::VReg_160RegClassID;
2503 case 192:
2504 return AMDGPU::VReg_192RegClassID;
2505 case 224:
2506 return AMDGPU::VReg_224RegClassID;
2507 case 256:
2508 return AMDGPU::VReg_256RegClassID;
2509 case 288:
2510 return AMDGPU::VReg_288RegClassID;
2511 case 320:
2512 return AMDGPU::VReg_320RegClassID;
2513 case 352:
2514 return AMDGPU::VReg_352RegClassID;
2515 case 384:
2516 return AMDGPU::VReg_384RegClassID;
2517 case 512:
2518 return AMDGPU::VReg_512RegClassID;
2519 case 1024:
2520 return AMDGPU::VReg_1024RegClassID;
2521 }
2522 } else if (Is == IS_TTMP) {
2523 switch (RegWidth) {
2524 default: return -1;
2525 case 32:
2526 return AMDGPU::TTMP_32RegClassID;
2527 case 64:
2528 return AMDGPU::TTMP_64RegClassID;
2529 case 128:
2530 return AMDGPU::TTMP_128RegClassID;
2531 case 256:
2532 return AMDGPU::TTMP_256RegClassID;
2533 case 512:
2534 return AMDGPU::TTMP_512RegClassID;
2535 }
2536 } else if (Is == IS_SGPR) {
2537 switch (RegWidth) {
2538 default: return -1;
2539 case 32:
2540 return AMDGPU::SGPR_32RegClassID;
2541 case 64:
2542 return AMDGPU::SGPR_64RegClassID;
2543 case 96:
2544 return AMDGPU::SGPR_96RegClassID;
2545 case 128:
2546 return AMDGPU::SGPR_128RegClassID;
2547 case 160:
2548 return AMDGPU::SGPR_160RegClassID;
2549 case 192:
2550 return AMDGPU::SGPR_192RegClassID;
2551 case 224:
2552 return AMDGPU::SGPR_224RegClassID;
2553 case 256:
2554 return AMDGPU::SGPR_256RegClassID;
2555 case 288:
2556 return AMDGPU::SGPR_288RegClassID;
2557 case 320:
2558 return AMDGPU::SGPR_320RegClassID;
2559 case 352:
2560 return AMDGPU::SGPR_352RegClassID;
2561 case 384:
2562 return AMDGPU::SGPR_384RegClassID;
2563 case 512:
2564 return AMDGPU::SGPR_512RegClassID;
2565 }
2566 } else if (Is == IS_AGPR) {
2567 switch (RegWidth) {
2568 default: return -1;
2569 case 32:
2570 return AMDGPU::AGPR_32RegClassID;
2571 case 64:
2572 return AMDGPU::AReg_64RegClassID;
2573 case 96:
2574 return AMDGPU::AReg_96RegClassID;
2575 case 128:
2576 return AMDGPU::AReg_128RegClassID;
2577 case 160:
2578 return AMDGPU::AReg_160RegClassID;
2579 case 192:
2580 return AMDGPU::AReg_192RegClassID;
2581 case 224:
2582 return AMDGPU::AReg_224RegClassID;
2583 case 256:
2584 return AMDGPU::AReg_256RegClassID;
2585 case 288:
2586 return AMDGPU::AReg_288RegClassID;
2587 case 320:
2588 return AMDGPU::AReg_320RegClassID;
2589 case 352:
2590 return AMDGPU::AReg_352RegClassID;
2591 case 384:
2592 return AMDGPU::AReg_384RegClassID;
2593 case 512:
2594 return AMDGPU::AReg_512RegClassID;
2595 case 1024:
2596 return AMDGPU::AReg_1024RegClassID;
2597 }
2598 }
2599 return -1;
2600}
2601
2604 .Case("exec", AMDGPU::EXEC)
2605 .Case("vcc", AMDGPU::VCC)
2606 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2607 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2608 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2609 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2610 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2611 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2612 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2613 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2614 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2615 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2616 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2617 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2618 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2619 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2620 .Case("m0", AMDGPU::M0)
2621 .Case("vccz", AMDGPU::SRC_VCCZ)
2622 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2623 .Case("execz", AMDGPU::SRC_EXECZ)
2624 .Case("src_execz", AMDGPU::SRC_EXECZ)
2625 .Case("scc", AMDGPU::SRC_SCC)
2626 .Case("src_scc", AMDGPU::SRC_SCC)
2627 .Case("tba", AMDGPU::TBA)
2628 .Case("tma", AMDGPU::TMA)
2629 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2630 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2631 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2632 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2633 .Case("vcc_lo", AMDGPU::VCC_LO)
2634 .Case("vcc_hi", AMDGPU::VCC_HI)
2635 .Case("exec_lo", AMDGPU::EXEC_LO)
2636 .Case("exec_hi", AMDGPU::EXEC_HI)
2637 .Case("tma_lo", AMDGPU::TMA_LO)
2638 .Case("tma_hi", AMDGPU::TMA_HI)
2639 .Case("tba_lo", AMDGPU::TBA_LO)
2640 .Case("tba_hi", AMDGPU::TBA_HI)
2641 .Case("pc", AMDGPU::PC_REG)
2642 .Case("null", AMDGPU::SGPR_NULL)
2643 .Default(AMDGPU::NoRegister);
2644}
2645
2646bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2647 SMLoc &EndLoc, bool RestoreOnFailure) {
2648 auto R = parseRegister();
2649 if (!R) return true;
2650 assert(R->isReg());
2651 RegNo = R->getReg();
2652 StartLoc = R->getStartLoc();
2653 EndLoc = R->getEndLoc();
2654 return false;
2655}
2656
2657bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2658 SMLoc &EndLoc) {
2659 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2660}
2661
2662ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2663 SMLoc &EndLoc) {
2664 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2665 bool PendingErrors = getParser().hasPendingError();
2666 getParser().clearPendingErrors();
2667 if (PendingErrors)
2668 return ParseStatus::Failure;
2669 if (Result)
2670 return ParseStatus::NoMatch;
2671 return ParseStatus::Success;
2672}
2673
2674bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2675 RegisterKind RegKind, unsigned Reg1,
2676 SMLoc Loc) {
2677 switch (RegKind) {
2678 case IS_SPECIAL:
2679 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2680 Reg = AMDGPU::EXEC;
2681 RegWidth = 64;
2682 return true;
2683 }
2684 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2685 Reg = AMDGPU::FLAT_SCR;
2686 RegWidth = 64;
2687 return true;
2688 }
2689 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2690 Reg = AMDGPU::XNACK_MASK;
2691 RegWidth = 64;
2692 return true;
2693 }
2694 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2695 Reg = AMDGPU::VCC;
2696 RegWidth = 64;
2697 return true;
2698 }
2699 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2700 Reg = AMDGPU::TBA;
2701 RegWidth = 64;
2702 return true;
2703 }
2704 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2705 Reg = AMDGPU::TMA;
2706 RegWidth = 64;
2707 return true;
2708 }
2709 Error(Loc, "register does not fit in the list");
2710 return false;
2711 case IS_VGPR:
2712 case IS_SGPR:
2713 case IS_AGPR:
2714 case IS_TTMP:
2715 if (Reg1 != Reg + RegWidth / 32) {
2716 Error(Loc, "registers in a list must have consecutive indices");
2717 return false;
2718 }
2719 RegWidth += 32;
2720 return true;
2721 default:
2722 llvm_unreachable("unexpected register kind");
2723 }
2724}
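// Illustrative example (not from the original source): parsing the list
// [s0,s1,s2,s3] succeeds because each new register index equals the first
// index plus RegWidth/32 at that point, growing RegWidth by 32 per element.
// A list like [s0,s2] is rejected with "registers in a list must have
// consecutive indices".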
2725
2726struct RegInfo {
2727 StringLiteral Name;
2728 RegisterKind Kind;
2729};
2730
2731static constexpr RegInfo RegularRegisters[] = {
2732 {{"v"}, IS_VGPR},
2733 {{"s"}, IS_SGPR},
2734 {{"ttmp"}, IS_TTMP},
2735 {{"acc"}, IS_AGPR},
2736 {{"a"}, IS_AGPR},
2737};
2738
2739static bool isRegularReg(RegisterKind Kind) {
2740 return Kind == IS_VGPR ||
2741 Kind == IS_SGPR ||
2742 Kind == IS_TTMP ||
2743 Kind == IS_AGPR;
2744}
2745
2746static const RegInfo* getRegularRegInfo(StringRef Str) {
2747 for (const RegInfo &Reg : RegularRegisters)
2748 if (Str.starts_with(Reg.Name))
2749 return &Reg;
2750 return nullptr;
2751}
2752
2753static bool getRegNum(StringRef Str, unsigned& Num) {
2754 return !Str.getAsInteger(10, Num);
2755}
2756
2757bool
2758AMDGPUAsmParser::isRegister(const AsmToken &Token,
2759 const AsmToken &NextToken) const {
2760
2761 // A list of consecutive registers: [s0,s1,s2,s3]
2762 if (Token.is(AsmToken::LBrac))
2763 return true;
2764
2765 if (!Token.is(AsmToken::Identifier))
2766 return false;
2767
2768 // A single register like s0 or a range of registers like s[0:1]
2769
2770 StringRef Str = Token.getString();
2771 const RegInfo *Reg = getRegularRegInfo(Str);
2772 if (Reg) {
2773 StringRef RegName = Reg->Name;
2774 StringRef RegSuffix = Str.substr(RegName.size());
2775 if (!RegSuffix.empty()) {
2776 RegSuffix.consume_back(".l");
2777 RegSuffix.consume_back(".h");
2778 unsigned Num;
2779 // A single register with an index: rXX
2780 if (getRegNum(RegSuffix, Num))
2781 return true;
2782 } else {
2783 // A range of registers: r[XX:YY].
2784 if (NextToken.is(AsmToken::LBrac))
2785 return true;
2786 }
2787 }
2788
2789 return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2790}
2791
2792bool
2793AMDGPUAsmParser::isRegister()
2794{
2795 return isRegister(getToken(), peekToken());
2796}
2797
2798unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2799 unsigned SubReg, unsigned RegWidth,
2800 SMLoc Loc) {
2801 assert(isRegularReg(RegKind));
2802
2803 unsigned AlignSize = 1;
2804 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2805 // SGPR and TTMP registers must be aligned.
2806 // Max required alignment is 4 dwords.
2807 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2808 }
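// Worked example (illustrative): for s[0:3], RegWidth is 128, so AlignSize =
// min(bit_ceil(4), 4) = 4 and the first index 0 is properly aligned. s[2:5]
// has the same width, but index 2 is not a multiple of 4, so it is rejected
// below with "invalid register alignment". VGPRs keep AlignSize = 1, so
// v[2:5] is accepted.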
2809
2810 if (RegNum % AlignSize != 0) {
2811 Error(Loc, "invalid register alignment");
2812 return AMDGPU::NoRegister;
2813 }
2814
2815 unsigned RegIdx = RegNum / AlignSize;
2816 int RCID = getRegClass(RegKind, RegWidth);
2817 if (RCID == -1) {
2818 Error(Loc, "invalid or unsupported register size");
2819 return AMDGPU::NoRegister;
2820 }
2821
2822 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2823 const MCRegisterClass RC = TRI->getRegClass(RCID);
2824 if (RegIdx >= RC.getNumRegs()) {
2825 Error(Loc, "register index is out of range");
2826 return AMDGPU::NoRegister;
2827 }
2828
2829 unsigned Reg = RC.getRegister(RegIdx);
2830
2831 if (SubReg) {
2832 Reg = TRI->getSubReg(Reg, SubReg);
2833
2834 // Currently all regular registers have their .l and .h subregisters, so
2835 // we should never need to generate an error here.
2836 assert(Reg && "Invalid subregister!");
2837 }
2838
2839 return Reg;
2840}
2841
2842bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2843 int64_t RegLo, RegHi;
2844 if (!skipToken(AsmToken::LBrac, "missing register index"))
2845 return false;
2846
2847 SMLoc FirstIdxLoc = getLoc();
2848 SMLoc SecondIdxLoc;
2849
2850 if (!parseExpr(RegLo))
2851 return false;
2852
2853 if (trySkipToken(AsmToken::Colon)) {
2854 SecondIdxLoc = getLoc();
2855 if (!parseExpr(RegHi))
2856 return false;
2857 } else {
2858 RegHi = RegLo;
2859 }
2860
2861 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2862 return false;
2863
2864 if (!isUInt<32>(RegLo)) {
2865 Error(FirstIdxLoc, "invalid register index");
2866 return false;
2867 }
2868
2869 if (!isUInt<32>(RegHi)) {
2870 Error(SecondIdxLoc, "invalid register index");
2871 return false;
2872 }
2873
2874 if (RegLo > RegHi) {
2875 Error(FirstIdxLoc, "first register index should not exceed second index");
2876 return false;
2877 }
2878
2879 Num = static_cast<unsigned>(RegLo);
2880 RegWidth = 32 * ((RegHi - RegLo) + 1);
2881 return true;
2882}
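// Worked example (illustrative): for "s[4:7]" the parser reads RegLo = 4 and
// RegHi = 7, so Num becomes 4 and RegWidth becomes 32 * (7 - 4 + 1) = 128
// bits.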
2883
2884unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2885 unsigned &RegNum, unsigned &RegWidth,
2886 SmallVectorImpl<AsmToken> &Tokens) {
2887 assert(isToken(AsmToken::Identifier));
2888 unsigned Reg = getSpecialRegForName(getTokenStr());
2889 if (Reg) {
2890 RegNum = 0;
2891 RegWidth = 32;
2892 RegKind = IS_SPECIAL;
2893 Tokens.push_back(getToken());
2894 lex(); // skip register name
2895 }
2896 return Reg;
2897}
2898
2899unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2900 unsigned &RegNum, unsigned &RegWidth,
2901 SmallVectorImpl<AsmToken> &Tokens) {
2902 assert(isToken(AsmToken::Identifier));
2903 StringRef RegName = getTokenStr();
2904 auto Loc = getLoc();
2905
2906 const RegInfo *RI = getRegularRegInfo(RegName);
2907 if (!RI) {
2908 Error(Loc, "invalid register name");
2909 return AMDGPU::NoRegister;
2910 }
2911
2912 Tokens.push_back(getToken());
2913 lex(); // skip register name
2914
2915 RegKind = RI->Kind;
2916 StringRef RegSuffix = RegName.substr(RI->Name.size());
2917 unsigned SubReg = NoSubRegister;
2918 if (!RegSuffix.empty()) {
2919 // We don't know the opcode till we are done parsing, so we don't know if
2920 // registers should be 16 or 32 bit. It is therefore mandatory to put .l or
2921 // .h to correctly specify 16 bit registers. We also can't determine class
2922 // VGPR_16_Lo128 or VGPR_16, so always parse them as VGPR_16.
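 // For example (illustrative), "v1.l" selects the low 16-bit half of v1
 // (lo16) and "v1.h" selects the high half (hi16).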
2923 if (RegSuffix.consume_back(".l"))
2924 SubReg = AMDGPU::lo16;
2925 else if (RegSuffix.consume_back(".h"))
2926 SubReg = AMDGPU::hi16;
2927
2928 // Single 32-bit register: vXX.
2929 if (!getRegNum(RegSuffix, RegNum)) {
2930 Error(Loc, "invalid register index");
2931 return AMDGPU::NoRegister;
2932 }
2933 RegWidth = 32;
2934 } else {
2935 // Range of registers: v[XX:YY]. ":YY" is optional.
2936 if (!ParseRegRange(RegNum, RegWidth))
2937 return AMDGPU::NoRegister;
2938 }
2939
2940 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
2941}
2942
2943unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2944 unsigned &RegWidth,
2945 SmallVectorImpl<AsmToken> &Tokens) {
2946 unsigned Reg = AMDGPU::NoRegister;
2947 auto ListLoc = getLoc();
2948
2949 if (!skipToken(AsmToken::LBrac,
2950 "expected a register or a list of registers")) {
2951 return AMDGPU::NoRegister;
2952 }
2953
2954 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2955
2956 auto Loc = getLoc();
2957 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2958 return AMDGPU::NoRegister;
2959 if (RegWidth != 32) {
2960 Error(Loc, "expected a single 32-bit register");
2961 return AMDGPU::NoRegister;
2962 }
2963
2964 for (; trySkipToken(AsmToken::Comma); ) {
2965 RegisterKind NextRegKind;
2966 unsigned NextReg, NextRegNum, NextRegWidth;
2967 Loc = getLoc();
2968
2969 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2970 NextRegNum, NextRegWidth,
2971 Tokens)) {
2972 return AMDGPU::NoRegister;
2973 }
2974 if (NextRegWidth != 32) {
2975 Error(Loc, "expected a single 32-bit register");
2976 return AMDGPU::NoRegister;
2977 }
2978 if (NextRegKind != RegKind) {
2979 Error(Loc, "registers in a list must be of the same kind");
2980 return AMDGPU::NoRegister;
2981 }
2982 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2983 return AMDGPU::NoRegister;
2984 }
2985
2986 if (!skipToken(AsmToken::RBrac,
2987 "expected a comma or a closing square bracket")) {
2988 return AMDGPU::NoRegister;
2989 }
2990
2991 if (isRegularReg(RegKind))
2992 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
2993
2994 return Reg;
2995}
2996
2997bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2998 unsigned &RegNum, unsigned &RegWidth,
2999 SmallVectorImpl<AsmToken> &Tokens) {
3000 auto Loc = getLoc();
3001 Reg = AMDGPU::NoRegister;
3002
3003 if (isToken(AsmToken::Identifier)) {
3004 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3005 if (Reg == AMDGPU::NoRegister)
3006 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3007 } else {
3008 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3009 }
3010
3011 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3012 if (Reg == AMDGPU::NoRegister) {
3013 assert(Parser.hasPendingError());
3014 return false;
3015 }
3016
3017 if (!subtargetHasRegister(*TRI, Reg)) {
3018 if (Reg == AMDGPU::SGPR_NULL) {
3019 Error(Loc, "'null' operand is not supported on this GPU");
3020 } else {
3021 Error(Loc, "register not available on this GPU");
3022 }
3023 return false;
3024 }
3025
3026 return true;
3027}
3028
3029bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3030 unsigned &RegNum, unsigned &RegWidth,
3031 bool RestoreOnFailure /*=false*/) {
3032 Reg = AMDGPU::NoRegister;
3033
3034 SmallVector<AsmToken, 1> Tokens;
3035 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3036 if (RestoreOnFailure) {
3037 while (!Tokens.empty()) {
3038 getLexer().UnLex(Tokens.pop_back_val());
3039 }
3040 }
3041 return true;
3042 }
3043 return false;
3044}
3045
3046std::optional<StringRef>
3047AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3048 switch (RegKind) {
3049 case IS_VGPR:
3050 return StringRef(".amdgcn.next_free_vgpr");
3051 case IS_SGPR:
3052 return StringRef(".amdgcn.next_free_sgpr");
3053 default:
3054 return std::nullopt;
3055 }
3056}
3057
3058void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3059 auto SymbolName = getGprCountSymbolName(RegKind);
3060 assert(SymbolName && "initializing invalid register kind");
3061 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3062 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
3063}
3064
3065bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3066 unsigned DwordRegIndex,
3067 unsigned RegWidth) {
3068 // Symbols are only defined for GCN targets
3069 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3070 return true;
3071
3072 auto SymbolName = getGprCountSymbolName(RegKind);
3073 if (!SymbolName)
3074 return true;
3075 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3076
3077 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3078 int64_t OldCount;
3079
3080 if (!Sym->isVariable())
3081 return !Error(getLoc(),
3082 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3083 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
3084 return !Error(
3085 getLoc(),
3086 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3087
3088 if (OldCount <= NewMax)
3089 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
3090
3091 return true;
3092}
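// Worked example (illustrative): a use of v[4:7] reaches this function with
// DwordRegIndex = 4 and RegWidth = 128, so NewMax = 4 + ceil(128/32) - 1 = 7
// and .amdgcn.next_free_vgpr is raised to at least 8 if its old value was
// smaller.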
3093
3094std::unique_ptr<AMDGPUOperand>
3095AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3096 const auto &Tok = getToken();
3097 SMLoc StartLoc = Tok.getLoc();
3098 SMLoc EndLoc = Tok.getEndLoc();
3099 RegisterKind RegKind;
3100 unsigned Reg, RegNum, RegWidth;
3101
3102 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3103 return nullptr;
3104 }
3105 if (isHsaAbi(getSTI())) {
3106 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3107 return nullptr;
3108 } else
3109 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3110 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3111}
3112
3113ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3114 bool HasSP3AbsModifier, bool HasLit) {
3115 // TODO: add syntactic sugar for 1/(2*PI)
3116
3117 if (isRegister())
3118 return ParseStatus::NoMatch;
3119 assert(!isModifier());
3120
3121 if (!HasLit) {
3122 HasLit = trySkipId("lit");
3123 if (HasLit) {
3124 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3125 return ParseStatus::Failure;
3126 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
3127 if (S.isSuccess() &&
3128 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3129 return ParseStatus::Failure;
3130 return S;
3131 }
3132 }
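// Illustrative usage (assumption, not from the original source): writing an
// operand as lit(1.0) asks the assembler to encode the value as a 32-bit
// literal constant rather than folding it into an inline constant.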
3133
3134 const auto& Tok = getToken();
3135 const auto& NextTok = peekToken();
3136 bool IsReal = Tok.is(AsmToken::Real);
3137 SMLoc S = getLoc();
3138 bool Negate = false;
3139
3140 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3141 lex();
3142 IsReal = true;
3143 Negate = true;
3144 }
3145
3146 AMDGPUOperand::Modifiers Mods;
3147 Mods.Lit = HasLit;
3148
3149 if (IsReal) {
3150 // Floating-point expressions are not supported.
3151 // Can only allow floating-point literals with an
3152 // optional sign.
3153
3154 StringRef Num = getTokenStr();
3155 lex();
3156
3157 APFloat RealVal(APFloat::IEEEdouble());
3158 auto roundMode = APFloat::rmNearestTiesToEven;
3159 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3160 return ParseStatus::Failure;
3161 if (Negate)
3162 RealVal.changeSign();
3163
3164 Operands.push_back(
3165 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3166 AMDGPUOperand::ImmTyNone, true));
3167 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3168 Op.setModifiers(Mods);
3169
3170 return ParseStatus::Success;
3171
3172 } else {
3173 int64_t IntVal;
3174 const MCExpr *Expr;
3175 SMLoc S = getLoc();
3176
3177 if (HasSP3AbsModifier) {
3178 // This is a workaround for handling expressions
3179 // as arguments of SP3 'abs' modifier, for example:
3180 // |1.0|
3181 // |-1|
3182 // |1+x|
3183 // This syntax is not compatible with syntax of standard
3184 // MC expressions (due to the trailing '|').
3185 SMLoc EndLoc;
3186 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3187 return ParseStatus::Failure;
3188 } else {
3189 if (Parser.parseExpression(Expr))
3190 return ParseStatus::Failure;
3191 }
3192
3193 if (Expr->evaluateAsAbsolute(IntVal)) {
3194 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3195 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3196 Op.setModifiers(Mods);
3197 } else {
3198 if (HasLit)
3199 return ParseStatus::NoMatch;
3200 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3201 }
3202
3203 return ParseStatus::Success;
3204 }
3205
3206 return ParseStatus::NoMatch;
3207}
3208
3209ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3210 if (!isRegister())
3211 return ParseStatus::NoMatch;
3212
3213 if (auto R = parseRegister()) {
3214 assert(R->isReg());
3215 Operands.push_back(std::move(R));
3216 return ParseStatus::Success;
3217 }
3218 return ParseStatus::Failure;
3219}
3220
3221ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3222 bool HasSP3AbsMod, bool HasLit) {
3223 ParseStatus Res = parseReg(Operands);
3224 if (!Res.isNoMatch())
3225 return Res;
3226 if (isModifier())
3227 return ParseStatus::NoMatch;
3228 return parseImm(Operands, HasSP3AbsMod, HasLit);
3229}
3230
3231bool
3232AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3233 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3234 const auto &str = Token.getString();
3235 return str == "abs" || str == "neg" || str == "sext";
3236 }
3237 return false;
3238}
3239
3240bool
3241AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3242 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3243}
3244
3245bool
3246AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3247 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3248}
3249
3250bool
3251AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3252 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3253}
3254
3255 // Check if this is an operand modifier or an opcode modifier
3256 // which may look like an expression but is not. We should
3257// avoid parsing these modifiers as expressions. Currently
3258// recognized sequences are:
3259// |...|
3260// abs(...)
3261// neg(...)
3262// sext(...)
3263// -reg
3264// -|...|
3265// -abs(...)
3266// name:...
3267//
3268bool
3269AMDGPUAsmParser::isModifier() {
3270
3271 AsmToken Tok = getToken();
3272 AsmToken NextToken[2];
3273 peekTokens(NextToken);
3274
3275 return isOperandModifier(Tok, NextToken[0]) ||
3276 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3277 isOpcodeModifierWithVal(Tok, NextToken[0]);
3278}
3279
3280// Check if the current token is an SP3 'neg' modifier.
3281// Currently this modifier is allowed in the following context:
3282//
3283// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3284// 2. Before an 'abs' modifier: -abs(...)
3285// 3. Before an SP3 'abs' modifier: -|...|
3286//
3287// In all other cases "-" is handled as a part
3288// of an expression that follows the sign.
3289//
3290// Note: When "-" is followed by an integer literal,
3291// this is interpreted as integer negation rather
3292// than a floating-point NEG modifier applied to N.
3293// Beside being contr-intuitive, such use of floating-point
3294// NEG modifier would have resulted in different meaning
3295// of integer literals used with VOP1/2/C and VOP3,
3296// for example:
3297// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3298// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3299// Negative fp literals with preceding "-" are
3300// handled likewise for uniformity
3301//
3302bool
3303AMDGPUAsmParser::parseSP3NegModifier() {
3304
3305 AsmToken NextToken[2];
3306 peekTokens(NextToken);
3307
3308 if (isToken(AsmToken::Minus) &&
3309 (isRegister(NextToken[0], NextToken[1]) ||
3310 NextToken[0].is(AsmToken::Pipe) ||
3311 isId(NextToken[0], "abs"))) {
3312 lex();
3313 return true;
3314 }
3315
3316 return false;
3317}
3318
3319ParseStatus
3320AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3321 bool AllowImm) {
3322 bool Neg, SP3Neg;
3323 bool Abs, SP3Abs;
3324 bool Lit;
3325 SMLoc Loc;
3326
3327 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3328 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3329 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3330
3331 SP3Neg = parseSP3NegModifier();
3332
3333 Loc = getLoc();
3334 Neg = trySkipId("neg");
3335 if (Neg && SP3Neg)
3336 return Error(Loc, "expected register or immediate");
3337 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3338 return ParseStatus::Failure;
3339
3340 Abs = trySkipId("abs");
3341 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3342 return ParseStatus::Failure;
3343
3344 Lit = trySkipId("lit");
3345 if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3346 return ParseStatus::Failure;
3347
3348 Loc = getLoc();
3349 SP3Abs = trySkipToken(AsmToken::Pipe);
3350 if (Abs && SP3Abs)
3351 return Error(Loc, "expected register or immediate");
3352
3353 ParseStatus Res;
3354 if (AllowImm) {
3355 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3356 } else {
3357 Res = parseReg(Operands);
3358 }
3359 if (!Res.isSuccess())
3360 return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
3361
3362 if (Lit && !Operands.back()->isImm())
3363 Error(Loc, "expected immediate with lit modifier");
3364
3365 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3366 return ParseStatus::Failure;
3367 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3368 return ParseStatus::Failure;
3369 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3370 return ParseStatus::Failure;
3371 if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3372 return ParseStatus::Failure;
3373
3374 AMDGPUOperand::Modifiers Mods;
3375 Mods.Abs = Abs || SP3Abs;
3376 Mods.Neg = Neg || SP3Neg;
3377 Mods.Lit = Lit;
3378
3379 if (Mods.hasFPModifiers() || Lit) {
3380 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3381 if (Op.isExpr())
3382 return Error(Op.getStartLoc(), "expected an absolute expression");
3383 Op.setModifiers(Mods);
3384 }
3385 return ParseStatus::Success;
3386}
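// Illustrative operand forms accepted here (not from the original source):
// "abs(v2)" and "|v2|" set the Abs modifier, "neg(v3)" and "-v3" set Neg,
// and "-|v4|" sets both; "lit(1.0)" marks an immediate as a forced literal.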
3387
3388ParseStatus
3389AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3390 bool AllowImm) {
3391 bool Sext = trySkipId("sext");
3392 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3393 return ParseStatus::Failure;
3394
3395 ParseStatus Res;
3396 if (AllowImm) {
3397 Res = parseRegOrImm(Operands);
3398 } else {
3399 Res = parseReg(Operands);
3400 }
3401 if (!Res.isSuccess())
3402 return Sext ? ParseStatus::Failure : Res;
3403
3404 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3405 return ParseStatus::Failure;
3406
3407 AMDGPUOperand::Modifiers Mods;
3408 Mods.Sext = Sext;
3409
3410 if (Mods.hasIntModifiers()) {
3411 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3412 if (Op.isExpr())
3413 return Error(Op.getStartLoc(), "expected an absolute expression");
3414 Op.setModifiers(Mods);
3415 }
3416
3417 return ParseStatus::Success;
3418}
3419
3420ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3421 return parseRegOrImmWithFPInputMods(Operands, false);
3422}
3423
3424ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3425 return parseRegOrImmWithIntInputMods(Operands, false);
3426}
3427
3428ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3429 auto Loc = getLoc();
3430 if (trySkipId("off")) {
3431 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3432 AMDGPUOperand::ImmTyOff, false));
3433 return ParseStatus::Success;
3434 }
3435
3436 if (!isRegister())
3437 return ParseStatus::NoMatch;
3438
3439 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3440 if (Reg) {
3441 Operands.push_back(std::move(Reg));
3442 return ParseStatus::Success;
3443 }
3444
3445 return ParseStatus::Failure;
3446}
3447
3448unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3449 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3450
3451 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3452 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3453 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3454 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3455 return Match_InvalidOperand;
3456
3457 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3458 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3459 // v_mac_f32/16 allow only dst_sel == DWORD;
3460 auto OpNum =
3461 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3462 const auto &Op = Inst.getOperand(OpNum);
3463 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3464 return Match_InvalidOperand;
3465 }
3466 }
3467
3468 return Match_Success;
3469}
3470
3471static ArrayRef<unsigned> getAllVariants() {
3472 static const unsigned Variants[] = {
3473 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3474 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3475 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3476 };
3477
3478 return ArrayRef(Variants);
3479}
3480
3481// What asm variants we should check
3482ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3483 if (isForcedDPP() && isForcedVOP3()) {
3484 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3485 return ArrayRef(Variants);
3486 }
3487 if (getForcedEncodingSize() == 32) {
3488 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3489 return ArrayRef(Variants);
3490 }
3491
3492 if (isForcedVOP3()) {
3493 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3494 return ArrayRef(Variants);
3495 }
3496
3497 if (isForcedSDWA()) {
3498 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3499 AMDGPUAsmVariants::SDWA9};
3500 return ArrayRef(Variants);
3501 }
3502
3503 if (isForcedDPP()) {
3504 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3505 return ArrayRef(Variants);
3506 }
3507
3508 return getAllVariants();
3509}
3510
3511StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3512 if (isForcedDPP() && isForcedVOP3())
3513 return "e64_dpp";
3514
3515 if (getForcedEncodingSize() == 32)
3516 return "e32";
3517
3518 if (isForcedVOP3())
3519 return "e64";
3520
3521 if (isForcedSDWA())
3522 return "sdwa";
3523
3524 if (isForcedDPP())
3525 return "dpp";
3526
3527 return "";
3528}
3529
3530unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3531 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3532 for (MCPhysReg Reg : Desc.implicit_uses()) {
3533 switch (Reg) {
3534 case AMDGPU::FLAT_SCR:
3535 case AMDGPU::VCC:
3536 case AMDGPU::VCC_LO:
3537 case AMDGPU::VCC_HI:
3538 case AMDGPU::M0:
3539 return Reg;
3540 default:
3541 break;
3542 }
3543 }
3544 return AMDGPU::NoRegister;
3545}
3546
3547// NB: This code is correct only when used to check constant
3548 // bus limitations because GFX7 supports no f16 inline constants.
3549// Note that there are no cases when a GFX7 opcode violates
3550// constant bus limitations due to the use of an f16 constant.
3551bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3552 unsigned OpIdx) const {
3553 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3554
3555 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3556 AMDGPU::isKImmOperand(Desc, OpIdx)) {
3557 return false;
3558 }
3559
3560 const MCOperand &MO = Inst.getOperand(OpIdx);
3561
3562 int64_t Val = MO.getImm();
3563 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3564
3565 switch (OpSize) { // expected operand size
3566 case 8:
3567 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3568 case 4:
3569 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3570 case 2: {
3571 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3575 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3576
3581
3586
3591
3596 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3597
3602 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3603
3604 llvm_unreachable("invalid operand type");
3605 }
3606 default:
3607 llvm_unreachable("invalid operand size");
3608 }
3609}
3610
3611unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3612 if (!isGFX10Plus())
3613 return 1;
3614
3615 switch (Opcode) {
3616 // 64-bit shift instructions can use only one scalar value input
3617 case AMDGPU::V_LSHLREV_B64_e64:
3618 case AMDGPU::V_LSHLREV_B64_gfx10:
3619 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3620 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3621 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3622 case AMDGPU::V_LSHRREV_B64_e64:
3623 case AMDGPU::V_LSHRREV_B64_gfx10:
3624 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3625 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3626 case AMDGPU::V_ASHRREV_I64_e64:
3627 case AMDGPU::V_ASHRREV_I64_gfx10:
3628 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3629 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3630 case AMDGPU::V_LSHL_B64_e64:
3631 case AMDGPU::V_LSHR_B64_e64:
3632 case AMDGPU::V_ASHR_I64_e64:
3633 return 1;
3634 default:
3635 return 2;
3636 }
3637}
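// Illustrative consequence (not from the original source): on GFX10+ most
// VOP3 instructions may read two different scalar values, so
// "v_add_f32_e64 v0, s1, s2" is accepted, while the listed 64-bit shifts and
// all pre-GFX10 targets are limited to a single scalar operand.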
3638
3639 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3640using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3641
3642// Get regular operand indices in the same order as specified
3643// in the instruction (but append mandatory literals to the end).
3644static OperandIndices getSrcOperandIndices(unsigned Opcode,
3645 bool AddMandatoryLiterals = false) {
3646
3647 int16_t ImmIdx =
3648 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3649
3650 if (isVOPD(Opcode)) {
3651 int16_t ImmDeferredIdx =
3652 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3653 : -1;
3654
3655 return {getNamedOperandIdx(Opcode, OpName::src0X),
3656 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3657 getNamedOperandIdx(Opcode, OpName::src0Y),
3658 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3659 ImmDeferredIdx,
3660 ImmIdx};
3661 }
3662
3663 return {getNamedOperandIdx(Opcode, OpName::src0),
3664 getNamedOperandIdx(Opcode, OpName::src1),
3665 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3666}
3667
3668bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3669 const MCOperand &MO = Inst.getOperand(OpIdx);
3670 if (MO.isImm()) {
3671 return !isInlineConstant(Inst, OpIdx);
3672 } else if (MO.isReg()) {
3673 auto Reg = MO.getReg();
3674 if (!Reg) {
3675 return false;
3676 }
3677 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3678 auto PReg = mc2PseudoReg(Reg);
3679 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3680 } else {
3681 return true;
3682 }
3683}
3684
3685// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3686// Writelane is special in that it can use SGPR and M0 (which would normally
3687// count as using the constant bus twice - but in this case it is allowed since
3688// the lane selector doesn't count as a use of the constant bus). However, it is
3689// still required to abide by the 1 SGPR rule.
3690static bool checkWriteLane(const MCInst &Inst) {
3691 const unsigned Opcode = Inst.getOpcode();
3692 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3693 return false;
3694 const MCOperand &LaneSelOp = Inst.getOperand(2);
3695 if (!LaneSelOp.isReg())
3696 return false;
3697 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3698 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3699}
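// Illustrative example (assumption): "v_writelane_b32 v1, s2, m0" on GFX7/VI
// uses both an SGPR and M0, but the M0 lane select is not counted, so only
// the single remaining SGPR is checked against the constant bus limit.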
3700
3701bool AMDGPUAsmParser::validateConstantBusLimitations(
3702 const MCInst &Inst, const OperandVector &Operands) {
3703 const unsigned Opcode = Inst.getOpcode();
3704 const MCInstrDesc &Desc = MII.get(Opcode);
3705 unsigned LastSGPR = AMDGPU::NoRegister;
3706 unsigned ConstantBusUseCount = 0;
3707 unsigned NumLiterals = 0;
3708 unsigned LiteralSize;
3709
3710 if (!(Desc.TSFlags &
3713 !isVOPD(Opcode))
3714 return true;
3715
3716 if (checkWriteLane(Inst))
3717 return true;
3718
3719 // Check special imm operands (used by madmk, etc)
3720 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3721 ++NumLiterals;
3722 LiteralSize = 4;
3723 }
3724
3725 SmallDenseSet<unsigned> SGPRsUsed;
3726 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3727 if (SGPRUsed != AMDGPU::NoRegister) {
3728 SGPRsUsed.insert(SGPRUsed);
3729 ++ConstantBusUseCount;
3730 }
3731
3732 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3733
3734 for (int OpIdx : OpIndices) {
3735 if (OpIdx == -1)
3736 continue;
3737
3738 const MCOperand &MO = Inst.getOperand(OpIdx);
3739 if (usesConstantBus(Inst, OpIdx)) {
3740 if (MO.isReg()) {
3741 LastSGPR = mc2PseudoReg(MO.getReg());
3742 // Pairs of registers with partial intersection like these
3743 // s0, s[0:1]
3744 // flat_scratch_lo, flat_scratch
3745 // flat_scratch_lo, flat_scratch_hi
3746 // are theoretically valid but they are disabled anyway.
3747 // Note that this code mimics SIInstrInfo::verifyInstruction
3748 if (SGPRsUsed.insert(LastSGPR).second) {
3749 ++ConstantBusUseCount;
3750 }
3751 } else { // Expression or a literal
3752
3753 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3754 continue; // special operand like VINTERP attr_chan
3755
3756 // An instruction may use only one literal.
3757 // This has been validated on the previous step.
3758 // See validateVOPLiteral.
3759 // This literal may be used as more than one operand.
3760 // If all these operands are of the same size,
3761 // this literal counts as one scalar value.
3762 // Otherwise it counts as 2 scalar values.
3763 // See "GFX10 Shader Programming", section 3.6.2.3.
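// For example, a 64-bit literal that also feeds a 32-bit operand counts as
// two scalar values, while reusing it only for operands of equal size counts
// as one.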
3764
3765 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3766 if (Size < 4)
3767 Size = 4;
3768
3769 if (NumLiterals == 0) {
3770 NumLiterals = 1;
3771 LiteralSize = Size;
3772 } else if (LiteralSize != Size) {
3773 NumLiterals = 2;
3774 }
3775 }
3776 }
3777 }
3778 ConstantBusUseCount += NumLiterals;
3779
3780 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3781 return true;
3782
3783 SMLoc LitLoc = getLitLoc(Operands);
3784 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3785 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3786 Error(Loc, "invalid operand (violates constant bus restrictions)");
3787 return false;
3788}
3789
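// VOPD (dual issue) instructions place bank constraints on their VGPR
// operands: one component destination must be even and the other odd, and the
// corresponding src operands of the X and Y components must come from
// different VGPR banks (see the error messages emitted below).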
3790bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3791 const MCInst &Inst, const OperandVector &Operands) {
3792
3793 const unsigned Opcode = Inst.getOpcode();
3794 if (!isVOPD(Opcode))
3795 return true;
3796
3797 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3798
3799 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3800 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3801 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3802 ? Opr.getReg()
3803 : MCRegister();
3804 };
3805
3806 // On GFX12, if both OpX and OpY are V_MOV_B32, then OpY uses the SRC2 source-cache.
3807 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3808
3809 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3810 auto InvalidCompOprIdx =
3811 InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3812 if (!InvalidCompOprIdx)
3813 return true;
3814
3815 auto CompOprIdx = *InvalidCompOprIdx;
3816 auto ParsedIdx =
3817 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3818 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3819 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3820
3821 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3822 if (CompOprIdx == VOPD::Component::DST) {
3823 Error(Loc, "one dst register must be even and the other odd");
3824 } else {
3825 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3826 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3827 " operands must use different VGPR banks");
3828 }
3829
3830 return false;
3831}
3832
3833bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3834
3835 const unsigned Opc = Inst.getOpcode();
3836 const MCInstrDesc &Desc = MII.get(Opc);
3837
3838 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3839 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3840 assert(ClampIdx != -1);
3841 return Inst.getOperand(ClampIdx).getImm() == 0;
3842 }
3843
3844 return true;
3845}
3846
3847constexpr uint64_t MIMGFlags =
3848 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
3849
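// The image data operand must provide one dword per enabled dmask component
// (always four for gather4), plus one extra dword when tfe is set, halved and
// rounded up for packed d16 data. For example, dmask=0x7 with tfe requires a
// 4-dword vdata register.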
3850bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3851 const SMLoc &IDLoc) {
3852
3853 const unsigned Opc = Inst.getOpcode();
3854 const MCInstrDesc &Desc = MII.get(Opc);
3855
3856 if ((Desc.TSFlags & MIMGFlags) == 0)
3857 return true;
3858
3859 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3860 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3861 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3862
3863 assert(VDataIdx != -1);
3864
3865 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3866 return true;
3867
3868 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3869 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3870 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3871 if (DMask == 0)
3872 DMask = 1;
3873
3874 bool IsPackedD16 = false;
3875 unsigned DataSize =
3876 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3877 if (hasPackedD16()) {
3878 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3879 IsPackedD16 = D16Idx >= 0;
3880 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3881 DataSize = (DataSize + 1) / 2;
3882 }
3883
3884 if ((VDataSize / 4) == DataSize + TFESize)
3885 return true;
3886
3887 StringRef Modifiers;
3888 if (isGFX90A())
3889 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3890 else
3891 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3892
3893 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3894 return false;
3895}
3896
3897bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3898 const SMLoc &IDLoc) {
3899 const unsigned Opc = Inst.getOpcode();
3900 const MCInstrDesc &Desc = MII.get(Opc);
3901
3902 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
3903 return true;
3904
3905 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3906
3907 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3908 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3909 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3910 int RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc
3911 : AMDGPU::OpName::rsrc;
3912 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
3913 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3914 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3915
3916 assert(VAddr0Idx != -1);
3917 assert(SrsrcIdx != -1);
3918 assert(SrsrcIdx > VAddr0Idx);
3919
3920 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3921 if (BaseOpcode->BVH) {
3922 if (IsA16 == BaseOpcode->A16)
3923 return true;
3924 Error(IDLoc, "image address size does not match a16");
3925 return false;
3926 }
3927
3928 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3929 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3930 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3931 unsigned ActualAddrSize =
3932 IsNSA ? SrsrcIdx - VAddr0Idx
3933 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3934
3935 unsigned ExpectedAddrSize =
3936 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3937
3938 if (IsNSA) {
3939 if (hasPartialNSAEncoding() &&
3940 ExpectedAddrSize >
3941 getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
3942 int VAddrLastIdx = SrsrcIdx - 1;
3943 unsigned VAddrLastSize =
3944 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3945
3946 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3947 }
3948 } else {
3949 if (ExpectedAddrSize > 12)
3950 ExpectedAddrSize = 16;
3951
3952 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3953 // This provides backward compatibility for assembly created
3954 // before 160b/192b/224b types were directly supported.
3955 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3956 return true;
3957 }
3958
3959 if (ActualAddrSize == ExpectedAddrSize)
3960 return true;
3961
3962 Error(IDLoc, "image address size does not match dim and a16");
3963 return false;
3964}
3965
3966bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3967
3968 const unsigned Opc = Inst.getOpcode();
3969 const MCInstrDesc &Desc = MII.get(Opc);
3970
3971 if ((Desc.TSFlags & MIMGFlags) == 0)
3972 return true;
3973 if (!Desc.mayLoad() || !Desc.mayStore())
3974 return true; // Not atomic
3975
3976 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3977 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3978
3979 // This is an incomplete check because image_atomic_cmpswap
3980 // may only use 0x3 and 0xf while other atomic operations
3981 // may use 0x1 and 0x3. However these limitations are
3982 // verified when we check that dmask matches dst size.
3983 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3984}
3985
3986bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3987
3988 const unsigned Opc = Inst.getOpcode();
3989 const MCInstrDesc &Desc = MII.get(Opc);
3990
3991 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3992 return true;
3993
3994 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3995 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3996
3997 // GATHER4 instructions use dmask in a different fashion compared to
3998 // other MIMG instructions. The only useful DMASK values are
3999 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4000 // (red,red,red,red) etc.) The ISA document doesn't mention
4001 // this.
4002 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4003}
4004
4005bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4006 const unsigned Opc = Inst.getOpcode();
4007 const MCInstrDesc &Desc = MII.get(Opc);
4008
4009 if ((Desc.TSFlags & MIMGFlags) == 0)
4010 return true;
4011
4012 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4013 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4014 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4015
4016 if (!BaseOpcode->MSAA)
4017 return true;
4018
4019 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4020 assert(DimIdx != -1);
4021
4022 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4023 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4024
4025 return DimInfo->MSAA;
4026}
4027
4028static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4029{
4030 switch (Opcode) {
4031 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4032 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4033 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4034 return true;
4035 default:
4036 return false;
4037 }
4038}
4039
4040// movrels* opcodes should only allow VGPRs as src0.
4041// This is specified in the .td description for vop1/vop3,
4042// but sdwa is handled differently. See isSDWAOperand.
4043bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4044 const OperandVector &Operands) {
4045
4046 const unsigned Opc = Inst.getOpcode();
4047 const MCInstrDesc &Desc = MII.get(Opc);
4048
4049 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4050 return true;
4051
4052 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4053 assert(Src0Idx != -1);
4054
4055 SMLoc ErrLoc;
4056 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4057 if (Src0.isReg()) {
4058 auto Reg = mc2PseudoReg(Src0.getReg());
4059 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4060 if (!isSGPR(Reg, TRI))
4061 return true;
4062 ErrLoc = getRegLoc(Reg, Operands);
4063 } else {
4064 ErrLoc = getConstLoc(Operands);
4065 }
4066
4067 Error(ErrLoc, "source operand must be a VGPR");
4068 return false;
4069}
4070
4071bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4072 const OperandVector &Operands) {
4073
4074 const unsigned Opc = Inst.getOpcode();
4075
4076 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4077 return true;
4078
4079 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4080 assert(Src0Idx != -1);
4081
4082 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4083 if (!Src0.isReg())
4084 return true;
4085
4086 auto Reg = mc2PseudoReg(Src0.getReg());
4087 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4088 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4089 Error(getRegLoc(Reg, Operands),
4090 "source operand must be either a VGPR or an inline constant");
4091 return false;
4092 }
4093
4094 return true;
4095}
4096
4097bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4098 const OperandVector &Operands) {
4099 unsigned Opcode = Inst.getOpcode();
4100 const MCInstrDesc &Desc = MII.get(Opcode);
4101
4102 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4103 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4104 return true;
4105
4106 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4107 if (Src2Idx == -1)
4108 return true;
4109
4110 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4111 Error(getConstLoc(Operands),
4112 "inline constants are not allowed for this operand");
4113 return false;
4114 }
4115
4116 return true;
4117}
4118
4119bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4120 const OperandVector &Operands) {
4121 const unsigned Opc = Inst.getOpcode();
4122 const MCInstrDesc &Desc = MII.get(Opc);
4123
4124 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4125 return true;
4126
4127 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4128 if (Src2Idx == -1)
4129 return true;
4130
4131 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4132 if (!Src2.isReg())
4133 return true;
4134
4135 MCRegister Src2Reg = Src2.getReg();
4136 MCRegister DstReg = Inst.getOperand(0).getReg();
4137 if (Src2Reg == DstReg)
4138 return true;
4139
4140 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4141 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4142 return true;
4143
4144 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4145 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
4146 "source 2 operand must not partially overlap with dst");
4147 return false;
4148 }
4149
4150 return true;
4151}
4152
4153bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4154 switch (Inst.getOpcode()) {
4155 default:
4156 return true;
4157 case V_DIV_SCALE_F32_gfx6_gfx7:
4158 case V_DIV_SCALE_F32_vi:
4159 case V_DIV_SCALE_F32_gfx10:
4160 case V_DIV_SCALE_F64_gfx6_gfx7:
4161 case V_DIV_SCALE_F64_vi:
4162 case V_DIV_SCALE_F64_gfx10:
4163 break;
4164 }
4165
4166 // TODO: Check that src0 = src1 or src2.
4167
4168 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4169 AMDGPU::OpName::src2_modifiers,
4170 AMDGPU::OpName::src2_modifiers}) {
4171 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4172 .getImm() &
4173 SISrcMods::ABS) {
4174 return false;
4175 }
4176 }
4177
4178 return true;
4179}
4180
4181bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4182
4183 const unsigned Opc = Inst.getOpcode();
4184 const MCInstrDesc &Desc = MII.get(Opc);
4185
4186 if ((Desc.TSFlags & MIMGFlags) == 0)
4187 return true;
4188
4189 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4190 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4191 if (isCI() || isSI())
4192 return false;
4193 }
4194
4195 return true;
4196}
4197
4198static bool IsRevOpcode(const unsigned Opcode)
4199{
4200 switch (Opcode) {
4201 case AMDGPU::V_SUBREV_F32_e32:
4202 case AMDGPU::V_SUBREV_F32_e64:
4203 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4204 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4205 case AMDGPU::V_SUBREV_F32_e32_vi:
4206 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4207 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4208 case AMDGPU::V_SUBREV_F32_e64_vi:
4209
4210 case AMDGPU::V_SUBREV_CO_U32_e32:
4211 case AMDGPU::V_SUBREV_CO_U32_e64:
4212 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4213 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4214
4215 case AMDGPU::V_SUBBREV_U32_e32:
4216 case AMDGPU::V_SUBBREV_U32_e64:
4217 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4218 case AMDGPU::V_SUBBREV_U32_e32_vi:
4219 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4220 case AMDGPU::V_SUBBREV_U32_e64_vi:
4221
4222 case AMDGPU::V_SUBREV_U32_e32:
4223 case AMDGPU::V_SUBREV_U32_e64:
4224 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4225 case AMDGPU::V_SUBREV_U32_e32_vi:
4226 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4227 case AMDGPU::V_SUBREV_U32_e64_vi:
4228
4229 case AMDGPU::V_SUBREV_F16_e32:
4230 case AMDGPU::V_SUBREV_F16_e64:
4231 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4232 case AMDGPU::V_SUBREV_F16_e32_vi:
4233 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4234 case AMDGPU::V_SUBREV_F16_e64_vi:
4235
4236 case AMDGPU::V_SUBREV_U16_e32:
4237 case AMDGPU::V_SUBREV_U16_e64:
4238 case AMDGPU::V_SUBREV_U16_e32_vi:
4239 case AMDGPU::V_SUBREV_U16_e64_vi:
4240
4241 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4242 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4243 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4244
4245 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4246 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4247
4248 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4249 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4250
4251 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4252 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4253
4254 case AMDGPU::V_LSHRREV_B32_e32:
4255 case AMDGPU::V_LSHRREV_B32_e64:
4256 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4257 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4258 case AMDGPU::V_LSHRREV_B32_e32_vi:
4259 case AMDGPU::V_LSHRREV_B32_e64_vi:
4260 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4261 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4262
4263 case AMDGPU::V_ASHRREV_I32_e32:
4264 case AMDGPU::V_ASHRREV_I32_e64:
4265 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4266 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4267 case AMDGPU::V_ASHRREV_I32_e32_vi:
4268 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4269 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4270 case AMDGPU::V_ASHRREV_I32_e64_vi:
4271
4272 case AMDGPU::V_LSHLREV_B32_e32:
4273 case AMDGPU::V_LSHLREV_B32_e64:
4274 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4275 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4276 case AMDGPU::V_LSHLREV_B32_e32_vi:
4277 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4278 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4279 case AMDGPU::V_LSHLREV_B32_e64_vi:
4280
4281 case AMDGPU::V_LSHLREV_B16_e32:
4282 case AMDGPU::V_LSHLREV_B16_e64:
4283 case AMDGPU::V_LSHLREV_B16_e32_vi:
4284 case AMDGPU::V_LSHLREV_B16_e64_vi:
4285 case AMDGPU::V_LSHLREV_B16_gfx10:
4286
4287 case AMDGPU::V_LSHRREV_B16_e32:
4288 case AMDGPU::V_LSHRREV_B16_e64:
4289 case AMDGPU::V_LSHRREV_B16_e32_vi:
4290 case AMDGPU::V_LSHRREV_B16_e64_vi:
4291 case AMDGPU::V_LSHRREV_B16_gfx10:
4292
4293 case AMDGPU::V_ASHRREV_I16_e32:
4294 case AMDGPU::V_ASHRREV_I16_e64:
4295 case AMDGPU::V_ASHRREV_I16_e32_vi:
4296 case AMDGPU::V_ASHRREV_I16_e64_vi:
4297 case AMDGPU::V_ASHRREV_I16_gfx10:
4298
4299 case AMDGPU::V_LSHLREV_B64_e64:
4300 case AMDGPU::V_LSHLREV_B64_gfx10:
4301 case AMDGPU::V_LSHLREV_B64_vi:
4302
4303 case AMDGPU::V_LSHRREV_B64_e64:
4304 case AMDGPU::V_LSHRREV_B64_gfx10:
4305 case AMDGPU::V_LSHRREV_B64_vi:
4306
4307 case AMDGPU::V_ASHRREV_I64_e64:
4308 case AMDGPU::V_ASHRREV_I64_gfx10:
4309 case AMDGPU::V_ASHRREV_I64_vi:
4310
4311 case AMDGPU::V_PK_LSHLREV_B16:
4312 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4313 case AMDGPU::V_PK_LSHLREV_B16_vi:
4314
4315 case AMDGPU::V_PK_LSHRREV_B16:
4316 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4317 case AMDGPU::V_PK_LSHRREV_B16_vi:
4318 case AMDGPU::V_PK_ASHRREV_I16:
4319 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4320 case AMDGPU::V_PK_ASHRREV_I16_vi:
4321 return true;
4322 default:
4323 return false;
4324 }
4325}
4326
4327std::optional<StringRef>
4328AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4329
4330 using namespace SIInstrFlags;
4331 const unsigned Opcode = Inst.getOpcode();
4332 const MCInstrDesc &Desc = MII.get(Opcode);
4333
4334 // lds_direct register is defined so that it can be used
4335 // with 9-bit operands only. Ignore encodings which do not accept these.
4336 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4337 if ((Desc.TSFlags & Enc) == 0)
4338 return std::nullopt;
4339
4340 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4341 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4342 if (SrcIdx == -1)
4343 break;
4344 const auto &Src = Inst.getOperand(SrcIdx);
4345 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4346
4347 if (isGFX90A() || isGFX11Plus())
4348 return StringRef("lds_direct is not supported on this GPU");
4349
4350 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4351 return StringRef("lds_direct cannot be used with this instruction");
4352
4353 if (SrcName != OpName::src0)
4354 return StringRef("lds_direct may be used as src0 only");
4355 }
4356 }
4357
4358 return std::nullopt;
4359}
4360
4361SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4362 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4363 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4364 if (Op.isFlatOffset())
4365 return Op.getStartLoc();
4366 }
4367 return getLoc();
4368}
4369
4370bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4371 const OperandVector &Operands) {
4372 auto Opcode = Inst.getOpcode();
4373 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4374 if (OpNum == -1)
4375 return true;
4376
4377 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4378 if ((TSFlags & SIInstrFlags::FLAT))
4379 return validateFlatOffset(Inst, Operands);
4380
4381 if ((TSFlags & SIInstrFlags::SMRD))
4382 return validateSMEMOffset(Inst, Operands);
4383
4384 const auto &Op = Inst.getOperand(OpNum);
4385 if (isGFX12Plus() &&
4386 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4387 const unsigned OffsetSize = 24;
4388 if (!isIntN(OffsetSize, Op.getImm())) {
4389 Error(getFlatOffsetLoc(Operands),
4390 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4391 return false;
4392 }
4393 } else {
4394 const unsigned OffsetSize = 16;
4395 if (!isUIntN(OffsetSize, Op.getImm())) {
4396 Error(getFlatOffsetLoc(Operands),
4397 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4398 return false;
4399 }
4400 }
4401 return true;
4402}
4403
4404bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4405 const OperandVector &Operands) {
4406 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4407 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4408 return true;
4409
4410 auto Opcode = Inst.getOpcode();
4411 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4412 assert(OpNum != -1);
4413
4414 const auto &Op = Inst.getOperand(OpNum);
4415 if (!hasFlatOffsets() && Op.getImm() != 0) {
4416 Error(getFlatOffsetLoc(Operands),
4417 "flat offset modifier is not supported on this GPU");
4418 return false;
4419 }
4420
4421 // For pre-GFX12 FLAT instructions the offset must be positive;
4422 // MSB is ignored and forced to zero.
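// For example, with an N-bit offset field, plain FLAT accesses accept offsets
// in [0, 2^(N-1) - 1], while global/scratch accesses (and all GFX12+ flat
// accesses) accept the full signed range [-2^(N-1), 2^(N-1) - 1].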
4423 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4424 bool AllowNegative =
4425 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4426 isGFX12Plus();
4427 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4428 Error(getFlatOffsetLoc(Operands),
4429 Twine("expected a ") +
4430 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4431 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4432 return false;
4433 }
4434
4435 return true;
4436}
4437
4438SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4439 // Start with second operand because SMEM Offset cannot be dst or src0.
4440 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4441 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4442 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4443 return Op.getStartLoc();
4444 }
4445 return getLoc();
4446}
4447
4448bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4449 const OperandVector &Operands) {
4450 if (isCI() || isSI())
4451 return true;
4452
4453 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4454 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4455 return true;
4456
4457 auto Opcode = Inst.getOpcode();
4458 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4459 if (OpNum == -1)
4460 return true;
4461
4462 const auto &Op = Inst.getOperand(OpNum);
4463 if (!Op.isImm())
4464 return true;
4465
4466 uint64_t Offset = Op.getImm();
4467 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4468 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4469 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4470 return true;
4471
4472 Error(getSMEMOffsetLoc(Operands),
4473 isGFX12Plus() ? "expected a 24-bit signed offset"
4474 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4475 : "expected a 21-bit signed offset");
4476
4477 return false;
4478}
4479
4480bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4481 unsigned Opcode = Inst.getOpcode();
4482 const MCInstrDesc &Desc = MII.get(Opcode);
4483 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4484 return true;
4485
4486 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4487 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4488
4489 const int OpIndices[] = { Src0Idx, Src1Idx };
4490
4491 unsigned NumExprs = 0;
4492 unsigned NumLiterals = 0;
4493 uint32_t LiteralValue;
4494
4495 for (int OpIdx : OpIndices) {
4496 if (OpIdx == -1) break;
4497
4498 const MCOperand &MO = Inst.getOperand(OpIdx);
4499 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4500 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4501 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4502 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4503 if (NumLiterals == 0 || LiteralValue != Value) {
4504 LiteralValue = Value;
4505 ++NumLiterals;
4506 }
4507 } else if (MO.isExpr()) {
4508 ++NumExprs;
4509 }
4510 }
4511 }
4512
4513 return NumLiterals + NumExprs <= 1;
4514}
4515
4516bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4517 const unsigned Opc = Inst.getOpcode();
4518 if (isPermlane16(Opc)) {
4519 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4520 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4521
4522 if (OpSel & ~3)
4523 return false;
4524 }
4525
4526 uint64_t TSFlags = MII.get(Opc).TSFlags;
4527
4528 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4529 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4530 if (OpSelIdx != -1) {
4531 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4532 return false;
4533 }
4534 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4535 if (OpSelHiIdx != -1) {
4536 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4537 return false;
4538 }
4539 }
4540
4541 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4542 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4543 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4544 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4545 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4546 if (OpSel & 3)
4547 return false;
4548 }
4549
4550 return true;
4551}
4552
4553bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) {
4554 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4555
4556 const unsigned Opc = Inst.getOpcode();
4557 uint64_t TSFlags = MII.get(Opc).TSFlags;
4558
4559 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4560 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4561 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4562 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4563 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4564 !(TSFlags & SIInstrFlags::IsSWMMAC))
4565 return true;
4566
4567 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4568 if (NegIdx == -1)
4569 return true;
4570
4571 unsigned Neg = Inst.getOperand(NegIdx).getImm();
4572
4573 // Some instructions have a neg_lo or neg_hi operand, yet the neg modifier is
4574 // allowed only on some of their src operands and not on others.
4575 // Conveniently, such instructions omit the src_modifiers operand for the src
4576 // operands that do not allow neg, because those operands do not allow opsel either.
4577
4578 int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4579 AMDGPU::OpName::src1_modifiers,
4580 AMDGPU::OpName::src2_modifiers};
4581
4582 for (unsigned i = 0; i < 3; ++i) {
4583 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4584 if (Neg & (1 << i))
4585 return false;
4586 }
4587 }
4588
4589 return true;
4590}
4591
4592bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4593 const OperandVector &Operands) {
4594 const unsigned Opc = Inst.getOpcode();
4595 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4596 if (DppCtrlIdx >= 0) {
4597 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4598
4599 if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
4600 AMDGPU::isDPALU_DPP(MII.get(Opc))) {
4601 // DP ALU DPP is supported for row_newbcast only on GFX9*
4602 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4603 Error(S, "DP ALU dpp only supports row_newbcast");
4604 return false;
4605 }
4606 }
4607
4608 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
4609 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4610
4611 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
4612 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4613 if (Src1Idx >= 0) {
4614 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4615 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4616 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
4617 auto Reg = mc2PseudoReg(Inst.getOperand(Src1Idx).getReg());
4618 SMLoc S = getRegLoc(Reg, Operands);
4619 Error(S, "invalid operand for instruction");
4620 return false;
4621 }
4622 if (Src1.isImm()) {
4623 Error(getInstLoc(Operands),
4624 "src1 immediate operand invalid for instruction");
4625 return false;
4626 }
4627 }
4628 }
4629
4630 return true;
4631}
4632
4633// Check if VCC register matches wavefront size
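// In wave64 mode the operand must be the full 64-bit "vcc" pair; in wave32
// mode it must be the 32-bit "vcc_lo".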
4634bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4635 auto FB = getFeatureBits();
4636 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4637 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4638}
4639
4640// One unique literal can be used. VOP3 literal is only allowed in GFX10+
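// For example, "v_add3_u32 v0, 0x12345678, v1, 0x12345678" reuses a single
// literal and is accepted on targets with VOP3 literal support, whereas two
// different literal values in one instruction are rejected.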
4641bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4642 const OperandVector &Operands) {
4643 unsigned Opcode = Inst.getOpcode();
4644 const MCInstrDesc &Desc = MII.get(Opcode);
4645 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4646 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4647 !HasMandatoryLiteral && !isVOPD(Opcode))
4648 return true;
4649
4650 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4651
4652 unsigned NumExprs = 0;
4653 unsigned NumLiterals = 0;
4654 uint64_t LiteralValue;
4655
4656 for (int OpIdx : OpIndices) {
4657 if (OpIdx == -1)
4658 continue;
4659
4660 const MCOperand &MO = Inst.getOperand(OpIdx);
4661 if (!MO.isImm() && !MO.isExpr())
4662 continue;
4663 if (!isSISrcOperand(Desc, OpIdx))
4664 continue;
4665
4666 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4667 uint64_t Value = static_cast<uint64_t>(MO.getImm());
4668 bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
4669 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
4670 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
4671
4672 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
4673 Error(getLitLoc(Operands), "invalid operand for instruction");
4674 return false;
4675 }
4676
4677 if (IsFP64 && IsValid32Op)
4678 Value = Hi_32(Value);
4679
4680 if (NumLiterals == 0 || LiteralValue != Value) {
4681 LiteralValue = Value;
4682 ++NumLiterals;
4683 }
4684 } else if (MO.isExpr()) {
4685 ++NumExprs;
4686 }
4687 }
4688 NumLiterals += NumExprs;
4689
4690 if (!NumLiterals)
4691 return true;
4692
4693 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4694 Error(getLitLoc(Operands), "literal operands are not supported");
4695 return false;
4696 }
4697
4698 if (NumLiterals > 1) {
4699 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4700 return false;
4701 }
4702
4703 return true;
4704}
4705
4706// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4707static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4708 const MCRegisterInfo *MRI) {
4709 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4710 if (OpIdx < 0)
4711 return -1;
4712
4713 const MCOperand &Op = Inst.getOperand(OpIdx);
4714 if (!Op.isReg())
4715 return -1;
4716
4717 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4718 auto Reg = Sub ? Sub : Op.getReg();
4719 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4720 return AGPR32.contains(Reg) ? 1 : 0;
4721}
4722
4723bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4724 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4725 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4726 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4727 SIInstrFlags::DS)) == 0)
4728 return true;
4729
4730 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4731 : AMDGPU::OpName::vdata;
4732
4733 const MCRegisterInfo *MRI = getMRI();
4734 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4735 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4736
4737 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4738 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4739 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4740 return false;
4741 }
4742
4743 auto FB = getFeatureBits();
4744 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4745 if (DataAreg < 0 || DstAreg < 0)
4746 return true;
4747 return DstAreg == DataAreg;
4748 }
4749
4750 return DstAreg < 1 && DataAreg < 1;
4751}
4752
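// On targets with gfx90a instructions, VGPR and AGPR tuples must start at an
// even-numbered register; for example, a 64-bit operand in v[2:3] is valid
// while one in v[1:2] is not.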
4753bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4754 auto FB = getFeatureBits();
4755 if (!FB[AMDGPU::FeatureGFX90AInsts])
4756 return true;
4757
4758 const MCRegisterInfo *MRI = getMRI();
4759 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4760 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4761 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4762 const MCOperand &Op = Inst.getOperand(I);
4763 if (!Op.isReg())
4764 continue;
4765
4766 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4767 if (!Sub)
4768 continue;
4769
4770 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4771 return false;
4772 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4773 return false;
4774 }
4775
4776 return true;
4777}
4778
4779SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4780 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4781 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4782 if (Op.isBLGP())
4783 return Op.getStartLoc();
4784 }
4785 return SMLoc();
4786}
4787
4788bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4789 const OperandVector &Operands) {
4790 unsigned Opc = Inst.getOpcode();
4791 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4792 if (BlgpIdx == -1)
4793 return true;
4794 SMLoc BLGPLoc = getBLGPLoc(Operands);
4795 if (!BLGPLoc.isValid())
4796 return true;
4797 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
4798 auto FB = getFeatureBits();
4799 bool UsesNeg = false;
4800 if (FB[AMDGPU::FeatureGFX940Insts]) {
4801 switch (Opc) {
4802 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4803 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4804 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4805 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4806 UsesNeg = true;
4807 }
4808 }
4809
4810 if (IsNeg == UsesNeg)
4811 return true;
4812
4813 Error(BLGPLoc,
4814 UsesNeg ? "invalid modifier: blgp is not supported"
4815 : "invalid modifier: neg is not supported");
4816
4817 return false;
4818}
4819
4820bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4821 const OperandVector &Operands) {
4822 if (!isGFX11Plus())
4823 return true;
4824
4825 unsigned Opc = Inst.getOpcode();
4826 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4827 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4828 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4829 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4830 return true;
4831
4832 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4833 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4834 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4835 if (Reg == AMDGPU::SGPR_NULL)
4836 return true;
4837
4838 SMLoc RegLoc = getRegLoc(Reg, Operands);
4839 Error(RegLoc, "src0 must be null");
4840 return false;
4841}
4842
4843bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
4844 const OperandVector &Operands) {
4845 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4846 if ((TSFlags & SIInstrFlags::DS) == 0)
4847 return true;
4848 if (TSFlags & SIInstrFlags::GWS)
4849 return validateGWS(Inst, Operands);
4850 // Only validate GDS for non-GWS instructions.
4851 if (hasGDS())
4852 return true;
4853 int GDSIdx =
4854 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
4855 if (GDSIdx < 0)
4856 return true;
4857 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
4858 if (GDS) {
4859 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
4860 Error(S, "gds modifier is not supported on this GPU");
4861 return false;
4862 }
4863 return true;
4864}
4865
4866// gfx90a has an undocumented limitation:
4867// DS_GWS opcodes must use even aligned registers.
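// For example, on gfx90a "ds_gws_init v1 gds" is rejected because data0 must
// live in an even register such as v0 or v2.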
4868bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4869 const OperandVector &Operands) {
4870 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4871 return true;
4872
4873 int Opc = Inst.getOpcode();
4874 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4875 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4876 return true;
4877
4878 const MCRegisterInfo *MRI = getMRI();
4879 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4880 int Data0Pos =
4881 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4882 assert(Data0Pos != -1);
4883 auto Reg = Inst.getOperand(Data0Pos).getReg();
4884 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4885 if (RegIdx & 1) {
4886 SMLoc RegLoc = getRegLoc(Reg, Operands);
4887 Error(RegLoc, "vgpr must be even aligned");
4888 return false;
4889 }
4890
4891 return true;
4892}
4893
4894bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4895 const OperandVector &Operands,
4896 const SMLoc &IDLoc) {
4897 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4898 AMDGPU::OpName::cpol);
4899 if (CPolPos == -1)
4900 return true;
4901
4902 unsigned CPol = Inst.getOperand(CPolPos).getImm();
4903
4904 if (isGFX12Plus())
4905 return validateTHAndScopeBits(Inst, Operands, CPol);
4906
4907 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4908 if (TSFlags & SIInstrFlags::SMRD) {
4909 if (CPol && (isSI() || isCI())) {
4910 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4911 Error(S, "cache policy is not supported for SMRD instructions");
4912 return false;
4913 }
4914 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4915 Error(IDLoc, "invalid cache policy for SMEM instruction");
4916 return false;
4917 }
4918 }
4919
4920 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4921 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
4922 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4923 SIInstrFlags::FLAT;
4924 if (!(TSFlags & AllowSCCModifier)) {
4925 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4926 StringRef CStr(S.getPointer());
4927 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4928 Error(S,
4929 "scc modifier is not supported for this instruction on this GPU");
4930 return false;
4931 }
4932 }
4933
4933
4934 if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
4935 return true;
4936
4937 if (TSFlags & SIInstrFlags::IsAtomicRet) {
4938 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4939 Error(IDLoc, isGFX940() ? "instruction must use sc0"
4940 : "instruction must use glc");
4941 return false;
4942 }
4943 } else {
4944 if (CPol & CPol::GLC) {
4945 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4946 StringRef CStr(S.getPointer());
4947 S = SMLoc::getFromPointer(
4948 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4949 Error(S, isGFX940() ? "instruction must not use sc0"
4950 : "instruction must not use glc");
4951 return false;
4952 }
4953 }
4954
4955 return true;
4956}
4957
4958bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
4959 const OperandVector &Operands,
4960 const unsigned CPol) {
4961 const unsigned TH = CPol & AMDGPU::CPol::TH;
4962 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
4963
4964 const unsigned Opcode = Inst.getOpcode();
4965 const MCInstrDesc &TID = MII.get(Opcode);
4966
4967 auto PrintError = [&](StringRef Msg) {
4968 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4969 Error(S, Msg);
4970 return false;
4971 };
4972
4973 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
4976 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
4977
4978 if (TH == 0)
4979 return true;
4980
4981 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
4982 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
4983 (TH == AMDGPU::CPol::TH_NT_HT)))
4984 return PrintError("invalid th value for SMEM instruction");
4985
4986 if (TH == AMDGPU::CPol::TH_BYPASS) {
4987 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
4988 CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
4989 (Scope == AMDGPU::CPol::SCOPE_SYS &&
4990 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
4991 return PrintError("scope and th combination is not valid");
4992 }
4993
4994 bool IsStore = TID.mayStore();
4995 bool IsAtomic =
4996 TID.TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet);
4997
4998 if (IsAtomic) {
4999 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5000 return PrintError("invalid th value for atomic instructions");
5001 } else if (IsStore) {
5002 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5003 return PrintError("invalid th value for store instructions");
5004 } else {
5005 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5006 return PrintError("invalid th value for load instructions");
5007 }
5008
5009 return true;
5010}
5011
5012bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
5013 if (!isGFX11Plus())
5014 return true;
5015 for (auto &Operand : Operands) {
5016 if (!Operand->isReg())
5017 continue;
5018 unsigned Reg = Operand->getReg();
5019 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
5020 Error(getRegLoc(Reg, Operands),
5021 "execz and vccz are not supported on this GPU");
5022 return false;
5023 }
5024 }
5025 return true;
5026}
5027
5028bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5029 const OperandVector &Operands) {
5030 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5031 if (Desc.mayStore() &&
5032 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5033 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5034 if (Loc != getInstLoc(Operands)) {
5035 Error(Loc, "TFE modifier has no meaning for store instructions");
5036 return false;
5037 }
5038 }
5039
5040 return true;
5041}
5042
5043bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
5044 const SMLoc &IDLoc,
5045 const OperandVector &Operands) {
5046 if (auto ErrMsg = validateLdsDirect(Inst)) {
5047 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
5048 return false;
5049 }
5050 if (!validateSOPLiteral(Inst)) {
5051 Error(getLitLoc(Operands),
5052 "only one unique literal operand is allowed");
5053 return false;
5054 }
5055 if (!validateVOPLiteral(Inst, Operands)) {
5056 return false;
5057 }
5058 if (!validateConstantBusLimitations(Inst, Operands)) {
5059 return false;
5060 }
5061 if (!validateVOPDRegBankConstraints(Inst, Operands)) {
5062 return false;
5063 }
5064 if (!validateIntClampSupported(Inst)) {
5065 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
5066 "integer clamping is not supported on this GPU");
5067 return false;
5068 }
5069 if (!validateOpSel(Inst)) {
5070 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5071 "invalid op_sel operand");
5072 return false;
5073 }
5074 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5075 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5076 "invalid neg_lo operand");
5077 return false;
5078 }
5079 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5080 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5081 "invalid neg_hi operand");
5082 return false;
5083 }
5084 if (!validateDPP(Inst, Operands)) {
5085 return false;
5086 }
5087 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5088 if (!validateMIMGD16(Inst)) {
5089 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5090 "d16 modifier is not supported on this GPU");
5091 return false;
5092 }
5093 if (!validateMIMGMSAA(Inst)) {
5094 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5095 "invalid dim; must be MSAA type");
5096 return false;
5097 }
5098 if (!validateMIMGDataSize(Inst, IDLoc)) {
5099 return false;
5100 }
5101 if (!validateMIMGAddrSize(Inst, IDLoc))
5102 return false;
5103 if (!validateMIMGAtomicDMask(Inst)) {
5104 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5105 "invalid atomic image dmask");
5106 return false;
5107 }
5108 if (!validateMIMGGatherDMask(Inst)) {
5109 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5110 "invalid image_gather dmask: only one bit must be set");
5111 return false;
5112 }
5113 if (!validateMovrels(Inst, Operands)) {
5114 return false;
5115 }
5116 if (!validateOffset(Inst, Operands)) {
5117 return false;
5118 }
5119 if (!validateMAIAccWrite(Inst, Operands)) {
5120 return false;
5121 }
5122 if (!validateMAISrc2(Inst, Operands)) {
5123 return false;
5124 }
5125 if (!validateMFMA(Inst, Operands)) {
5126 return false;
5127 }
5128 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5129 return false;
5130 }
5131
5132 if (!validateAGPRLdSt(Inst)) {
5133 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5134 ? "invalid register class: data and dst should be all VGPR or AGPR"
5135 : "invalid register class: agpr loads and stores not supported on this GPU"
5136 );
5137 return false;
5138 }
5139 if (!validateVGPRAlign(Inst)) {
5140 Error(IDLoc,
5141 "invalid register class: vgpr tuples must be 64 bit aligned");
5142 return false;
5143 }
5144 if (!validateDS(Inst, Operands)) {
5145 return false;
5146 }
5147
5148 if (!validateBLGP(Inst, Operands)) {
5149 return false;
5150 }
5151
5152 if (!validateDivScale(Inst)) {
5153 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5154 return false;
5155 }
5156 if (!validateWaitCnt(Inst, Operands)) {
5157 return false;
5158 }
5159 if (!validateExeczVcczOperands(Operands)) {
5160 return false;
5161 }
5162 if (!validateTFE(Inst, Operands)) {
5163 return false;
5164 }
5165
5166 return true;
5167}
5168
5169static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5170 const FeatureBitset &FBS,
5171 unsigned VariantID = 0);
5172
5173static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5174 const FeatureBitset &AvailableFeatures,
5175 unsigned VariantID);
5176
5177bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5178 const FeatureBitset &FBS) {
5179 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5180}
5181
5182bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5183 const FeatureBitset &FBS,
5184 ArrayRef<unsigned> Variants) {
5185 for (auto Variant : Variants) {
5186 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5187 return true;
5188 }
5189
5190 return false;
5191}
5192
5193bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5194 const SMLoc &IDLoc) {
5195 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5196
5197 // Check if requested instruction variant is supported.
5198 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5199 return false;
5200
5201 // This instruction is not supported.
5202 // Clear any other pending errors because they are no longer relevant.
5203 getParser().clearPendingErrors();
5204
5205 // Requested instruction variant is not supported.
5206 // Check if any other variants are supported.
5207 StringRef VariantName = getMatchedVariantName();
5208 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5209 return Error(IDLoc,
5210 Twine(VariantName,
5211 " variant of this instruction is not supported"));
5212 }
5213
5214 // Check if this instruction may be used with a different wavesize.
5215 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5216 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5217
5218 FeatureBitset FeaturesWS32 = getFeatureBits();
5219 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5220 .flip(AMDGPU::FeatureWavefrontSize32);
5221 FeatureBitset AvailableFeaturesWS32 =
5222 ComputeAvailableFeatures(FeaturesWS32);
5223
5224 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5225 return Error(IDLoc, "instruction requires wavesize=32");
5226 }
5227
5228 // Finally check if this instruction is supported on any other GPU.
5229 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5230 return Error(IDLoc, "instruction not supported on this GPU");
5231 }
5232
5233 // Instruction not supported on any GPU. Probably a typo.
5234 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5235 return Error(IDLoc, "invalid instruction" + Suggestion);
5236}
5237
5238static bool isInvalidVOPDY(const OperandVector &Operands,
5239 uint64_t InvalidOprIdx) {
5240 assert(InvalidOprIdx < Operands.size());
5241 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5242 if (Op.isToken() && InvalidOprIdx > 1) {
5243 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5244 return PrevOp.isToken() && PrevOp.getToken() == "::";
5245 }
5246 return false;
5247}
5248
5249bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5250 OperandVector &Operands,
5251 MCStreamer &Out,
5252 uint64_t &ErrorInfo,
5253 bool MatchingInlineAsm) {
5254 MCInst Inst;
5255 unsigned Result = Match_Success;
5256 for (auto Variant : getMatchedVariants()) {
5257 uint64_t EI;
5258 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5259 Variant);
5260 // Match statuses are ordered from least to most specific; keep the most
5261 // specific status seen so far as the result:
5262 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5263 if (R == Match_Success || R == Match_MissingFeature ||
5264 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5265 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5266 Result != Match_MissingFeature)) {
5267 Result = R;
5268 ErrorInfo = EI;
5269 }
5270 if (R == Match_Success)
5271 break;
5272 }
5273
5274 if (Result == Match_Success) {
5275 if (!validateInstruction(Inst, IDLoc, Operands)) {
5276 return true;
5277 }
5278 Inst.setLoc(IDLoc);
5279 Out.emitInstruction(Inst, getSTI());
5280 return false;
5281 }
5282
5283 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5284 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5285 return true;
5286 }
5287
5288 switch (Result) {
5289 default: break;
5290 case Match_MissingFeature:
5291 // It has been verified that the specified instruction
5292 // mnemonic is valid. A match was found but it requires
5293 // features which are not supported on this GPU.
5294 return Error(IDLoc, "operands are not valid for this GPU or mode");
5295
5296 case Match_InvalidOperand: {
5297 SMLoc ErrorLoc = IDLoc;
5298 if (ErrorInfo != ~0ULL) {
5299 if (ErrorInfo >= Operands.size()) {
5300 return Error(IDLoc, "too few operands for instruction");
5301 }
5302 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5303 if (ErrorLoc == SMLoc())
5304 ErrorLoc = IDLoc;
5305
5306 if (isInvalidVOPDY(Operands, ErrorInfo)) {
5307 return Error(ErrorLoc, "invalid VOPDY instruction");
5308 }
5309 return Error(ErrorLoc, "invalid operand for instruction");
5310 }
5311
5312 case Match_MnemonicFail:
5313 llvm_unreachable("Invalid instructions should have been handled already");
5314 }
5315 llvm_unreachable("Implement any new match types added!");
5316}
5317
5318bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5319 int64_t Tmp = -1;
5320 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5321 return true;
5322 }
5323 if (getParser().parseAbsoluteExpression(Tmp)) {
5324 return true;
5325 }
5326 Ret = static_cast<uint32_t>(Tmp);
5327 return false;
5328}
5329
5330bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5331 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5332 return TokError("directive only supported for amdgcn architecture");
5333
5334 std::string TargetIDDirective;
5335 SMLoc TargetStart = getTok().getLoc();
5336 if (getParser().parseEscapedString(TargetIDDirective))
5337 return true;
5338
5339 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5340 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5341 return getParser().Error(TargetRange.Start,
5342 (Twine(".amdgcn_target directive's target id ") +
5343 Twine(TargetIDDirective) +
5344 Twine(" does not match the specified target id ") +
5345 Twine(getTargetStreamer().getTargetID()->toString())).str());
5346
5347 return false;
5348}
5349
5350bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5351 return Error(Range.Start, "value out of range", Range);
5352}
5353
5354bool AMDGPUAsmParser::calculateGPRBlocks(
5355 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
5356 bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
5357 unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
5358 SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
5359 // TODO(scott.linder): These calculations are duplicated from
5360 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5361 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5362
5363 unsigned NumVGPRs = NextFreeVGPR;
5364 unsigned NumSGPRs = NextFreeSGPR;
5365
5366 if (Version.Major >= 10)
5367 NumSGPRs = 0;
5368 else {
5369 unsigned MaxAddressableNumSGPRs =
5370 IsaInfo::getAddressableNumSGPRs(&getSTI());
5371
5372 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
5373 NumSGPRs > MaxAddressableNumSGPRs)
5374 return OutOfRangeError(SGPRRange);
5375
5376 NumSGPRs +=
5377 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
5378
5379 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5380 NumSGPRs > MaxAddressableNumSGPRs)
5381 return OutOfRangeError(SGPRRange);
5382
5383 if (Features.test(FeatureSGPRInitBug))
5384 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
5385 }
5386
5387 VGPRBlocks = IsaInfo::getEncodedNumVGPRBlocks(&getSTI(), NumVGPRs,
5388 EnableWavefrontSize32);
5389 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
5390
5391 return false;
5392}
5393
5394bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5395 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5396 return TokError("directive only supported for amdgcn architecture");
5397
5398 if (!isHsaAbi(getSTI()))
5399 return TokError("directive only supported for amdhsa OS");
5400
5401 StringRef KernelName;
5402 if (getParser().parseIdentifier(KernelName))
5403 return true;
5404
5407 &getSTI(), getContext());
5408
5409 StringSet<> Seen;
5410
5411 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5412
5413 SMRange VGPRRange;
5414 uint64_t NextFreeVGPR = 0;
5415 uint64_t AccumOffset = 0;
5416 uint64_t SharedVGPRCount = 0;
5417 uint64_t PreloadLength = 0;
5418 uint64_t PreloadOffset = 0;
5419 SMRange SGPRRange;
5420 uint64_t NextFreeSGPR = 0;
5421
5422 // Count the number of user SGPRs implied from the enabled feature bits.
5423 unsigned ImpliedUserSGPRCount = 0;
5424
5425 // Track if the asm explicitly contains the directive for the user SGPR
5426 // count.
5427 std::optional<unsigned> ExplicitUserSGPRCount;
5428 bool ReserveVCC = true;
5429 bool ReserveFlatScr = true;
5430 std::optional<bool> EnableWavefrontSize32;
5431
5432 while (true) {
5433 while (trySkipToken(AsmToken::EndOfStatement));
5434
5435 StringRef ID;
5436 SMRange IDRange = getTok().getLocRange();
5437 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5438 return true;
5439
5440 if (ID == ".end_amdhsa_kernel")
5441 break;
5442
5443 if (!Seen.insert(ID).second)
5444 return TokError(".amdhsa_ directives cannot be repeated");
5445
5446 SMLoc ValStart = getLoc();
5447 const MCExpr *ExprVal;
5448 if (getParser().parseExpression(ExprVal))
5449 return true;
5450 SMLoc ValEnd = getLoc();
5451 SMRange ValRange = SMRange(ValStart, ValEnd);
5452
5453 int64_t IVal = 0;
5454 uint64_t Val = IVal;
5455 bool EvaluatableExpr;
5456 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
5457 if (IVal < 0)
5458 return OutOfRangeError(ValRange);
5459 Val = IVal;
5460 }
5461
5462#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5463 if (!isUInt<ENTRY##_WIDTH>(Val)) \
5464 return OutOfRangeError(RANGE); \
5465 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
5466 getContext());
5467
5468// Some fields use the parsed value immediately, which requires the expression
5469// to be resolvable at this point.
5470#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
5471 if (!(RESOLVED)) \
5472 return Error(IDRange.Start, "directive should have resolvable expression", \
5473 IDRange);
5474
5475 if (ID == ".amdhsa_group_segment_fixed_size") {
5476 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
5477 CHAR_BIT>(Val))
5478 return OutOfRangeError(ValRange);
5479 KD.group_segment_fixed_size = ExprVal;
5480 } else if (ID == ".amdhsa_private_segment_fixed_size") {
5481 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
5482 CHAR_BIT>(Val))
5483 return OutOfRangeError(ValRange);
5484 KD.private_segment_fixed_size = ExprVal;
5485 } else if (ID == ".amdhsa_kernarg_size") {
5486 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
5487 return OutOfRangeError(ValRange);
5488 KD.kernarg_size = ExprVal;
5489 } else if (ID == ".amdhsa_user_sgpr_count") {
5490 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5491 ExplicitUserSGPRCount = Val;
5492 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5493 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5494 if (hasArchitectedFlatScratch())
5495 return Error(IDRange.Start,
5496 "directive is not supported with architected flat scratch",
5497 IDRange);
5498 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5499 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5500 ExprVal, ValRange);
5501 if (Val)
5502 ImpliedUserSGPRCount += 4;
5503 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
5504 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5505 if (!hasKernargPreload())
5506 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5507
5508 if (Val > getMaxNumUserSGPRs())
5509 return OutOfRangeError(ValRange);
5510 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
5511 ValRange);
5512 if (Val) {
5513 ImpliedUserSGPRCount += Val;
5514 PreloadLength = Val;
5515 }
5516 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
5517 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5518 if (!hasKernargPreload())
5519 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5520
5521 if (Val >= 1024)
5522 return OutOfRangeError(ValRange);
5523 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
5524 ValRange);
5525 if (Val)
5526 PreloadOffset = Val;
5527 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5528 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5529 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5530 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
5531 ValRange);
5532 if (Val)
5533 ImpliedUserSGPRCount += 2;
5534 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5535 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5536 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5537 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
5538 ValRange);
5539 if (Val)
5540 ImpliedUserSGPRCount += 2;
5541 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5542 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5543 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5544 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5545 ExprVal, ValRange);
5546 if (Val)
5547 ImpliedUserSGPRCount += 2;
5548 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5549 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5550 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5551 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
5552 ValRange);
5553 if (Val)
5554 ImpliedUserSGPRCount += 2;
5555 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5556 if (hasArchitectedFlatScratch())
5557 return Error(IDRange.Start,
5558 "directive is not supported with architected flat scratch",
5559 IDRange);
5560 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5561 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5562 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
5563 ExprVal, ValRange);
5564 if (Val)
5565 ImpliedUserSGPRCount += 2;
5566 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5567 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5568 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5569 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5570 ExprVal, ValRange);
5571 if (Val)
5572 ImpliedUserSGPRCount += 1;
5573 } else if (ID == ".amdhsa_wavefront_size32") {
5574 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5575 if (IVersion.Major < 10)
5576 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5577 EnableWavefrontSize32 = Val;
5578 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5579 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
5580 ValRange);
5581 } else if (ID == ".amdhsa_uses_dynamic_stack") {
5582 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5583 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
5584 ValRange);
5585 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5586 if (hasArchitectedFlatScratch())
5587 return Error(IDRange.Start,
5588 "directive is not supported with architected flat scratch",
5589 IDRange);
5590 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5591 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5592 ValRange);
5593 } else if (ID == ".amdhsa_enable_private_segment") {
5594 if (!hasArchitectedFlatScratch())
5595 return Error(
5596 IDRange.Start,
5597 "directive is not supported without architected flat scratch",
5598 IDRange);
5599 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5600 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5601 ValRange);
5602 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5603 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5604 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
5605 ValRange);
5606 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5607 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5608 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
5609 ValRange);
5610 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5611 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5612 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
5613 ValRange);
5614 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5615 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5616 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
5617 ValRange);
5618 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5619 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5620 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
5621 ValRange);
5622 } else if (ID == ".amdhsa_next_free_vgpr") {
5623 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5624 VGPRRange = ValRange;
5625 NextFreeVGPR = Val;
5626 } else if (ID == ".amdhsa_next_free_sgpr") {
5627 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5628 SGPRRange = ValRange;
5629 NextFreeSGPR = Val;
5630 } else if (ID == ".amdhsa_accum_offset") {
5631 if (!isGFX90A())
5632 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5633 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5634 AccumOffset = Val;
5635 } else if (ID == ".amdhsa_reserve_vcc") {
5636 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5637 if (!isUInt<1>(Val))
5638 return OutOfRangeError(ValRange);
5639 ReserveVCC = Val;
5640 } else if (ID == ".amdhsa_reserve_flat_scratch") {
5641 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5642 if (IVersion.Major < 7)
5643 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5644 if (hasArchitectedFlatScratch())
5645 return Error(IDRange.Start,
5646 "directive is not supported with architected flat scratch",
5647 IDRange);
5648 if (!isUInt<1>(Val))
5649 return OutOfRangeError(ValRange);
5650 ReserveFlatScr = Val;
5651 } else if (ID == ".amdhsa_reserve_xnack_mask") {
5652 if (IVersion.Major < 8)
5653 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5654 if (!isUInt<1>(Val))
5655 return OutOfRangeError(ValRange);
5656 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5657 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5658 IDRange);
5659 } else if (ID == ".amdhsa_float_round_mode_32") {
5660 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5661 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
5662 ValRange);
5663 } else if (ID == ".amdhsa_float_round_mode_16_64") {
5664 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5665 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
5666 ValRange);
5667 } else if (ID == ".amdhsa_float_denorm_mode_32") {
5668 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5669 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
5670 ValRange);
5671 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5672 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5673 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
5674 ValRange);
5675 } else if (ID == ".amdhsa_dx10_clamp") {
5676 if (IVersion.Major >= 12)
5677 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5678 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5679 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
5680 ValRange);
5681 } else if (ID == ".amdhsa_ieee_mode") {
5682 if (IVersion.Major >= 12)
5683 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5684 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5685 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
5686 ValRange);
5687 } else if (ID == ".amdhsa_fp16_overflow") {
5688 if (IVersion.Major < 9)
5689 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5690 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5691 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
5692 ValRange);
5693 } else if (ID == ".amdhsa_tg_split") {
5694 if (!isGFX90A())
5695 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5696 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
5697 ExprVal, ValRange);
5698 } else if (ID == ".amdhsa_workgroup_processor_mode") {
5699 if (IVersion.Major < 10)
5700 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5701 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5702 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
5703 ValRange);
5704 } else if (ID == ".amdhsa_memory_ordered") {
5705 if (IVersion.Major < 10)
5706 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5707 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5708 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
5709 ValRange);
5710 } else if (ID == ".amdhsa_forward_progress") {
5711 if (IVersion.Major < 10)
5712 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5713 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5714 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
5715 ValRange);
5716 } else if (ID == ".amdhsa_shared_vgpr_count") {
5717 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5718 if (IVersion.Major < 10 || IVersion.Major >= 12)
5719 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
5720 IDRange);
5721 SharedVGPRCount = Val;
5722 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5723 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
5724 ValRange);
5725 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5726 PARSE_BITS_ENTRY(
5727 KD.compute_pgm_rsrc2,
5728 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
5729 ExprVal, ValRange);
5730 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5731 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5732 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5733 ExprVal, ValRange);
5734 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5735 PARSE_BITS_ENTRY(
5736 KD.compute_pgm_rsrc2,
5737 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
5738 ExprVal, ValRange);
5739 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5740 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5741 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5742 ExprVal, ValRange);
5743 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5744 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5745 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5746 ExprVal, ValRange);
5747 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5748 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5749 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5750 ExprVal, ValRange);
5751 } else if (ID == ".amdhsa_exception_int_div_zero") {
5752 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5753 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5754 ExprVal, ValRange);
5755 } else if (ID == ".amdhsa_round_robin_scheduling") {
5756 if (IVersion.Major < 12)
5757 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
5758 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5759 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
5760 ValRange);
5761 } else {
5762 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5763 }
5764
5765#undef PARSE_BITS_ENTRY
5766 }
5767
5768 if (!Seen.contains(".amdhsa_next_free_vgpr"))
5769 return TokError(".amdhsa_next_free_vgpr directive is required");
5770
5771 if (!Seen.contains(".amdhsa_next_free_sgpr"))
5772 return TokError(".amdhsa_next_free_sgpr directive is required");
5773
5774 unsigned VGPRBlocks;
5775 unsigned SGPRBlocks;
5776 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5777 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5778 EnableWavefrontSize32, NextFreeVGPR,
5779 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5780 SGPRBlocks))
5781 return true;
5782
5783 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5784 VGPRBlocks))
5785 return OutOfRangeError(VGPRRange);
5786 AMDGPU::MCKernelDescriptor::bits_set(
5787 KD.compute_pgm_rsrc1, MCConstantExpr::create(VGPRBlocks, getContext()),
5788 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
5789 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
5790
5791 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5792 SGPRBlocks))
5793 return OutOfRangeError(SGPRRange);
5794 AMDGPU::MCKernelDescriptor::bits_set(
5795 KD.compute_pgm_rsrc1, MCConstantExpr::create(SGPRBlocks, getContext()),
5796 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
5797 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
5798
5799 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5800 return TokError("amdgpu_user_sgpr_count smaller than implied by "
5801 "enabled user SGPRs");
5802
5803 unsigned UserSGPRCount =
5804 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5805
5806 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5807 return TokError("too many user SGPRs enabled");
5808 AMDGPU::MCKernelDescriptor::bits_set(
5809 KD.compute_pgm_rsrc2, MCConstantExpr::create(UserSGPRCount, getContext()),
5810 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
5811 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
5812
5813 int64_t IVal = 0;
5814 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
5815 return TokError("Kernarg size should be resolvable");
5816 uint64_t kernarg_size = IVal;
5817 if (PreloadLength && kernarg_size &&
5818 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
5819 return TokError("Kernarg preload length + offset is larger than the "
5820 "kernarg segment size");
5821
5822 if (isGFX90A()) {
5823 if (!Seen.contains(".amdhsa_accum_offset"))
5824 return TokError(".amdhsa_accum_offset directive is required");
5825 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5826 return TokError("accum_offset should be in range [4..256] in "
5827 "increments of 4");
5828 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5829 return TokError("accum_offset exceeds total VGPR allocation");
5830 AMDGPU::MCKernelDescriptor::bits_set(
5831 KD.compute_pgm_rsrc3,
5832 MCConstantExpr::create(AccumOffset / 4 - 1, getContext()),
5833 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
5834 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
5835 }
5836
5837 if (IVersion.Major >= 10 && IVersion.Major < 12) {
5838 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
5839 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5840 return TokError("shared_vgpr_count directive not valid on "
5841 "wavefront size 32");
5842 }
5843 if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5844 return TokError("shared_vgpr_count*2 + "
5845 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5846 "exceed 63\n");
5847 }
5848 }
5849
5850 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
5851 NextFreeVGPR, NextFreeSGPR,
5852 ReserveVCC, ReserveFlatScr);
5853 return false;
5854}
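// Illustrative example (editorial, not from the upstream source): a minimal
// .amdhsa_kernel block accepted by the parser above. Only
// .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr are mandatory (plus
// .amdhsa_accum_offset on gfx90a); all other fields keep their defaults.
// "my_kernel" and the values are placeholders:
//
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//     .amdhsa_user_sgpr_kernarg_segment_ptr 1
//   .end_amdhsa_kernel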
5855
5856bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
5857 int64_t Version;
5858 if (ParseAsAbsoluteExpression(Version))
5859 return true;
5860
5861 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
5862 return false;
5863}
5864
5865bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5866 amd_kernel_code_t &Header) {
5867 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5868 // assembly for backwards compatibility.
5869 if (ID == "max_scratch_backing_memory_byte_size") {
5870 Parser.eatToEndOfStatement();
5871 return false;
5872 }
5873
5874 SmallString<40> ErrStr;
5875 raw_svector_ostream Err(ErrStr);
5876 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5877 return TokError(Err.str());
5878 }
5879 Lex();
5880
5881 if (ID == "enable_dx10_clamp") {
5882 if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) &&
5883 isGFX12Plus())
5884 return TokError("enable_dx10_clamp=1 is not allowed on GFX12+");
5885 }
5886
5887 if (ID == "enable_ieee_mode") {
5888 if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) &&
5889 isGFX12Plus())
5890 return TokError("enable_ieee_mode=1 is not allowed on GFX12+");
5891 }
5892
5893 if (ID == "enable_wavefront_size32") {
5894 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5895 if (!isGFX10Plus())
5896 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5897 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5898 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5899 } else {
5900 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5901 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5902 }
5903 }
5904
5905 if (ID == "wavefront_size") {
5906 if (Header.wavefront_size == 5) {
5907 if (!isGFX10Plus())
5908 return TokError("wavefront_size=5 is only allowed on GFX10+");
5909 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5910 return TokError("wavefront_size=5 requires +WavefrontSize32");
5911 } else if (Header.wavefront_size == 6) {
5912 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5913 return TokError("wavefront_size=6 requires +WavefrontSize64");
5914 }
5915 }
5916
5917 if (ID == "enable_wgp_mode") {
5918 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5919 !isGFX10Plus())
5920 return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5921 }
5922
5923 if (ID == "enable_mem_ordered") {
5924 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5925 !isGFX10Plus())
5926 return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5927 }
5928
5929 if (ID == "enable_fwd_progress") {
5930 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5931 !isGFX10Plus())
5932 return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5933 }
5934
5935 return false;
5936}
5937
5938bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5939 amd_kernel_code_t Header;
5940 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5941
5942 while (true) {
5943 // Lex EndOfStatement. This is in a while loop, because lexing a comment
5944 // will set the current token to EndOfStatement.
5945 while(trySkipToken(AsmToken::EndOfStatement));
5946
5947 StringRef ID;
5948 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5949 return true;
5950
5951 if (ID == ".end_amd_kernel_code_t")
5952 break;
5953
5954 if (ParseAMDKernelCodeTValue(ID, Header))
5955 return true;
5956 }
5957
5958 getTargetStreamer().EmitAMDKernelCodeT(Header);
5959
5960 return false;
5961}
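// Illustrative example (editorial, not from the upstream source): the legacy
// non-HSA-ABI form parsed above is a list of "field = value" assignments for
// amd_kernel_code_t, terminated by the end marker. Field values here are
// placeholders:
//
//   .amd_kernel_code_t
//     kernel_code_entry_byte_offset = 256
//     wavefront_size = 6
//   .end_amd_kernel_code_t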
5962
5963bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5964 StringRef KernelName;
5965 if (!parseId(KernelName, "expected symbol name"))
5966 return true;
5967
5968 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5969 ELF::STT_AMDGPU_HSA_KERNEL);
5970
5971 KernelScope.initialize(getContext());
5972 return false;
5973}
5974
5975bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5976 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5977 return Error(getLoc(),
5978 ".amd_amdgpu_isa directive is not available on non-amdgcn "
5979 "architectures");
5980 }
5981
5982 auto TargetIDDirective = getLexer().getTok().getStringContents();
5983 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5984 return Error(getParser().getTok().getLoc(), "target id must match options");
5985
5986 getTargetStreamer().EmitISAVersion();
5987 Lex();
5988
5989 return false;
5990}
5991
5992bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5993 assert(isHsaAbi(getSTI()));
5994
5995 std::string HSAMetadataString;
5996 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
5997 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
5998 return true;
5999
6000 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6001 return Error(getLoc(), "invalid HSA metadata");
6002
6003 return false;
6004}
6005
6006/// Common code to parse out a block of text (typically YAML) between start and
6007/// end directives.
6008bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6009 const char *AssemblerDirectiveEnd,
6010 std::string &CollectString) {
6011
6012 raw_string_ostream CollectStream(CollectString);
6013
6014 getLexer().setSkipSpace(false);
6015
6016 bool FoundEnd = false;
6017 while (!isToken(AsmToken::Eof)) {
6018 while (isToken(AsmToken::Space)) {
6019 CollectStream << getTokenStr();
6020 Lex();
6021 }
6022
6023 if (trySkipId(AssemblerDirectiveEnd)) {
6024 FoundEnd = true;
6025 break;
6026 }
6027
6028 CollectStream << Parser.parseStringToEndOfStatement()
6029 << getContext().getAsmInfo()->getSeparatorString();
6030
6031 Parser.eatToEndOfStatement();
6032 }
6033
6034 getLexer().setSkipSpace(true);
6035
6036 if (isToken(AsmToken::Eof) && !FoundEnd) {
6037 return TokError(Twine("expected directive ") +
6038 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6039 }
6040
6041 CollectStream.flush();
6042 return false;
6043}
6044
6045/// Parse the assembler directive for new MsgPack-format PAL metadata.
6046bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6047 std::string String;
6048 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6049 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
6050 return true;
6051
6052 auto PALMetadata = getTargetStreamer().getPALMetadata();
6053 if (!PALMetadata->setFromString(String))
6054 return Error(getLoc(), "invalid PAL metadata");
6055 return false;
6056}
6057
6058/// Parse the assembler directive for old linear-format PAL metadata.
6059bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6060 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6061 return Error(getLoc(),
6062 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6063 "not available on non-amdpal OSes")).str());
6064 }
6065
6066 auto PALMetadata = getTargetStreamer().getPALMetadata();
6067 PALMetadata->setLegacy();
6068 for (;;) {
6069 uint32_t Key, Value;
6070 if (ParseAsAbsoluteExpression(Key)) {
6071 return TokError(Twine("invalid value in ") +
6072 Twine(PALMD::AssemblerDirective));
6073 }
6074 if (!trySkipToken(AsmToken::Comma)) {
6075 return TokError(Twine("expected an even number of values in ") +
6076 Twine(PALMD::AssemblerDirective));
6077 }
6078 if (ParseAsAbsoluteExpression(Value)) {
6079 return TokError(Twine("invalid value in ") +
6080 Twine(PALMD::AssemblerDirective));
6081 }
6082 PALMetadata->setRegister(Key, Value);
6083 if (!trySkipToken(AsmToken::Comma))
6084 break;
6085 }
6086 return false;
6087}
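// Illustrative example (editorial, not from the upstream source, and assuming
// PALMD::AssemblerDirective spells ".amd_amdgpu_pal_metadata"): the legacy
// form is a flat, even-length list of register/value pairs; the register
// numbers and values below are placeholders:
//
//   .amd_amdgpu_pal_metadata 0x2c0a, 0x0, 0x2c0b, 0x42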
6088
6089/// ParseDirectiveAMDGPULDS
6090/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
6091bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6092 if (getParser().checkForValidSection())
6093 return true;
6094
6095 StringRef Name;
6096 SMLoc NameLoc = getLoc();
6097 if (getParser().parseIdentifier(Name))
6098 return TokError("expected identifier in directive");
6099
6100 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6101 if (getParser().parseComma())
6102 return true;
6103
6104 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6105
6106 int64_t Size;
6107 SMLoc SizeLoc = getLoc();
6108 if (getParser().parseAbsoluteExpression(Size))
6109 return true;
6110 if (Size < 0)
6111 return Error(SizeLoc, "size must be non-negative");
6112 if (Size > LocalMemorySize)
6113 return Error(SizeLoc, "size is too large");
6114
6115 int64_t Alignment = 4;
6116 if (trySkipToken(AsmToken::Comma)) {
6117 SMLoc AlignLoc = getLoc();
6118 if (getParser().parseAbsoluteExpression(Alignment))
6119 return true;
6120 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6121 return Error(AlignLoc, "alignment must be a power of two");
6122
6123 // Alignment larger than the size of LDS is possible in theory, as long
6124 // as the linker manages to place the symbol at address 0, but we do want
6125 // to make sure the alignment fits nicely into a 32-bit integer.
6126 if (Alignment >= 1u << 31)
6127 return Error(AlignLoc, "alignment is too large");
6128 }
6129
6130 if (parseEOL())
6131 return true;
6132
6133 Symbol->redefineIfPossible();
6134 if (!Symbol->isUndefined())
6135 return Error(NameLoc, "invalid symbol redefinition");
6136
6137 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6138 return false;
6139}
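// Illustrative example (editorial, not from the upstream source) of the
// grammar documented above; "lds_buf" is a placeholder symbol reserving
// 1024 bytes of LDS with 16-byte alignment:
//
//   .amdgpu_lds lds_buf, 1024, 16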
6140
6141bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6142 StringRef IDVal = DirectiveID.getString();
6143
6144 if (isHsaAbi(getSTI())) {
6145 if (IDVal == ".amdhsa_kernel")
6146 return ParseDirectiveAMDHSAKernel();
6147
6148 if (IDVal == ".amdhsa_code_object_version")
6149 return ParseDirectiveAMDHSACodeObjectVersion();
6150
6151 // TODO: Restructure/combine with PAL metadata directive.
6152 if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
6153 return ParseDirectiveHSAMetadata();
6154 } else {
6155 if (IDVal == ".amd_kernel_code_t")
6156 return ParseDirectiveAMDKernelCodeT();
6157
6158 if (IDVal == ".amdgpu_hsa_kernel")
6159 return ParseDirectiveAMDGPUHsaKernel();
6160
6161 if (IDVal == ".amd_amdgpu_isa")
6162 return ParseDirectiveISAVersion();
6163
6164 if (IDVal == HSAMD::AssemblerDirectiveBegin) {
6165 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6166 Twine(" directive is "
6167 "not available on non-amdhsa OSes"))
6168 .str());
6169 }
6170 }
6171
6172 if (IDVal == ".amdgcn_target")
6173 return ParseDirectiveAMDGCNTarget();
6174
6175 if (IDVal == ".amdgpu_lds")
6176 return ParseDirectiveAMDGPULDS();
6177
6178 if (IDVal == PALMD::AssemblerDirectiveBegin)
6179 return ParseDirectivePALMetadataBegin();
6180
6181 if (IDVal == PALMD::AssemblerDirective)
6182 return ParseDirectivePALMetadata();
6183
6184 return true;
6185}
6186
6187bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6188 unsigned RegNo) {
6189
6190 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
6191 return isGFX9Plus();
6192
6193 // GFX10+ has 2 more SGPRs 104 and 105.
6194 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
6195 return hasSGPR104_SGPR105();
6196
6197 switch (RegNo) {
6198 case AMDGPU::SRC_SHARED_BASE_LO:
6199 case AMDGPU::SRC_SHARED_BASE:
6200 case AMDGPU::SRC_SHARED_LIMIT_LO:
6201 case AMDGPU::SRC_SHARED_LIMIT:
6202 case AMDGPU::SRC_PRIVATE_BASE_LO:
6203 case AMDGPU::SRC_PRIVATE_BASE:
6204 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
6205 case AMDGPU::SRC_PRIVATE_LIMIT:
6206 return isGFX9Plus();
6207 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
6208 return isGFX9Plus() && !isGFX11Plus();
6209 case AMDGPU::TBA:
6210 case AMDGPU::TBA_LO:
6211 case AMDGPU::TBA_HI:
6212 case AMDGPU::TMA:
6213 case AMDGPU::TMA_LO:
6214 case AMDGPU::TMA_HI:
6215 return !isGFX9Plus();
6216 case AMDGPU::XNACK_MASK:
6217 case AMDGPU::XNACK_MASK_LO:
6218 case AMDGPU::XNACK_MASK_HI:
6219 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6220 case AMDGPU::SGPR_NULL:
6221 return isGFX10Plus();
6222 default:
6223 break;
6224 }
6225
6226 if (isCI())
6227 return true;
6228
6229 if (isSI() || isGFX10Plus()) {
6230 // No flat_scr on SI.
6231 // On GFX10Plus flat scratch is not a valid register operand and can only be
6232 // accessed with s_setreg/s_getreg.
6233 switch (RegNo) {
6234 case AMDGPU::FLAT_SCR:
6235 case AMDGPU::FLAT_SCR_LO:
6236 case AMDGPU::FLAT_SCR_HI:
6237 return false;
6238 default:
6239 return true;
6240 }
6241 }
6242
6243 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6244 // SI/CI have.
6245 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
6246 return hasSGPR102_SGPR103();
6247
6248 return true;
6249}
6250
6251ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6252 StringRef Mnemonic,
6253 OperandMode Mode) {
6254 ParseStatus Res = parseVOPD(Operands);
6255 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6256 return Res;
6257
6258 // Try to parse with a custom parser
6259 Res = MatchOperandParserImpl(Operands, Mnemonic);
6260
6261 // If we successfully parsed the operand or if there was an error parsing,
6262 // we are done.
6263 //
6264 // If we are parsing after we reach EndOfStatement then this means we
6265 // are appending default values to the Operands list. This is only done
6266 // by custom parser, so we shouldn't continue on to the generic parsing.
6267 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6268 return Res;
6269
6270 SMLoc RBraceLoc;
6271 SMLoc LBraceLoc = getLoc();
6272 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6273 unsigned Prefix = Operands.size();
6274
6275 for (;;) {
6276 auto Loc = getLoc();
6277 Res = parseReg(Operands);
6278 if (Res.isNoMatch())
6279 Error(Loc, "expected a register");
6280 if (!Res.isSuccess())
6281 return ParseStatus::Failure;
6282
6283 RBraceLoc = getLoc();
6284 if (trySkipToken(AsmToken::RBrac))
6285 break;
6286
6287 if (!skipToken(AsmToken::Comma,
6288 "expected a comma or a closing square bracket"))
6289 return ParseStatus::Failure;
6290 }
6291
6292 if (Operands.size() - Prefix > 1) {
6293 Operands.insert(Operands.begin() + Prefix,
6294 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6295 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6296 }
6297
6298 return ParseStatus::Success;
6299 }
6300
6301 return parseRegOrImm(Operands);
6302}
6303
6304StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6305 // Clear any forced encodings from the previous instruction.
6306 setForcedEncodingSize(0);
6307 setForcedDPP(false);
6308 setForcedSDWA(false);
6309
6310 if (Name.ends_with("_e64_dpp")) {
6311 setForcedDPP(true);
6312 setForcedEncodingSize(64);
6313 return Name.substr(0, Name.size() - 8);
6314 } else if (Name.ends_with("_e64")) {
6315 setForcedEncodingSize(64);
6316 return Name.substr(0, Name.size() - 4);
6317 } else if (Name.ends_with("_e32")) {
6318 setForcedEncodingSize(32);
6319 return Name.substr(0, Name.size() - 4);
6320 } else if (Name.ends_with("_dpp")) {
6321 setForcedDPP(true);
6322 return Name.substr(0, Name.size() - 4);
6323 } else if (Name.ends_with("_sdwa")) {
6324 setForcedSDWA(true);
6325 return Name.substr(0, Name.size() - 5);
6326 }
6327 return Name;
6328}
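// Illustrative examples (editorial, not from the upstream source) of the
// suffix stripping performed above:
//
//   "v_add_f32_e64"     -> "v_add_f32"  (forced 64-bit encoding)
//   "v_add_f32_e32"     -> "v_add_f32"  (forced 32-bit encoding)
//   "v_mov_b32_sdwa"    -> "v_mov_b32"  (forced SDWA)
//   "v_mov_b32_dpp"     -> "v_mov_b32"  (forced DPP)
//   "v_add_f32_e64_dpp" -> "v_add_f32"  (forced DPP, 64-bit encoding)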
6329
6330static void applyMnemonicAliases(StringRef &Mnemonic,
6331 const FeatureBitset &Features,
6332 unsigned VariantID);
6333
6334bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
6335 StringRef Name,
6336 SMLoc NameLoc, OperandVector &Operands) {
6337 // Add the instruction mnemonic
6338 Name = parseMnemonicSuffix(Name);
6339
6340 // If the target architecture uses MnemonicAlias, call it here to parse
6341 // operands correctly.
6342 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6343
6344 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6345
6346 bool IsMIMG = Name.starts_with("image_");
6347
6348 while (!trySkipToken(AsmToken::EndOfStatement)) {
6349 OperandMode Mode = OperandMode_Default;
6350 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6351 Mode = OperandMode_NSA;
6352 ParseStatus Res = parseOperand(Operands, Name, Mode);
6353
6354 if (!Res.isSuccess()) {
6355 checkUnsupportedInstruction(Name, NameLoc);
6356 if (!Parser.hasPendingError()) {
6357 // FIXME: use real operand location rather than the current location.
6358 StringRef Msg = Res.isFailure() ? "failed parsing operand."
6359 : "not a valid operand.";
6360 Error(getLoc(), Msg);
6361 }
6362 while (!trySkipToken(AsmToken::EndOfStatement)) {
6363 lex();
6364 }
6365 return true;
6366 }
6367
6368 // Eat the comma or space if there is one.
6369 trySkipToken(AsmToken::Comma);
6370 }
6371
6372 return false;
6373}
6374
6375//===----------------------------------------------------------------------===//
6376// Utility functions
6377//===----------------------------------------------------------------------===//
6378
6379ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6380 OperandVector &Operands) {
6381 SMLoc S = getLoc();
6382 if (!trySkipId(Name))
6383 return ParseStatus::NoMatch;
6384
6385 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6386 return ParseStatus::Success;
6387}
6388
6389ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6390 int64_t &IntVal) {
6391
6392 if (!trySkipId(Prefix, AsmToken::Colon))
6393 return ParseStatus::NoMatch;
6394
6395 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6396}
6397
6398ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6399 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6400 std::function<bool(int64_t &)> ConvertResult) {
6401 SMLoc S = getLoc();
6402 int64_t Value = 0;
6403
6404 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6405 if (!Res.isSuccess())
6406 return Res;
6407
6408 if (ConvertResult && !ConvertResult(Value)) {
6409 Error(S, "invalid " + StringRef(Prefix) + " value.");
6410 }
6411
6412 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6413 return ParseStatus::Success;
6414}
6415
6416ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6417 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6418 bool (*ConvertResult)(int64_t &)) {
6419 SMLoc S = getLoc();
6420 if (!trySkipId(Prefix, AsmToken::Colon))
6421 return ParseStatus::NoMatch;
6422
6423 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
6424 return ParseStatus::Failure;
6425
6426 unsigned Val = 0;
6427 const unsigned MaxSize = 4;
6428
6429 // FIXME: How to verify the number of elements matches the number of src
6430 // operands?
6431 for (int I = 0; ; ++I) {
6432 int64_t Op;
6433 SMLoc Loc = getLoc();
6434 if (!parseExpr(Op))
6435 return ParseStatus::Failure;
6436
6437 if (Op != 0 && Op != 1)
6438 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
6439
6440 Val |= (Op << I);
6441
6442 if (trySkipToken(AsmToken::RBrac))
6443 break;
6444
6445 if (I + 1 == MaxSize)
6446 return Error(getLoc(), "expected a closing square bracket");
6447
6448 if (!skipToken(AsmToken::Comma, "expected a comma"))
6449 return ParseStatus::Failure;
6450 }
6451
6452 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
6453 return ParseStatus::Success;
6454}
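// Illustrative example (editorial, not from the upstream source): for a
// prefix such as "neg", an input of "neg:[0,1,1]" is accepted by the routine
// above; each element must be 0 or 1 and the bits are packed LSB-first, so
// this input yields the immediate value 0b110 (6).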
6455
6456ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
6457 OperandVector &Operands,
6458 AMDGPUOperand::ImmTy ImmTy) {
6459 int64_t Bit;
6460 SMLoc S = getLoc();
6461
6462 if (trySkipId(Name)) {
6463 Bit = 1;
6464 } else if (trySkipId("no", Name)) {
6465 Bit = 0;
6466 } else {
6467 return ParseStatus::NoMatch;
6468 }
6469
6470 if (Name == "r128" && !hasMIMG_R128())
6471 return Error(S, "r128 modifier is not supported on this GPU");
6472 if (Name == "a16" && !hasA16())
6473 return Error(S, "a16 modifier is not supported on this GPU");
6474
6475 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6476 ImmTy = AMDGPUOperand::ImmTyR128A16;
6477
6478 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
6479 return ParseStatus::Success;
6480}
6481
6482unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
6483 bool &Disabling) const {
6484 Disabling = Id.consume_front("no");
6485
6486 if (isGFX940() && !Mnemo.starts_with("s_")) {
6487 return StringSwitch<unsigned>(Id)
6488 .Case("nt", AMDGPU::CPol::NT)
6489 .Case("sc0", AMDGPU::CPol::SC0)
6490 .Case("sc1", AMDGPU::CPol::SC1)
6491 .Default(0);
6492 }
6493
6494 return StringSwitch<unsigned>(Id)
6495 .Case("dlc", AMDGPU::CPol::DLC)
6496 .Case("glc", AMDGPU::CPol::GLC)
6497 .Case("scc", AMDGPU::CPol::SCC)
6498 .Case("slc", AMDGPU::CPol::SLC)
6499 .Default(0);
6500}
6501
6502ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
6503 if (isGFX12Plus()) {
6504 SMLoc StringLoc = getLoc();
6505
6506 int64_t CPolVal = 0;
6507 ParseStatus ResTH = ParseStatus::NoMatch;
6508 ParseStatus ResScope = ParseStatus::NoMatch;
6509
6510 for (;;) {
6511 if (ResTH.isNoMatch()) {
6512 int64_t TH;
6513 ResTH = parseTH(Operands, TH);
6514 if (ResTH.isFailure())
6515 return ResTH;
6516 if (ResTH.isSuccess()) {
6517 CPolVal |= TH;
6518 continue;
6519 }
6520 }
6521
6522 if (ResScope.isNoMatch()) {
6523 int64_t Scope;
6524 ResScope = parseScope(Operands, Scope);
6525 if (ResScope.isFailure())
6526 return ResScope;
6527 if (ResScope.isSuccess()) {
6528 CPolVal |= Scope;
6529 continue;
6530 }
6531 }
6532
6533 break;
6534 }
6535
6536 if (ResTH.isNoMatch() && ResScope.isNoMatch())
6537 return ParseStatus::NoMatch;
6538
6539 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
6540 AMDGPUOperand::ImmTyCPol));
6541 return ParseStatus::Success;
6542 }
6543
6544 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
6545 SMLoc OpLoc = getLoc();
6546 unsigned Enabled = 0, Seen = 0;
6547 for (;;) {
6548 SMLoc S = getLoc();
6549 bool Disabling;
6550 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
6551 if (!CPol)
6552 break;
6553
6554 lex();
6555
6556 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
6557 return Error(S, "dlc modifier is not supported on this GPU");
6558
6559 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
6560 return Error(S, "scc modifier is not supported on this GPU");
6561
6562 if (Seen & CPol)
6563 return Error(S, "duplicate cache policy modifier");
6564
6565 if (!Disabling)
6566 Enabled |= CPol;
6567
6568 Seen |= CPol;
6569 }
6570
6571 if (!Seen)
6572 return ParseStatus::NoMatch;
6573
6574 Operands.push_back(
6575 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
6576 return ParseStatus::Success;
6577}
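// Illustrative examples (editorial, not from the upstream source) of cache
// policy operands accepted above; the surrounding instructions are omitted,
// only the modifier syntax matters:
//
//   ... glc slc dlc                      (pre-GFX12 keyword form)
//   ... th:TH_LOAD_NT scope:SCOPE_SYS    (GFX12+ th:/scope: form)
//   ... nt sc0 sc1                       (gfx940 form for non-SMEM mnemonics)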
6578
6579ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
6580 int64_t &Scope) {
6581 Scope = AMDGPU::CPol::SCOPE_CU; // default;
6582
6583 StringRef Value;
6584 SMLoc StringLoc;
6585 ParseStatus Res;
6586
6587 Res = parseStringWithPrefix("scope", Value, StringLoc);
6588 if (!Res.isSuccess())
6589 return Res;
6590
6591 Scope = StringSwitch<int64_t>(Value)
6592 .Case("SCOPE_CU", AMDGPU::CPol::SCOPE_CU)
6593 .Case("SCOPE_SE", AMDGPU::CPol::SCOPE_SE)
6594 .Case("SCOPE_DEV", AMDGPU::CPol::SCOPE_DEV)
6595 .Case("SCOPE_SYS", AMDGPU::CPol::SCOPE_SYS)
6596 .Default(0xffffffff);
6597
6598 if (Scope == 0xffffffff)
6599 return Error(StringLoc, "invalid scope value");
6600
6601 return ParseStatus::Success;
6602}
6603
6604ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
6605 TH = AMDGPU::CPol::TH_RT; // default
6606
6607 StringRef Value;
6608 SMLoc StringLoc;
6609 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
6610 if (!Res.isSuccess())
6611 return Res;
6612
6613 if (Value == "TH_DEFAULT")
6614 TH = AMDGPU::CPol::TH_RT;
6615 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" ||
6616 Value == "TH_LOAD_NT_WB") {
6617 return Error(StringLoc, "invalid th value");
6618 } else if (Value.consume_front("TH_ATOMIC_")) {
6619 TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
6620 } else if (Value.consume_front("TH_LOAD_")) {
6621 TH = AMDGPU::CPol::TH_TYPE_LOAD;
6622 } else if (Value.consume_front("TH_STORE_")) {
6623 TH = AMDGPU::CPol::TH_TYPE_STORE;
6624 } else {
6625 return Error(StringLoc, "invalid th value");
6626 }
6627
6628 if (Value == "BYPASS")
6629 TH |= AMDGPU::CPol::TH_REAL_BYPASS;
6630
6631 if (TH != 0) {
6632 if (TH == AMDGPU::CPol::TH_TYPE_ATOMIC)
6633 TH |= StringSwitch<int64_t>(Value)
6634 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
6635 .Case("RT", AMDGPU::CPol::TH_RT)
6636 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
6637 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
6638 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
6639 AMDGPU::CPol::TH_ATOMIC_RETURN)
6640 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
6641 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
6642 AMDGPU::CPol::TH_ATOMIC_NT)
6643 .Default(0xffffffff);
6644 else
6645 TH |= StringSwitch<int64_t>(Value)
6646 .Case("RT", AMDGPU::CPol::TH_RT)
6647 .Case("NT", AMDGPU::CPol::TH_NT)
6648 .Case("HT", AMDGPU::CPol::TH_HT)
6649 .Case("LU", AMDGPU::CPol::TH_LU)
6650 .Case("RT_WB", AMDGPU::CPol::TH_RT_WB)
6651 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
6652 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
6653 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
6654 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
6655 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
6656 .Default(0xffffffff);
6657 }
6658
6659 if (TH == 0xffffffff)
6660 return Error(StringLoc, "invalid th value");
6661
6662 return ParseStatus::Success;
6663}
6664
6665 static void addOptionalImmOperand(
6666 MCInst& Inst, const OperandVector& Operands,
6667 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6668 AMDGPUOperand::ImmTy ImmT,
6669 int64_t Default = 0) {
6670 auto i = OptionalIdx.find(ImmT);
6671 if (i != OptionalIdx.end()) {
6672 unsigned Idx = i->second;
6673 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
6674 } else {
6675 Inst.addOperand(MCOperand::createImm(Default));
6676 }
6677}
6678
6679ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6680 StringRef &Value,
6681 SMLoc &StringLoc) {
6682 if (!trySkipId(Prefix, AsmToken::Colon))
6683 return ParseStatus::NoMatch;
6684
6685 StringLoc = getLoc();
6686 return parseId(Value, "expected an identifier") ? ParseStatus::Success
6688}
6689
6690//===----------------------------------------------------------------------===//
6691// MTBUF format
6692//===----------------------------------------------------------------------===//
6693
6694bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6695 int64_t MaxVal,
6696 int64_t &Fmt) {
6697 int64_t Val;
6698 SMLoc Loc = getLoc();
6699
6700 auto Res = parseIntWithPrefix(Pref, Val);
6701 if (Res.isFailure())
6702 return false;
6703 if (Res.isNoMatch())
6704 return true;
6705
6706 if (Val < 0 || Val > MaxVal) {
6707 Error(Loc, Twine("out of range ", StringRef(Pref)));
6708 return false;
6709 }
6710
6711 Fmt = Val;
6712 return true;
6713}
6714
6715ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
6716 AMDGPUOperand::ImmTy ImmTy) {
6717 const char *Pref = "index_key";
6718 int64_t ImmVal = 0;
6719 SMLoc Loc = getLoc();
6720 auto Res = parseIntWithPrefix(Pref, ImmVal);
6721 if (!Res.isSuccess())
6722 return Res;
6723
6724 if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
6725 return Error(Loc, Twine("out of range ", StringRef(Pref)));
6726
6727 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
6728 return Error(Loc, Twine("out of range ", StringRef(Pref)));
6729
6730 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
6731 return ParseStatus::Success;
6732}
6733
6734ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
6735 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
6736}
6737
6738ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
6739 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
6740}
6741
6742// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6743// values to live in a joint format operand in the MCInst encoding.
6744ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6745 using namespace llvm::AMDGPU::MTBUFFormat;
6746
6747 int64_t Dfmt = DFMT_UNDEF;
6748 int64_t Nfmt = NFMT_UNDEF;
6749
6750 // dfmt and nfmt can appear in either order, and each is optional.
6751 for (int I = 0; I < 2; ++I) {
6752 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6753 return ParseStatus::Failure;
6754
6755 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
6756 return ParseStatus::Failure;
6757
6758 // Skip optional comma between dfmt/nfmt
6759 // but guard against 2 commas following each other.
6760 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6761 !peekToken().is(AsmToken::Comma)) {
6762 trySkipToken(AsmToken::Comma);
6763 }
6764 }
6765
6766 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6767 return ParseStatus::NoMatch;
6768
6769 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6770 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6771
6772 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6773 return ParseStatus::Success;
6774}
6775
6776ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6777 using namespace llvm::AMDGPU::MTBUFFormat;
6778
6779 int64_t Fmt = UFMT_UNDEF;
6780
6781 if (!tryParseFmt("format", UFMT_MAX, Fmt))
6782 return ParseStatus::Failure;
6783
6784 if (Fmt == UFMT_UNDEF)
6785 return ParseStatus::NoMatch;
6786
6787 Format = Fmt;
6788 return ParseStatus::Success;
6789}
6790
6791bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6792 int64_t &Nfmt,
6793 StringRef FormatStr,
6794 SMLoc Loc) {
6795 using namespace llvm::AMDGPU::MTBUFFormat;
6796 int64_t Format;
6797
6798 Format = getDfmt(FormatStr);
6799 if (Format != DFMT_UNDEF) {
6800 Dfmt = Format;
6801 return true;
6802 }
6803
6804 Format = getNfmt(FormatStr, getSTI());
6805 if (Format != NFMT_UNDEF) {
6806 Nfmt = Format;
6807 return true;
6808 }
6809
6810 Error(Loc, "unsupported format");
6811 return false;
6812}
6813
6814ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6815 SMLoc FormatLoc,
6816 int64_t &Format) {
6817 using namespace llvm::AMDGPU::MTBUFFormat;
6818
6819 int64_t Dfmt = DFMT_UNDEF;
6820 int64_t Nfmt = NFMT_UNDEF;
6821 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6822 return ParseStatus::Failure;
6823
6824 if (trySkipToken(AsmToken::Comma)) {
6825 StringRef Str;
6826 SMLoc Loc = getLoc();
6827 if (!parseId(Str, "expected a format string") ||
6828 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
6829 return ParseStatus::Failure;
6830 if (Dfmt == DFMT_UNDEF)
6831 return Error(Loc, "duplicate numeric format");
6832 if (Nfmt == NFMT_UNDEF)
6833 return Error(Loc, "duplicate data format");
6834 }
6835
6836 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6837 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6838
6839 if (isGFX10Plus()) {
6840 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6841 if (Ufmt == UFMT_UNDEF)
6842 return Error(FormatLoc, "unsupported format");
6843 Format = Ufmt;
6844 } else {
6845 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6846 }
6847
6848 return ParseStatus::Success;
6849}
6850
6851ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6852 SMLoc Loc,
6853 int64_t &Format) {
6854 using namespace llvm::AMDGPU::MTBUFFormat;
6855
6856 auto Id = getUnifiedFormat(FormatStr, getSTI());
6857 if (Id == UFMT_UNDEF)
6858 return ParseStatus::NoMatch;
6859
6860 if (!isGFX10Plus())
6861 return Error(Loc, "unified format is not supported on this GPU");
6862
6863 Format = Id;
6864 return ParseStatus::Success;
6865}
6866
6867ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6868 using namespace llvm::AMDGPU::MTBUFFormat;
6869 SMLoc Loc = getLoc();
6870
6871 if (!parseExpr(Format))
6872 return ParseStatus::Failure;
6873 if (!isValidFormatEncoding(Format, getSTI()))
6874 return Error(Loc, "out of range format");
6875
6876 return ParseStatus::Success;
6877}
6878
6879ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6880 using namespace llvm::AMDGPU::MTBUFFormat;
6881
6882 if (!trySkipId("format", AsmToken::Colon))
6883 return ParseStatus::NoMatch;
6884
6885 if (trySkipToken(AsmToken::LBrac)) {
6886 StringRef FormatStr;
6887 SMLoc Loc = getLoc();
6888 if (!parseId(FormatStr, "expected a format string"))
6889 return ParseStatus::Failure;
6890
6891 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6892 if (Res.isNoMatch())
6893 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6894 if (!Res.isSuccess())
6895 return Res;
6896
6897 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6898 return ParseStatus::Failure;
6899
6900 return ParseStatus::Success;
6901 }
6902
6903 return parseNumericFormat(Format);
6904}
6905
6906ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6907 using namespace llvm::AMDGPU::MTBUFFormat;
6908
6909 int64_t Format = getDefaultFormatEncoding(getSTI());
6910 ParseStatus Res;
6911 SMLoc Loc = getLoc();
6912
6913 // Parse legacy format syntax.
6914 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6915 if (Res.isFailure())
6916 return Res;
6917
6918 bool FormatFound = Res.isSuccess();
6919
6920 Operands.push_back(
6921 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6922
6923 if (FormatFound)
6924 trySkipToken(AsmToken::Comma);
6925
6926 if (isToken(AsmToken::EndOfStatement)) {
6927 // We are expecting an soffset operand,
6928 // but let matcher handle the error.
6929 return ParseStatus::Success;
6930 }
6931
6932 // Parse soffset.
6933 Res = parseRegOrImm(Operands);
6934 if (!Res.isSuccess())
6935 return Res;
6936
6937 trySkipToken(AsmToken::Comma);
6938
6939 if (!FormatFound) {
6940 Res = parseSymbolicOrNumericFormat(Format);
6941 if (Res.isFailure())
6942 return Res;
6943 if (Res.isSuccess()) {
6944 auto Size = Operands.size();
6945 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6946 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6947 Op.setImm(Format);
6948 }
6949 return ParseStatus::Success;
6950 }
6951
6952 if (isId("format") && peekToken().is(AsmToken::Colon))
6953 return Error(getLoc(), "duplicate format");
6954 return ParseStatus::Success;
6955}
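// Illustrative examples (editorial, not from the upstream source) of format
// operands accepted above; the symbolic names are examples only, see
// MTBUFFormat for the full set:
//
//   format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]   (split dfmt/nfmt form)
//   format:[BUF_FMT_32_FLOAT]                           (GFX10+ unified form)
//   format:22                                           (raw numeric encoding)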
6956
6957ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
6958 ParseStatus Res =
6959 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
6960 if (Res.isNoMatch()) {
6961 Res = parseIntWithPrefix("inst_offset", Operands,
6962 AMDGPUOperand::ImmTyInstOffset);
6963 }
6964 return Res;
6965}
6966
6967ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
6968 ParseStatus Res =
6969 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
6970 if (Res.isNoMatch())
6971 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
6972 return Res;
6973}
6974
6975ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
6976 ParseStatus Res =
6977 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
6978 if (Res.isNoMatch()) {
6979 Res =
6980 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
6981 }
6982 return Res;
6983}
6984
6985//===----------------------------------------------------------------------===//
6986// Exp
6987//===----------------------------------------------------------------------===//
6988
6989void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6990 OptionalImmIndexMap OptionalIdx;
6991
6992 unsigned OperandIdx[4];
6993 unsigned EnMask = 0;
6994 int SrcIdx = 0;
6995
6996 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6997 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6998
6999 // Add the register arguments
7000 if (Op.isReg()) {
7001 assert(SrcIdx < 4);
7002 OperandIdx[SrcIdx] = Inst.size();
7003 Op.addRegOperands(Inst, 1);
7004 ++SrcIdx;
7005 continue;
7006 }
7007
7008 if (Op.isOff()) {
7009 assert(SrcIdx < 4);
7010 OperandIdx[SrcIdx] = Inst.size();
7011 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
7012 ++SrcIdx;
7013 continue;
7014 }
7015
7016 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7017 Op.addImmOperands(Inst, 1);
7018 continue;
7019 }
7020
7021 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7022 continue;
7023
7024 // Handle optional arguments
7025 OptionalIdx[Op.getImmTy()] = i;
7026 }
7027
7028 assert(SrcIdx == 4);
7029
7030 bool Compr = false;
7031 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7032 Compr = true;
7033 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7034 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
7035 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
7036 }
7037
7038 for (auto i = 0; i < SrcIdx; ++i) {
7039 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
7040 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7041 }
7042 }
7043
7044 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7045 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7046
7047 Inst.addOperand(MCOperand::createImm(EnMask));
7048}
7049
7050//===----------------------------------------------------------------------===//
7051// s_waitcnt
7052//===----------------------------------------------------------------------===//
7053
7054static bool
7055 encodeCnt(
7056 const AMDGPU::IsaVersion ISA,
7057 int64_t &IntVal,
7058 int64_t CntVal,
7059 bool Saturate,
7060 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7061 unsigned (*decode)(const IsaVersion &Version, unsigned))
7062{
7063 bool Failed = false;
7064
7065 IntVal = encode(ISA, IntVal, CntVal);
7066 if (CntVal != decode(ISA, IntVal)) {
7067 if (Saturate) {
7068 IntVal = encode(ISA, IntVal, -1);
7069 } else {
7070 Failed = true;
7071 }
7072 }
7073 return Failed;
7074}
7075
7076bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7077
7078 SMLoc CntLoc = getLoc();
7079 StringRef CntName = getTokenStr();
7080
7081 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7082 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7083 return false;
7084
7085 int64_t CntVal;
7086 SMLoc ValLoc = getLoc();
7087 if (!parseExpr(CntVal))
7088 return false;
7089
7090 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7091
7092 bool Failed = true;
7093 bool Sat = CntName.ends_with("_sat");
7094
7095 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7096 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7097 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7098 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7099 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7100 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7101 } else {
7102 Error(CntLoc, "invalid counter name " + CntName);
7103 return false;
7104 }
7105
7106 if (Failed) {
7107 Error(ValLoc, "too large value for " + CntName);
7108 return false;
7109 }
7110
7111 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7112 return false;
7113
7114 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7115 if (isToken(AsmToken::EndOfStatement)) {
7116 Error(getLoc(), "expected a counter name");
7117 return false;
7118 }
7119 }
7120
7121 return true;
7122}
7123
7124ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7125 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7126 int64_t Waitcnt = getWaitcntBitMask(ISA);
7127 SMLoc S = getLoc();
7128
7129 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7130 while (!isToken(AsmToken::EndOfStatement)) {
7131 if (!parseCnt(Waitcnt))
7132 return ParseStatus::Failure;
7133 }
7134 } else {
7135 if (!parseExpr(Waitcnt))
7136 return ParseStatus::Failure;
7137 }
7138
7139 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7140 return ParseStatus::Success;
7141}
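// Illustrative examples (editorial, not from the upstream source) of operands
// accepted by parseSWaitCnt; named counters may be combined with '&' or ',',
// and a bare expression supplies the raw wait mask:
//
//   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
//   s_waitcnt vmcnt(1) & lgkmcnt(2)
//   s_waitcnt 0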
7142
7143bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7144 SMLoc FieldLoc = getLoc();
7145 StringRef FieldName = getTokenStr();
7146 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7147 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7148 return false;
7149
7150 SMLoc ValueLoc = getLoc();
7151 StringRef ValueName = getTokenStr();
7152 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7153 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7154 return false;
7155
7156 unsigned Shift;
7157 if (FieldName == "instid0") {
7158 Shift = 0;
7159 } else if (FieldName == "instskip") {
7160 Shift = 4;
7161 } else if (FieldName == "instid1") {
7162 Shift = 7;
7163 } else {
7164 Error(FieldLoc, "invalid field name " + FieldName);
7165 return false;
7166 }
7167
7168 int Value;
7169 if (Shift == 4) {
7170 // Parse values for instskip.
7171 Value = StringSwitch<int>(ValueName)
7172 .Case("SAME", 0)
7173 .Case("NEXT", 1)
7174 .Case("SKIP_1", 2)
7175 .Case("SKIP_2", 3)
7176 .Case("SKIP_3", 4)
7177 .Case("SKIP_4", 5)
7178 .Default(-1);
7179 } else {
7180 // Parse values for instid0 and instid1.
7181 Value = StringSwitch<int>(ValueName)
7182 .Case("NO_DEP", 0)
7183 .Case("VALU_DEP_1", 1)
7184 .Case("VALU_DEP_2", 2)
7185 .Case("VALU_DEP_3", 3)
7186 .Case("VALU_DEP_4", 4)
7187 .Case("TRANS32_DEP_1", 5)
7188 .Case("TRANS32_DEP_2", 6)
7189 .Case("TRANS32_DEP_3", 7)
7190 .Case("FMA_ACCUM_CYCLE_1", 8)
7191 .Case("SALU_CYCLE_1", 9)
7192 .Case("SALU_CYCLE_2", 10)
7193 .Case("SALU_CYCLE_3", 11)
7194 .Default(-1);
7195 }
7196 if (Value < 0) {
7197 Error(ValueLoc, "invalid value name " + ValueName);
7198 return false;
7199 }
7200
7201 Delay |= Value << Shift;
7202 return true;
7203}
7204
7205ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7206 int64_t Delay = 0;
7207 SMLoc S = getLoc();
7208
7209 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7210 do {
7211 if (!parseDelay(Delay))
7212 return ParseStatus::Failure;
7213 } while (trySkipToken(AsmToken::Pipe));
7214 } else {
7215 if (!parseExpr(Delay))
7216 return ParseStatus::Failure;
7217 }
7218
7219 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7220 return ParseStatus::Success;
7221}
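// Illustrative examples (editorial, not from the upstream source) of operands
// accepted by parseSDelayALU; fields are separated by '|':
//
//   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
//   s_delay_alu 0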
7222
7223bool
7224AMDGPUOperand::isSWaitCnt() const {
7225 return isImm();
7226}
7227
7228bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7229
7230//===----------------------------------------------------------------------===//
7231// DepCtr
7232//===----------------------------------------------------------------------===//
7233
7234void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7235 StringRef DepCtrName) {
7236 switch (ErrorId) {
7237 case OPR_ID_UNKNOWN:
7238 Error(Loc, Twine("invalid counter name ", DepCtrName));
7239 return;
7240 case OPR_ID_UNSUPPORTED:
7241 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7242 return;
7243 case OPR_ID_DUPLICATE:
7244 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7245 return;
7246 case OPR_VAL_INVALID:
7247 Error(Loc, Twine("invalid value for ", DepCtrName));
7248 return;
7249 default:
7250 assert(false);
7251 }
7252}
7253
7254bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7255
7256 using namespace llvm::AMDGPU::DepCtr;
7257
7258 SMLoc DepCtrLoc = getLoc();
7259 StringRef DepCtrName = getTokenStr();
7260
7261 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7262 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7263 return false;
7264
7265 int64_t ExprVal;
7266 if (!parseExpr(ExprVal))
7267 return false;
7268
7269 unsigned PrevOprMask = UsedOprMask;
7270 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7271
7272 if (CntVal < 0) {
7273 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7274 return false;
7275 }
7276
7277 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7278 return false;
7279
7280 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7281 if (isToken(AsmToken::EndOfStatement)) {
7282 Error(getLoc(), "expected a counter name");
7283 return false;
7284 }
7285 }
7286
7287 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7288 DepCtr = (DepCtr & ~CntValMask) | CntVal;
7289 return true;
7290}
7291
7292ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7293 using namespace llvm::AMDGPU::DepCtr;
7294
7295 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
7296 SMLoc Loc = getLoc();
7297
7298 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7299 unsigned UsedOprMask = 0;
7300 while (!isToken(AsmToken::EndOfStatement)) {
7301 if (!parseDepCtr(DepCtr, UsedOprMask))
7302 return ParseStatus::Failure;
7303 }
7304 } else {
7305 if (!parseExpr(DepCtr))
7306 return ParseStatus::Failure;
7307 }
7308
7309 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
7310 return ParseStatus::Success;
7311}
7312
7313bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
7314
7315//===----------------------------------------------------------------------===//
7316// hwreg
7317//===----------------------------------------------------------------------===//
7318
7319ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
7320 OperandInfoTy &Offset,
7321 OperandInfoTy &Width) {
7322 using namespace llvm::AMDGPU::Hwreg;
7323
7324 if (!trySkipId("hwreg", AsmToken::LParen))
7325 return ParseStatus::NoMatch;
7326
7327 // The register may be specified by name or using a numeric code
7328 HwReg.Loc = getLoc();
7329 if (isToken(AsmToken::Identifier) &&
7330 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7331 HwReg.IsSymbolic = true;
7332 lex(); // skip register name
7333 } else if (!parseExpr(HwReg.Val, "a register name")) {
7334 return ParseStatus::Failure;
7335 }
7336
7337 if (trySkipToken(AsmToken::RParen))
7338 return ParseStatus::Success;
7339
7340 // parse optional params
7341 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
7342 return ParseStatus::Failure;
7343
7344 Offset.Loc = getLoc();
7345 if (!parseExpr(Offset.Val))
7346 return ParseStatus::Failure;
7347
7348 if (!skipToken(AsmToken::Comma, "expected a comma"))
7349 return ParseStatus::Failure;
7350
7351 Width.Loc = getLoc();
7352 if (!parseExpr(Width.Val) ||
7353 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7354 return ParseStatus::Failure;
7355
7356 return ParseStatus::Success;
7357}
7358
7359ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
7360 using namespace llvm::AMDGPU::Hwreg;
7361
7362 int64_t ImmVal = 0;
7363 SMLoc Loc = getLoc();
7364
7365 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
7366 HwregId::Default);
7367 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
7368 HwregOffset::Default);
7369 struct : StructuredOpField {
7370 using StructuredOpField::StructuredOpField;
7371 bool validate(AMDGPUAsmParser &Parser) const override {
7372 if (!isUIntN(Width, Val - 1))
7373 return Error(Parser, "only values from 1 to 32 are legal");
7374 return true;
7375 }
7376 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
7377 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
7378
7379 if (Res.isNoMatch())
7380 Res = parseHwregFunc(HwReg, Offset, Width);
7381
7382 if (Res.isSuccess()) {
7383 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
7384 return ParseStatus::Failure;
7385 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
7386 }
7387
7388 if (Res.isNoMatch() &&
7389 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
7390 Res = ParseStatus::Success;
7391
7392 if (!Res.isSuccess())
7393 return ParseStatus::Failure;
7394
7395 if (!isUInt<16>(ImmVal))
7396 return Error(Loc, "invalid immediate: only 16-bit values are legal");
7397 Operands.push_back(
7398 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
7399 return ParseStatus::Success;
7400}
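// Illustrative forms accepted above (register ids and field values are
// examples only):
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 32)    // symbolic macro
//   s_getreg_b32 s0, {id: 1, offset: 0, size: 32} // structured immediate
// A raw 16-bit immediate is also accepted.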
7401
7402bool AMDGPUOperand::isHwreg() const {
7403 return isImmTy(ImmTyHwreg);
7404}
7405
7406//===----------------------------------------------------------------------===//
7407// sendmsg
7408//===----------------------------------------------------------------------===//
7409
7410bool
7411AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
7412 OperandInfoTy &Op,
7413 OperandInfoTy &Stream) {
7414 using namespace llvm::AMDGPU::SendMsg;
7415
7416 Msg.Loc = getLoc();
7417 if (isToken(AsmToken::Identifier) &&
7418 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7419 Msg.IsSymbolic = true;
7420 lex(); // skip message name
7421 } else if (!parseExpr(Msg.Val, "a message name")) {
7422 return false;
7423 }
7424
7425 if (trySkipToken(AsmToken::Comma)) {
7426 Op.IsDefined = true;
7427 Op.Loc = getLoc();
7428 if (isToken(AsmToken::Identifier) &&
7429 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
7430 OPR_ID_UNKNOWN) {
7431 lex(); // skip operation name
7432 } else if (!parseExpr(Op.Val, "an operation name")) {
7433 return false;
7434 }
7435
7436 if (trySkipToken(AsmToken::Comma)) {
7437 Stream.IsDefined = true;
7438 Stream.Loc = getLoc();
7439 if (!parseExpr(Stream.Val))
7440 return false;
7441 }
7442 }
7443
7444 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
7445}
7446
7447bool
7448AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
7449 const OperandInfoTy &Op,
7450 const OperandInfoTy &Stream) {
7451 using namespace llvm::AMDGPU::SendMsg;
7452
7453 // Validation strictness depends on whether the message is specified
7454 // in a symbolic or in a numeric form. In the latter case,
7455 // only the possibility of encoding is checked.
7456 bool Strict = Msg.IsSymbolic;
7457
7458 if (Strict) {
7459 if (Msg.Val == OPR_ID_UNSUPPORTED) {
7460 Error(Msg.Loc, "specified message id is not supported on this GPU");
7461 return false;
7462 }
7463 } else {
7464 if (!isValidMsgId(Msg.Val, getSTI())) {
7465 Error(Msg.Loc, "invalid message id");
7466 return false;
7467 }
7468 }
7469 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
7470 if (Op.IsDefined) {
7471 Error(Op.Loc, "message does not support operations");
7472 } else {
7473 Error(Msg.Loc, "missing message operation");
7474 }
7475 return false;
7476 }
7477 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
7478 if (Op.Val == OPR_ID_UNSUPPORTED)
7479 Error(Op.Loc, "specified operation id is not supported on this GPU");
7480 else
7481 Error(Op.Loc, "invalid operation id");
7482 return false;
7483 }
7484 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
7485 Stream.IsDefined) {
7486 Error(Stream.Loc, "message operation does not support streams");
7487 return false;
7488 }
7489 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
7490 Error(Stream.Loc, "invalid message stream id");
7491 return false;
7492 }
7493 return true;
7494}
7495
7496ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
7497 using namespace llvm::AMDGPU::SendMsg;
7498
7499 int64_t ImmVal = 0;
7500 SMLoc Loc = getLoc();
7501
7502 if (trySkipId("sendmsg", AsmToken::LParen)) {
7503 OperandInfoTy Msg(OPR_ID_UNKNOWN);
7504 OperandInfoTy Op(OP_NONE_);
7505 OperandInfoTy Stream(STREAM_ID_NONE_);
7506 if (parseSendMsgBody(Msg, Op, Stream) &&
7507 validateSendMsg(Msg, Op, Stream)) {
7508 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
7509 } else {
7510 return ParseStatus::Failure;
7511 }
7512 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
7513 if (ImmVal < 0 || !isUInt<16>(ImmVal))
7514 return Error(Loc, "invalid immediate: only 16-bit values are legal");
7515 } else {
7516 return ParseStatus::Failure;
7517 }
7518
7519 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
7520 return ParseStatus::Success;
7521}
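// Illustrative forms accepted above (the operation and stream arguments are
// optional; the names shown are examples):
//   s_sendmsg sendmsg(MSG_INTERRUPT)
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
// A raw 16-bit immediate is also accepted.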
7522
7523bool AMDGPUOperand::isSendMsg() const {
7524 return isImmTy(ImmTySendMsg);
7525}
7526
7527//===----------------------------------------------------------------------===//
7528// v_interp
7529//===----------------------------------------------------------------------===//
7530
7531ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
7532 StringRef Str;
7533 SMLoc S = getLoc();
7534
7535 if (!parseId(Str))
7536 return ParseStatus::NoMatch;
7537
7538 int Slot = StringSwitch<int>(Str)
7539 .Case("p10", 0)
7540 .Case("p20", 1)
7541 .Case("p0", 2)
7542 .Default(-1);
7543
7544 if (Slot == -1)
7545 return Error(S, "invalid interpolation slot");
7546
7547 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
7548 AMDGPUOperand::ImmTyInterpSlot));
7549 return ParseStatus::Success;
7550}
7551
7552ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
7553 StringRef Str;
7554 SMLoc S = getLoc();
7555
7556 if (!parseId(Str))
7557 return ParseStatus::NoMatch;
7558
7559 if (!Str.starts_with("attr"))
7560 return Error(S, "invalid interpolation attribute");
7561
7562 StringRef Chan = Str.take_back(2);
7563 int AttrChan = StringSwitch<int>(Chan)
7564 .Case(".x", 0)
7565 .Case(".y", 1)
7566 .Case(".z", 2)
7567 .Case(".w", 3)
7568 .Default(-1);
7569 if (AttrChan == -1)
7570 return Error(S, "invalid or missing interpolation attribute channel");
7571
7572 Str = Str.drop_back(2).drop_front(4);
7573
7574 uint8_t Attr;
7575 if (Str.getAsInteger(10, Attr))
7576 return Error(S, "invalid or missing interpolation attribute number");
7577
7578 if (Attr > 32)
7579 return Error(S, "out of bounds interpolation attribute number");
7580
7581 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
7582
7583 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
7584 AMDGPUOperand::ImmTyInterpAttr));
7585 Operands.push_back(AMDGPUOperand::CreateImm(
7586 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
7587 return ParseStatus::Success;
7588}
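// Illustrative operands accepted by the two parsers above (slot and
// attribute numbers are examples):
//   v_interp_mov_f32 v0, p10, attr0.x   // interpolation slot p10/p20/p0
//   v_interp_p1_f32 v1, v2, attr3.w     // attribute attrN.{x,y,z,w}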
7589
7590//===----------------------------------------------------------------------===//
7591// exp
7592//===----------------------------------------------------------------------===//
7593
7594ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
7595 using namespace llvm::AMDGPU::Exp;
7596
7597 StringRef Str;
7598 SMLoc S = getLoc();
7599
7600 if (!parseId(Str))
7601 return ParseStatus::NoMatch;
7602
7603 unsigned Id = getTgtId(Str);
7604 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
7605 return Error(S, (Id == ET_INVALID)
7606 ? "invalid exp target"
7607 : "exp target is not supported on this GPU");
7608
7609 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
7610 AMDGPUOperand::ImmTyExpTgt));
7611 return ParseStatus::Success;
7612}
7613
7614//===----------------------------------------------------------------------===//
7615// parser helpers
7616//===----------------------------------------------------------------------===//
7617
7618bool
7619AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
7620 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
7621}
7622
7623bool
7624AMDGPUAsmParser::isId(const StringRef Id) const {
7625 return isId(getToken(), Id);
7626}
7627
7628bool
7629AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
7630 return getTokenKind() == Kind;
7631}
7632
7633StringRef AMDGPUAsmParser::getId() const {
7634 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
7635}
7636
7637bool
7638AMDGPUAsmParser::trySkipId(const StringRef Id) {
7639 if (isId(Id)) {
7640 lex();
7641 return true;
7642 }
7643 return false;
7644}
7645
7646bool
7647AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7648 if (isToken(AsmToken::Identifier)) {
7649 StringRef Tok = getTokenStr();
7650 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
7651 lex();
7652 return true;
7653 }
7654 }
7655 return false;
7656}
7657
7658bool
7659AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7660 if (isId(Id) && peekToken().is(Kind)) {
7661 lex();
7662 lex();
7663 return true;
7664 }
7665 return false;
7666}
7667
7668bool
7669AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7670 if (isToken(Kind)) {
7671 lex();
7672 return true;
7673 }
7674 return false;
7675}
7676
7677bool
7678AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7679 const StringRef ErrMsg) {
7680 if (!trySkipToken(Kind)) {
7681 Error(getLoc(), ErrMsg);
7682 return false;
7683 }
7684 return true;
7685}
7686
7687bool
7688AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7689 SMLoc S = getLoc();
7690
7691 const MCExpr *Expr;
7692 if (Parser.parseExpression(Expr))
7693 return false;
7694
7695 if (Expr->evaluateAsAbsolute(Imm))
7696 return true;
7697
7698 if (Expected.empty()) {
7699 Error(S, "expected absolute expression");
7700 } else {
7701 Error(S, Twine("expected ", Expected) +
7702 Twine(" or an absolute expression"));
7703 }
7704 return false;
7705}
7706
7707bool
7708AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7709 SMLoc S = getLoc();
7710
7711 const MCExpr *Expr;
7712 if (Parser.parseExpression(Expr))
7713 return false;
7714
7715 int64_t IntVal;
7716 if (Expr->evaluateAsAbsolute(IntVal)) {
7717 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7718 } else {
7719 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7720 }
7721 return true;
7722}
7723
7724bool
7725AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7726 if (isToken(AsmToken::String)) {
7727 Val = getToken().getStringContents();
7728 lex();
7729 return true;
7730 } else {
7731 Error(getLoc(), ErrMsg);
7732 return false;
7733 }
7734}
7735
7736bool
7737AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7738 if (isToken(AsmToken::Identifier)) {
7739 Val = getTokenStr();
7740 lex();
7741 return true;
7742 } else {
7743 if (!ErrMsg.empty())
7744 Error(getLoc(), ErrMsg);
7745 return false;
7746 }
7747}
7748
7749AsmToken
7750AMDGPUAsmParser::getToken() const {
7751 return Parser.getTok();
7752}
7753
7754AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7755 return isToken(AsmToken::EndOfStatement)
7756 ? getToken()
7757 : getLexer().peekTok(ShouldSkipSpace);
7758}
7759
7760void
7761AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7762 auto TokCount = getLexer().peekTokens(Tokens);
7763
7764 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7765 Tokens[Idx] = AsmToken(AsmToken::Error, "");
7766}
7767
7768AsmToken::TokenKind
7769AMDGPUAsmParser::getTokenKind() const {
7770 return getLexer().getKind();
7771}
7772
7773SMLoc
7774AMDGPUAsmParser::getLoc() const {
7775 return getToken().getLoc();
7776}
7777
7778StringRef
7779AMDGPUAsmParser::getTokenStr() const {
7780 return getToken().getString();
7781}
7782
7783void
7784AMDGPUAsmParser::lex() {
7785 Parser.Lex();
7786}
7787
7788SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
7789 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7790}
7791
7792SMLoc
7793AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7794 const OperandVector &Operands) const {
7795 for (unsigned i = Operands.size() - 1; i > 0; --i) {
7796 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7797 if (Test(Op))
7798 return Op.getStartLoc();
7799 }
7800 return getInstLoc(Operands);
7801}
7802
7803SMLoc
7804AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7805 const OperandVector &Operands) const {
7806 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7807 return getOperandLoc(Test, Operands);
7808}
7809
7810SMLoc
7811AMDGPUAsmParser::getRegLoc(unsigned Reg,
7812 const OperandVector &Operands) const {
7813 auto Test = [=](const AMDGPUOperand& Op) {
7814 return Op.isRegKind() && Op.getReg() == Reg;
7815 };
7816 return getOperandLoc(Test, Operands);
7817}
7818
7819SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
7820 bool SearchMandatoryLiterals) const {
7821 auto Test = [](const AMDGPUOperand& Op) {
7822 return Op.IsImmKindLiteral() || Op.isExpr();
7823 };
7824 SMLoc Loc = getOperandLoc(Test, Operands);
7825 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
7826 Loc = getMandatoryLitLoc(Operands);
7827 return Loc;
7828}
7829
7830SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
7831 auto Test = [](const AMDGPUOperand &Op) {
7832 return Op.IsImmKindMandatoryLiteral();
7833 };
7834 return getOperandLoc(Test, Operands);
7835}
7836
7837SMLoc
7838AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7839 auto Test = [](const AMDGPUOperand& Op) {
7840 return Op.isImmKindConst();
7841 };
7842 return getOperandLoc(Test, Operands);
7843}
7844
7845ParseStatus
7846AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
7847 if (!trySkipToken(AsmToken::LCurly))
7848 return ParseStatus::NoMatch;
7849
7850 bool First = true;
7851 while (!trySkipToken(AsmToken::RCurly)) {
7852 if (!First &&
7853 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
7854 return ParseStatus::Failure;
7855
7856 StringRef Id = getTokenStr();
7857 SMLoc IdLoc = getLoc();
7858 if (!skipToken(AsmToken::Identifier, "field name expected") ||
7859 !skipToken(AsmToken::Colon, "colon expected"))
7860 return ParseStatus::Failure;
7861
7862 auto I =
7863 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
7864 if (I == Fields.end())
7865 return Error(IdLoc, "unknown field");
7866 if ((*I)->IsDefined)
7867 return Error(IdLoc, "duplicate field");
7868
7869 // TODO: Support symbolic values.
7870 (*I)->Loc = getLoc();
7871 if (!parseExpr((*I)->Val))
7872 return ParseStatus::Failure;
7873 (*I)->IsDefined = true;
7874
7875 First = false;
7876 }
7877 return ParseStatus::Success;
7878}
7879
7880bool AMDGPUAsmParser::validateStructuredOpFields(
7881 ArrayRef<const StructuredOpField *> Fields) {
7882 return all_of(Fields, [this](const StructuredOpField *F) {
7883 return F->validate(*this);
7884 });
7885}
7886
7887//===----------------------------------------------------------------------===//
7888// swizzle
7889//===----------------------------------------------------------------------===//
7890
7891LLVM_READNONE
7892static unsigned
7893encodeBitmaskPerm(const unsigned AndMask,
7894 const unsigned OrMask,
7895 const unsigned XorMask) {
7896 using namespace llvm::AMDGPU::Swizzle;
7897
7898 return BITMASK_PERM_ENC |
7899 (AndMask << BITMASK_AND_SHIFT) |
7900 (OrMask << BITMASK_OR_SHIFT) |
7901 (XorMask << BITMASK_XOR_SHIFT);
7902}
7903
7904bool
7905AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7906 const unsigned MinVal,
7907 const unsigned MaxVal,
7908 const StringRef ErrMsg,
7909 SMLoc &Loc) {
7910 if (!skipToken(AsmToken::Comma, "expected a comma")) {
7911 return false;
7912 }
7913 Loc = getLoc();
7914 if (!parseExpr(Op)) {
7915 return false;
7916 }
7917 if (Op < MinVal || Op > MaxVal) {
7918 Error(Loc, ErrMsg);
7919 return false;
7920 }
7921
7922 return true;
7923}
7924
7925bool
7926AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7927 const unsigned MinVal,
7928 const unsigned MaxVal,
7929 const StringRef ErrMsg) {
7930 SMLoc Loc;
7931 for (unsigned i = 0; i < OpNum; ++i) {
7932 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7933 return false;
7934 }
7935
7936 return true;
7937}
7938
7939bool
7940AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7941 using namespace llvm::AMDGPU::Swizzle;
7942
7943 int64_t Lane[LANE_NUM];
7944 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7945 "expected a 2-bit lane id")) {
7946 Imm = QUAD_PERM_ENC;
7947 for (unsigned I = 0; I < LANE_NUM; ++I) {
7948 Imm |= Lane[I] << (LANE_SHIFT * I);
7949 }
7950 return true;
7951 }
7952 return false;
7953}
7954
7955bool
7956AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7957 using namespace llvm::AMDGPU::Swizzle;
7958
7959 SMLoc Loc;
7960 int64_t GroupSize;
7961 int64_t LaneIdx;
7962
7963 if (!parseSwizzleOperand(GroupSize,
7964 2, 32,
7965 "group size must be in the interval [2,32]",
7966 Loc)) {
7967 return false;
7968 }
7969 if (!isPowerOf2_64(GroupSize)) {
7970 Error(Loc, "group size must be a power of two");
7971 return false;
7972 }
7973 if (parseSwizzleOperand(LaneIdx,
7974 0, GroupSize - 1,
7975 "lane id must be in the interval [0,group size - 1]",
7976 Loc)) {
7977 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7978 return true;
7979 }
7980 return false;
7981}
7982
7983bool
7984AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7985 using namespace llvm::AMDGPU::Swizzle;
7986
7987 SMLoc Loc;
7988 int64_t GroupSize;
7989
7990 if (!parseSwizzleOperand(GroupSize,
7991 2, 32,
7992 "group size must be in the interval [2,32]",
7993 Loc)) {
7994 return false;
7995 }
7996 if (!isPowerOf2_64(GroupSize)) {
7997 Error(Loc, "group size must be a power of two");
7998 return false;
7999 }
8000
8001 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8002 return true;
8003}
8004
8005bool
8006AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8007 using namespace llvm::AMDGPU::Swizzle;
8008
8009 SMLoc Loc;
8010 int64_t GroupSize;
8011
8012 if (!parseSwizzleOperand(GroupSize,
8013 1, 16,
8014 "group size must be in the interval [1,16]",
8015 Loc)) {
8016 return false;
8017 }
8018 if (!isPowerOf2_64(GroupSize)) {
8019 Error(Loc, "group size must be a power of two");
8020 return false;
8021 }
8022
8023 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8024 return true;
8025}
8026
8027bool
8028AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8029 using namespace llvm::AMDGPU::Swizzle;
8030
8031 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8032 return false;
8033 }
8034
8035 StringRef Ctl;
8036 SMLoc StrLoc = getLoc();
8037 if (!parseString(Ctl)) {
8038 return false;
8039 }
8040 if (Ctl.size() != BITMASK_WIDTH) {
8041 Error(StrLoc, "expected a 5-character mask");
8042 return false;
8043 }
8044
8045 unsigned AndMask = 0;
8046 unsigned OrMask = 0;
8047 unsigned XorMask = 0;
8048
8049 for (size_t i = 0; i < Ctl.size(); ++i) {
8050 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8051 switch(Ctl[i]) {
8052 default:
8053 Error(StrLoc, "invalid mask");
8054 return false;
8055 case '0':
8056 break;
8057 case '1':
8058 OrMask |= Mask;
8059 break;
8060 case 'p':
8061 AndMask |= Mask;
8062 break;
8063 case 'i':
8064 AndMask |= Mask;
8065 XorMask |= Mask;
8066 break;
8067 }
8068 }
8069
8070 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8071 return true;
8072}
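// Illustrative BITMASK_PERM control string (the 5 characters map to lane-id
// bits, MSB first): '0' forces the bit to 0, '1' forces it to 1, 'p'
// preserves it, and 'i' inverts it, e.g.
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pip")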
8073
8074bool
8075AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8076
8077 SMLoc OffsetLoc = getLoc();
8078
8079 if (!parseExpr(Imm, "a swizzle macro")) {
8080 return false;
8081 }
8082 if (!isUInt<16>(Imm)) {
8083 Error(OffsetLoc, "expected a 16-bit offset");
8084 return false;
8085 }
8086 return true;
8087}
8088
8089bool
8090AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8091 using namespace llvm::AMDGPU::Swizzle;
8092
8093 if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
8094
8095 SMLoc ModeLoc = getLoc();
8096 bool Ok = false;
8097
8098 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8099 Ok = parseSwizzleQuadPerm(Imm);
8100 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8101 Ok = parseSwizzleBitmaskPerm(Imm);
8102 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8103 Ok = parseSwizzleBroadcast(Imm);
8104 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8105 Ok = parseSwizzleSwap(Imm);
8106 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8107 Ok = parseSwizzleReverse(Imm);
8108 } else {
8109 Error(ModeLoc, "expected a swizzle mode");
8110 }
8111
8112 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
8113 }
8114
8115 return false;
8116}
8117
8118ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8119 SMLoc S = getLoc();
8120 int64_t Imm = 0;
8121
8122 if (trySkipId("offset")) {
8123
8124 bool Ok = false;
8125 if (skipToken(AsmToken::Colon, "expected a colon")) {
8126 if (trySkipId("swizzle")) {
8127 Ok = parseSwizzleMacro(Imm);
8128 } else {
8129 Ok = parseSwizzleOffset(Imm);
8130 }
8131 }
8132
8133 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8134
8135 return Ok ? ParseStatus::Success : ParseStatus::Failure;
8136 }
8137 return ParseStatus::NoMatch;
8138}
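// Illustrative ds_swizzle_b32 offsets accepted above (group sizes and lane
// ids are examples):
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BROADCAST, 8, 3)
// A raw 16-bit offset value is also accepted after "offset:".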
8139
8140bool
8141AMDGPUOperand::isSwizzle() const {
8142 return isImmTy(ImmTySwizzle);
8143}
8144
8145//===----------------------------------------------------------------------===//
8146// VGPR Index Mode
8147//===----------------------------------------------------------------------===//
8148
8149int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8150
8151 using namespace llvm::AMDGPU::VGPRIndexMode;
8152
8153 if (trySkipToken(AsmToken::RParen)) {
8154 return OFF;
8155 }
8156
8157 int64_t Imm = 0;
8158
8159 while (true) {
8160 unsigned Mode = 0;
8161 SMLoc S = getLoc();
8162
8163 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8164 if (trySkipId(IdSymbolic[ModeId])) {
8165 Mode = 1 << ModeId;
8166 break;
8167 }
8168 }
8169
8170 if (Mode == 0) {
8171 Error(S, (Imm == 0)?
8172 "expected a VGPR index mode or a closing parenthesis" :
8173 "expected a VGPR index mode");
8174 return UNDEF;
8175 }
8176
8177 if (Imm & Mode) {
8178 Error(S, "duplicate VGPR index mode");
8179 return UNDEF;
8180 }
8181 Imm |= Mode;
8182
8183 if (trySkipToken(AsmToken::RParen))
8184 break;
8185 if (!skipToken(AsmToken::Comma,
8186 "expected a comma or a closing parenthesis"))
8187 return UNDEF;
8188 }
8189
8190 return Imm;
8191}
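// Illustrative gpr_idx operand built by this macro parser (mode names come
// from the VGPRIndexMode symbolic ids):
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)
// A 4-bit numeric immediate is also accepted by the caller below.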
8192
8193ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8194
8195 using namespace llvm::AMDGPU::VGPRIndexMode;
8196
8197 int64_t Imm = 0;
8198 SMLoc S = getLoc();
8199
8200 if (trySkipId("gpr_idx", AsmToken::LParen)) {
8201 Imm = parseGPRIdxMacro();
8202 if (Imm == UNDEF)
8203 return ParseStatus::Failure;
8204 } else {
8205 if (getParser().parseAbsoluteExpression(Imm))
8206 return ParseStatus::Failure;
8207 if (Imm < 0 || !isUInt<4>(Imm))
8208 return Error(S, "invalid immediate: only 4-bit values are legal");
8209 }
8210
8211 Operands.push_back(
8212 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8213 return ParseStatus::Success;
8214}
8215
8216bool AMDGPUOperand::isGPRIdxMode() const {
8217 return isImmTy(ImmTyGprIdxMode);
8218}
8219
8220//===----------------------------------------------------------------------===//
8221// sopp branch targets
8222//===----------------------------------------------------------------------===//
8223
8224ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
8225
8226 // Make sure we are not parsing something
8227 // that looks like a label or an expression but is not.
8228 // This will improve error messages.
8229 if (isRegister() || isModifier())
8230 return ParseStatus::NoMatch;
8231
8232 if (!parseExpr(Operands))
8233 return ParseStatus::Failure;
8234
8235 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
8236 assert(Opr.isImm() || Opr.isExpr());
8237 SMLoc Loc = Opr.getStartLoc();
8238
8239 // Currently we do not support arbitrary expressions as branch targets.
8240 // Only labels and absolute expressions are accepted.
8241 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8242 Error(Loc, "expected an absolute expression or a label");
8243 } else if (Opr.isImm() && !Opr.isS16Imm()) {
8244 Error(Loc, "expected a 16-bit signed jump offset");
8245 }
8246
8247 return ParseStatus::Success;
8248}
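// Illustrative branch targets accepted above (the label name is arbitrary):
//   s_branch loop_header     // label
//   s_cbranch_scc0 0x10      // absolute 16-bit signed offset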
8249
8250//===----------------------------------------------------------------------===//
8251// Boolean holding registers
8252//===----------------------------------------------------------------------===//
8253
8254ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
8255 return parseReg(Operands);
8256}
8257
8258//===----------------------------------------------------------------------===//
8259// mubuf
8260//===----------------------------------------------------------------------===//
8261
8262void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
8263 const OperandVector &Operands,
8264 bool IsAtomic) {
8265 OptionalImmIndexMap OptionalIdx;
8266 unsigned FirstOperandIdx = 1;
8267 bool IsAtomicReturn = false;
8268
8269 if (IsAtomic) {
8270 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
8271 SIInstrFlags::IsAtomicRet;
8272 }
8273
8274 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
8275 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8276
8277 // Add the register arguments
8278 if (Op.isReg()) {
8279 Op.addRegOperands(Inst, 1);
8280 // Insert a tied src for atomic return dst.
8281 // This cannot be postponed as subsequent calls to
8282 // addImmOperands rely on correct number of MC operands.
8283 if (IsAtomicReturn && i == FirstOperandIdx)
8284 Op.addRegOperands(Inst, 1);
8285 continue;
8286 }
8287
8288 // Handle the case where soffset is an immediate
8289 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
8290 Op.addImmOperands(Inst, 1);
8291 continue;
8292 }
8293
8294 // Handle tokens like 'offen' which are sometimes hard-coded into the
8295 // asm string. There are no MCInst operands for these.
8296 if (Op.isToken()) {
8297 continue;
8298 }
8299 assert(Op.isImm());
8300
8301 // Handle optional arguments
8302 OptionalIdx[Op.getImmTy()] = i;
8303 }
8304
8305 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
8306 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
8307}
8308
8309//===----------------------------------------------------------------------===//
8310// smrd
8311//===----------------------------------------------------------------------===//
8312
8313bool AMDGPUOperand::isSMRDOffset8() const {
8314 return isImmLiteral() && isUInt<8>(getImm());
8315}
8316
8317bool AMDGPUOperand::isSMEMOffset() const {
8318 // Offset range is checked later by validator.
8319 return isImmLiteral();
8320}
8321
8322bool AMDGPUOperand::isSMRDLiteralOffset() const {
8323 // 32-bit literals are only supported on CI and we only want to use them
8324 // when the offset is wider than 8 bits.
8325 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
8326}
8327
8328//===----------------------------------------------------------------------===//
8329// vop3
8330//===----------------------------------------------------------------------===//
8331
8332static bool ConvertOmodMul(int64_t &Mul) {
8333 if (Mul != 1 && Mul != 2 && Mul != 4)
8334 return false;
8335
8336 Mul >>= 1;
8337 return true;
8338}
8339
8340static bool ConvertOmodDiv(int64_t &Div) {
8341 if (Div == 1) {
8342 Div = 0;
8343 return true;
8344 }
8345
8346 if (Div == 2) {
8347 Div = 3;
8348 return true;
8349 }
8350
8351 return false;
8352}
8353
8354// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
8355// This is intentional and ensures compatibility with sp3.
8356// See bug 35397 for details.
8357bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
8358 if (BoundCtrl == 0 || BoundCtrl == 1) {
8359 if (!isGFX11Plus())
8360 BoundCtrl = 1;
8361 return true;
8362 }
8363 return false;
8364}
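// For example, on pre-gfx11 targets both of the following forms encode the
// same bound_ctrl field value of 1 (register operands are arbitrary):
//   v_mov_b32_dpp v0, v1 row_shl:1 bound_ctrl:0
//   v_mov_b32_dpp v0, v1 row_shl:1 bound_ctrl:1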
8365
8366void AMDGPUAsmParser::onBeginOfFile() {
8367 if (!getParser().getStreamer().getTargetStreamer() ||
8368 getSTI().getTargetTriple().getArch() == Triple::r600)
8369 return;
8370
8371 if (!getTargetStreamer().getTargetID())
8372 getTargetStreamer().initializeTargetID(getSTI(),
8373 getSTI().getFeatureString());
8374
8375 if (isHsaAbi(getSTI()))
8376 getTargetStreamer().EmitDirectiveAMDGCNTarget();
8377}
8378
8379/// Parse AMDGPU specific expressions.
8380///
8381/// expr ::= or(expr, ...) |
8382/// max(expr, ...)
8383///
8384bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
8385 using AGVK = AMDGPUVariadicMCExpr::VariadicKind;
8386
8387 if (isToken(AsmToken::Identifier)) {
8388 StringRef TokenId = getTokenStr();
8389 AGVK VK = StringSwitch<AGVK>(TokenId)
8390 .Case("max", AGVK::AGVK_Max)
8391 .Case("or", AGVK::AGVK_Or)
8392 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
8393 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
8394 .Case("alignto", AGVK::AGVK_AlignTo)
8395 .Case("occupancy", AGVK::AGVK_Occupancy)
8396 .Default(AGVK::AGVK_None);
8397
8398 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
8399 SmallVector<const MCExpr *, 4> Exprs;
8400 uint64_t CommaCount = 0;
8401 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
8402 lex(); // Eat '('
8403 while (true) {
8404 if (trySkipToken(AsmToken::RParen)) {
8405 if (Exprs.empty()) {
8406 Error(getToken().getLoc(),
8407 "empty " + Twine(TokenId) + " expression");
8408 return true;
8409 }
8410 if (CommaCount + 1 != Exprs.size()) {
8411 Error(getToken().getLoc(),
8412 "mismatch of commas in " + Twine(TokenId) + " expression");
8413 return true;
8414 }
8415 Res = AMDGPUVariadicMCExpr::create(VK, Exprs, getContext());
8416 return false;
8417 }
8418 const MCExpr *Expr;
8419 if (getParser().parseExpression(Expr, EndLoc))
8420 return true;
8421 Exprs.push_back(Expr);
8422 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
8423 if (LastTokenWasComma)
8424 CommaCount++;
8425 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
8426 Error(getToken().getLoc(),
8427 "unexpected token in " + Twine(TokenId) + " expression");
8428 return true;
8429 }
8430 }
8431 }
8432 }
8433 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
8434}
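// Illustrative uses of these variadic expressions in assembly (the symbol
// names are hypothetical):
//   .set total_vgpr, max(kernel_a.num_vgpr, kernel_b.num_vgpr)
//   .set any_dyn_stack, or(kernel_a.uses_dyn_stack, kernel_b.uses_dyn_stack)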
8435
8436ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
8437 StringRef Name = getTokenStr();
8438 if (Name == "mul") {
8439 return parseIntWithPrefix("mul", Operands,
8440 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8441 }
8442
8443 if (Name == "div") {
8444 return parseIntWithPrefix("div", Operands,
8445 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8446 }
8447
8448 return ParseStatus::NoMatch;
8449}
8450
8451// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8452// the number of src operands present, then copies that bit into src0_modifiers.
8453static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
8454 int Opc = Inst.getOpcode();
8455 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8456 if (OpSelIdx == -1)
8457 return;
8458
8459 int SrcNum;
8460 const int Ops[] = { AMDGPU::OpName::src0,
8461 AMDGPU::OpName::src1,
8462 AMDGPU::OpName::src2 };
8463 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
8464 ++SrcNum)
8465 ;
8466 assert(SrcNum > 0);
8467
8468 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8469
8470 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
8471 if (DstIdx == -1)
8472 return;
8473
8474 const MCOperand &DstOp = Inst.getOperand(DstIdx);
8475 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8476 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8477 if (DstOp.isReg() &&
8478 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
8479 if (AMDGPU::isHi(DstOp.getReg(), MRI))
8480 ModVal |= SISrcMods::DST_OP_SEL;
8481 } else {
8482 if ((OpSel & (1 << SrcNum)) != 0)
8483 ModVal |= SISrcMods::DST_OP_SEL;
8484 }
8485 Inst.getOperand(ModIdx).setImm(ModVal);
8486}
8487
8488void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
8489 const OperandVector &Operands) {
8490 cvtVOP3P(Inst, Operands);
8491 cvtVOP3DstOpSelOnly(Inst, *getMRI());
8492}
8493
8494void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
8495 OptionalImmIndexMap &OptionalIdx) {
8496 cvtVOP3P(Inst, Operands, OptionalIdx);
8497 cvtVOP3DstOpSelOnly(Inst, *getMRI());
8498}
8499
8500static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8501 return
8502 // 1. This operand is input modifiers
8503 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
8504 // 2. This is not last operand
8505 && Desc.NumOperands > (OpNum + 1)
8506 // 3. Next operand is register class
8507 && Desc.operands()[OpNum + 1].RegClass != -1
8508 // 4. Next register is not tied to any other operand
8509 && Desc.getOperandConstraint(OpNum + 1,
8510 MCOI::OperandConstraint::TIED_TO) == -1;
8511}
8512
8513void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8514{
8515 OptionalImmIndexMap OptionalIdx;
8516 unsigned Opc = Inst.getOpcode();
8517
8518 unsigned I = 1;
8519 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8520 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8521 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8522 }
8523
8524 for (unsigned E = Operands.size(); I != E; ++I) {
8525 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8526 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8527 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8528 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
8529 Op.isInterpAttrChan()) {
8530 Inst.addOperand(MCOperand::createImm(Op.getImm()));
8531 } else if (Op.isImmModifier()) {
8532 OptionalIdx[Op.getImmTy()] = I;
8533 } else {
8534 llvm_unreachable("unhandled operand type");
8535 }
8536 }
8537
8538 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
8539 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8540 AMDGPUOperand::ImmTyHigh);
8541
8542 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8543 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8544 AMDGPUOperand::ImmTyClampSI);
8545
8546 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8547 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8548 AMDGPUOperand::ImmTyOModSI);
8549}
8550
8551void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8552{
8553 OptionalImmIndexMap OptionalIdx;
8554 unsigned Opc = Inst.getOpcode();
8555
8556 unsigned I = 1;
8557 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8558 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8559 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8560 }
8561
8562 for (unsigned E = Operands.size(); I != E; ++I) {
8563 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8564 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8565 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8566 } else if (Op.isImmModifier()) {
8567 OptionalIdx[Op.getImmTy()] = I;
8568 } else {
8569 llvm_unreachable("unhandled operand type");
8570 }
8571 }
8572
8573 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8574
8575 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8576 if (OpSelIdx != -1)
8577 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8578
8579 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8580
8581 if (OpSelIdx == -1)
8582 return;
8583
8584 const int Ops[] = { AMDGPU::OpName::src0,
8585 AMDGPU::OpName::src1,
8586 AMDGPU::OpName::src2 };
8587 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8588 AMDGPU::OpName::src1_modifiers,
8589 AMDGPU::OpName::src2_modifiers };
8590
8591 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8592
8593 for (int J = 0; J < 3; ++J) {
8594 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8595 if (OpIdx == -1)
8596 break;
8597
8598 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8599 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8600
8601 if ((OpSel & (1 << J)) != 0)
8602 ModVal |= SISrcMods::OP_SEL_0;
8603 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8604 (OpSel & (1 << 3)) != 0)
8605 ModVal |= SISrcMods::DST_OP_SEL;
8606
8607 Inst.getOperand(ModIdx).setImm(ModVal);
8608 }
8609}
8610
8611void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8612 OptionalImmIndexMap &OptionalIdx) {
8613 unsigned Opc = Inst.getOpcode();
8614
8615 unsigned I = 1;
8616 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8617 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8618 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8619 }
8620
8621 for (unsigned E = Operands.size(); I != E; ++I) {
8622 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8623 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8624 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8625 } else if (Op.isImmModifier()) {
8626 OptionalIdx[Op.getImmTy()] = I;
8627 } else if (Op.isRegOrImm()) {
8628 Op.addRegOrImmOperands(Inst, 1);
8629 } else {
8630 llvm_unreachable("unhandled operand type");
8631 }
8632 }
8633
8634 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
8635 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
8636 Inst.addOperand(Inst.getOperand(0));
8637 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8638 AMDGPUOperand::ImmTyByteSel);
8639 }
8640
8641 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8642 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8643 AMDGPUOperand::ImmTyClampSI);
8644
8645 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8646 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8647 AMDGPUOperand::ImmTyOModSI);
8648
8649 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8650 // they have a src2 register operand that is tied to the dst operand.
8651 // We don't allow modifiers for this operand in the assembler, so
8652 // src2_modifiers should be 0.
8653 if (isMAC(Opc)) {
8654 auto it = Inst.begin();
8655 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8656 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8657 ++it;
8658 // Copy the operand to ensure it's not invalidated when Inst grows.
8659 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8660 }
8661}
8662
8663void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8664 OptionalImmIndexMap OptionalIdx;
8665 cvtVOP3(Inst, Operands, OptionalIdx);
8666}
8667
8668void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8669 OptionalImmIndexMap &OptIdx) {
8670 const int Opc = Inst.getOpcode();
8671 const MCInstrDesc &Desc = MII.get(Opc);
8672
8673 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8674
8675 if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8676 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
8677 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
8678 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
8679 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
8680 Inst.addOperand(Inst.getOperand(0));
8681 }
8682
8683 // Adding vdst_in operand is already covered for these DPP instructions in
8684 // cvtVOP3DPP.
8685 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
8686 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
8687 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
8688 Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
8689 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12 ||
8690 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
8691 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
8692 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
8693 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) {
8694 assert(!IsPacked);
8695 Inst.addOperand(Inst.getOperand(0));
8696 }
8697
8698 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
8699 // instruction, and then figure out where to actually put the modifiers
8700
8701 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8702 if (OpSelIdx != -1) {
8703 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8704 }
8705
8706 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8707 if (OpSelHiIdx != -1) {
8708 int DefaultVal = IsPacked ? -1 : 0;
8709 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8710 DefaultVal);
8711 }
8712
8713 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8714 if (NegLoIdx != -1)
8715 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8716
8717 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8718 if (NegHiIdx != -1)
8719 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8720
8721 const int Ops[] = { AMDGPU::OpName::src0,
8722 AMDGPU::OpName::src1,
8723 AMDGPU::OpName::src2 };
8724 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8725 AMDGPU::OpName::src1_modifiers,
8726 AMDGPU::OpName::src2_modifiers };
8727
8728 unsigned OpSel = 0;
8729 unsigned OpSelHi = 0;
8730 unsigned NegLo = 0;
8731 unsigned NegHi = 0;
8732
8733 if (OpSelIdx != -1)
8734 OpSel = Inst.getOperand(OpSelIdx).getImm();
8735
8736 if (OpSelHiIdx != -1)
8737 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8738
8739 if (NegLoIdx != -1)
8740 NegLo = Inst.getOperand(NegLoIdx).getImm();
8741
8742 if (NegHiIdx != -1)
8743 NegHi = Inst.getOperand(NegHiIdx).getImm();
8744
8745 for (int J = 0; J < 3; ++J) {
8746 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8747 if (OpIdx == -1)
8748 break;
8749
8750 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8751
8752 if (ModIdx == -1)
8753 continue;
8754
8755 uint32_t ModVal = 0;
8756
8757 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
8758 if (SrcOp.isReg() && getMRI()
8759 ->getRegClass(AMDGPU::VGPR_16RegClassID)
8760 .contains(SrcOp.getReg())) {
8761 bool VGPRSuffixIsHi = AMDGPU::isHi(SrcOp.getReg(), *getMRI());
8762 if (VGPRSuffixIsHi)
8763 ModVal |= SISrcMods::OP_SEL_0;
8764 } else {
8765 if ((OpSel & (1 << J)) != 0)
8766 ModVal |= SISrcMods::OP_SEL_0;
8767 }
8768
8769 if ((OpSelHi & (1 << J)) != 0)
8770 ModVal |= SISrcMods::OP_SEL_1;
8771
8772 if ((NegLo & (1 << J)) != 0)
8773 ModVal |= SISrcMods::NEG;
8774
8775 if ((NegHi & (1 << J)) != 0)
8776 ModVal |= SISrcMods::NEG_HI;
8777
8778 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8779 }
8780}
8781
8782void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8783 OptionalImmIndexMap OptIdx;
8784 cvtVOP3(Inst, Operands, OptIdx);
8785 cvtVOP3P(Inst, Operands, OptIdx);
8786}
8787
8788static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
8789 unsigned i, unsigned Opc, unsigned OpName) {
8790 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
8791 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
8792 else
8793 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
8794}
8795
8796void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
8797 unsigned Opc = Inst.getOpcode();
8798
8799 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
8800 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
8801 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
8802 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
8803 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
8804
8805 OptionalImmIndexMap OptIdx;
8806 for (unsigned i = 5; i < Operands.size(); ++i) {
8807 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8808 OptIdx[Op.getImmTy()] = i;
8809 }
8810
8811 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
8812 addOptionalImmOperand(Inst, Operands, OptIdx,
8813 AMDGPUOperand::ImmTyIndexKey8bit);
8814
8815 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
8816 addOptionalImmOperand(Inst, Operands, OptIdx,
8817 AMDGPUOperand::ImmTyIndexKey16bit);
8818
8819 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8820 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClampSI);
8821
8822 cvtVOP3P(Inst, Operands, OptIdx);
8823}
8824
8825//===----------------------------------------------------------------------===//
8826// VOPD
8827//===----------------------------------------------------------------------===//
8828
8829ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
8830 if (!hasVOPD(getSTI()))
8831 return ParseStatus::NoMatch;
8832
8833 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8834 SMLoc S = getLoc();
8835 lex();
8836 lex();
8837 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8838 SMLoc OpYLoc = getLoc();
8839 StringRef OpYName;
8840 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
8841 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
8842 return ParseStatus::Success;
8843 }
8844 return Error(OpYLoc, "expected a VOPDY instruction after ::");
8845 }
8846 return ParseStatus::NoMatch;
8847}
8848
8849// Create VOPD MCInst operands using parsed assembler operands.
8850void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
8851 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
8852 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
8853 if (Op.isReg()) {
8854 Op.addRegOperands(Inst, 1);
8855 return;
8856 }
8857 if (Op.isImm()) {
8858 Op.addImmOperands(Inst, 1);
8859 return;
8860 }
8861 llvm_unreachable("Unhandled operand type in cvtVOPD");
8862 };
8863
8864 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
8865
8866 // MCInst operands are ordered as follows:
8867 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
8868
8869 for (auto CompIdx : VOPD::COMPONENTS) {
8870 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
8871 }
8872
8873 for (auto CompIdx : VOPD::COMPONENTS) {
8874 const auto &CInfo = InstInfo[CompIdx];
8875 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
8876 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
8877 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
8878 if (CInfo.hasSrc2Acc())
8879 addOp(CInfo.getIndexOfDstInParsedOperands());
8880 }
8881}
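// For example (a sketch; the register choices are arbitrary), for
//   v_dual_mov_b32 v0, v1 :: v_dual_add_f32 v2, v3, v4
// the MCInst operands are ordered v0 (dstX), v2 (dstY), v1 (src0X), and then
// v3, v4 (the OpY sources).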
8882
8883//===----------------------------------------------------------------------===//
8884// dpp
8885//===----------------------------------------------------------------------===//
8886
8887bool AMDGPUOperand::isDPP8() const {
8888 return isImmTy(ImmTyDPP8);
8889}
8890
8891bool AMDGPUOperand::isDPPCtrl() const {
8892 using namespace AMDGPU::DPP;
8893
8894 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8895 if (result) {
8896 int64_t Imm = getImm();
8897 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8898 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8899 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8900 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8901 (Imm == DppCtrl::WAVE_SHL1) ||
8902 (Imm == DppCtrl::WAVE_ROL1) ||
8903 (Imm == DppCtrl::WAVE_SHR1) ||
8904 (Imm == DppCtrl::WAVE_ROR1) ||
8905 (Imm == DppCtrl::ROW_MIRROR) ||
8906 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8907 (Imm == DppCtrl::BCAST15) ||
8908 (Imm == DppCtrl::BCAST31) ||
8909 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8910 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8911 }
8912 return false;
8913}
8914
8915//===----------------------------------------------------------------------===//
8916// mAI
8917//===----------------------------------------------------------------------===//
8918
8919bool AMDGPUOperand::isBLGP() const {
8920 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8921}
8922
8923bool AMDGPUOperand::isS16Imm() const {
8924 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8925}
8926
8927bool AMDGPUOperand::isU16Imm() const {
8928 return isImmLiteral() && isUInt<16>(getImm());
8929}
8930
8931//===----------------------------------------------------------------------===//
8932// dim
8933//===----------------------------------------------------------------------===//
8934
8935bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8936 // We want to allow "dim:1D" etc.,
8937 // but the initial 1 is tokenized as an integer.
8938 std::string Token;
8939 if (isToken(AsmToken::Integer)) {
8940 SMLoc Loc = getToken().getEndLoc();
8941 Token = std::string(getTokenStr());
8942 lex();
8943 if (getLoc() != Loc)
8944 return false;
8945 }
8946
8947 StringRef Suffix;
8948 if (!parseId(Suffix))
8949 return false;
8950 Token += Suffix;
8951
8952 StringRef DimId = Token;
8953 if (DimId.starts_with("SQ_RSRC_IMG_"))
8954 DimId = DimId.drop_front(12);
8955
8956 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8957 if (!DimInfo)
8958 return false;
8959
8960 Encoding = DimInfo->Encoding;
8961 return true;
8962}
8963
8964ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8965 if (!isGFX10Plus())
8966 return ParseStatus::NoMatch;
8967
8968 SMLoc S = getLoc();
8969
8970 if (!trySkipId("dim", AsmToken::Colon))
8971 return ParseStatus::NoMatch;
8972
8973 unsigned Encoding;
8974 SMLoc Loc = getLoc();
8975 if (!parseDimId(Encoding))
8976 return Error(Loc, "invalid dim value");
8977
8978 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8979 AMDGPUOperand::ImmTyDim));
8980 return ParseStatus::Success;
8981}
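// Illustrative gfx10+ image instruction using the dim modifier parsed above
// (both the full and the short suffix forms are accepted):
//   image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D
//   image_load v[0:3], v0, s[0:7] dmask:0xf dim:1D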
8982
8983//===----------------------------------------------------------------------===//
8984// dpp
8985//===----------------------------------------------------------------------===//
8986
8987ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8988 SMLoc S = getLoc();
8989
8990 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8991 return ParseStatus::NoMatch;
8992
8993 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
8994
8995 int64_t Sels[8];
8996
8997 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8998 return ParseStatus::Failure;
8999
9000 for (size_t i = 0; i < 8; ++i) {
9001 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9002 return ParseStatus::Failure;
9003
9004 SMLoc Loc = getLoc();
9005 if (getParser().parseAbsoluteExpression(Sels[i]))
9006 return ParseStatus::Failure;
9007 if (0 > Sels[i] || 7 < Sels[i])
9008 return Error(Loc, "expected a 3-bit value");
9009 }
9010
9011 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9012 return ParseStatus::Failure;
9013
9014 unsigned DPP8 = 0;
9015 for (size_t i = 0; i < 8; ++i)
9016 DPP8 |= (Sels[i] << (i * 3));
9017
9018 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9019 return ParseStatus::Success;
9020}
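// Illustrative dpp8 operand (the lane selectors are examples); each of the
// eight 3-bit selectors Sels[i] is packed at bit position 3*i of the
// resulting immediate:
//   v_mov_b32_dpp v0, v1 dpp8:[7,6,5,4,3,2,1,0]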
9021
9022bool
9023AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9024 const OperandVector &Operands) {
9025 if (Ctrl == "row_newbcast")
9026 return isGFX90A();
9027
9028 if (Ctrl == "row_share" ||
9029 Ctrl == "row_xmask")
9030 return isGFX10Plus();
9031
9032 if (Ctrl == "wave_shl" ||
9033 Ctrl == "wave_shr" ||
9034 Ctrl == "wave_rol" ||
9035 Ctrl == "wave_ror" ||
9036 Ctrl == "row_bcast")
9037 return isVI() || isGFX9();
9038
9039 return Ctrl == "row_mirror" ||
9040 Ctrl == "row_half_mirror" ||
9041 Ctrl == "quad_perm" ||
9042 Ctrl == "row_shl" ||
9043 Ctrl == "row_shr" ||
9044 Ctrl == "row_ror";
9045}
9046
9047int64_t
9048AMDGPUAsmParser::parseDPPCtrlPerm() {
9049 // quad_perm:[%d,%d,%d,%d]
9050
9051 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9052 return -1;
9053
9054 int64_t Val = 0;
9055 for (int i = 0; i < 4; ++i) {
9056 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9057 return -1;
9058
9059 int64_t Temp;
9060 SMLoc Loc = getLoc();
9061 if (getParser().parseAbsoluteExpression(Temp))
9062 return -1;
9063 if (Temp < 0 || Temp > 3) {
9064 Error(Loc, "expected a 2-bit value");
9065 return -1;
9066 }
9067
9068 Val += (Temp << i * 2);
9069 }
9070
9071 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9072 return -1;
9073
9074 return Val;
9075}
9076
9077int64_t
9078AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
9079 using namespace AMDGPU::DPP;
9080
9081 // sel:%d
9082
9083 int64_t Val;
9084 SMLoc Loc = getLoc();
9085
9086 if (getParser().parseAbsoluteExpression(Val))
9087 return -1;
9088
9089 struct DppCtrlCheck {
9090 int64_t Ctrl;
9091 int Lo;
9092 int Hi;
9093 };
9094
9095 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
9096 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
9097 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
9098 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
9099 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
9100 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
9101 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
9102 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
9103 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
9104 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
9105 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
9106 .Default({-1, 0, 0});
9107
9108 bool Valid;
9109 if (Check.Ctrl == -1) {
9110 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
9111 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
9112 } else {
9113 Valid = Check.Lo <= Val && Val <= Check.Hi;
9114 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
9115 }
9116
9117 if (!Valid) {
9118 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
9119 return -1;
9120 }
9121
9122 return Val;
9123}
9124
9125ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
9126 using namespace AMDGPU::DPP;
9127
9128 if (!isToken(AsmToken::Identifier) ||
9129 !isSupportedDPPCtrl(getTokenStr(), Operands))
9130 return ParseStatus::NoMatch;
9131
9132 SMLoc S = getLoc();
9133 int64_t Val = -1;
9134 StringRef Ctrl;
9135
9136 parseId(Ctrl);
9137
9138 if (Ctrl == "row_mirror") {
9139 Val = DppCtrl::ROW_MIRROR;
9140 } else if (Ctrl == "row_half_mirror") {
9141 Val = DppCtrl::ROW_HALF_MIRROR;
9142 } else {
9143 if (skipToken(AsmToken::Colon, "expected a colon")) {
9144 if (Ctrl == "quad_perm") {
9145 Val = parseDPPCtrlPerm();
9146 } else {
9147 Val = parseDPPCtrlSel(Ctrl);
9148 }
9149 }
9150 }
9151
9152 if (Val == -1)
9153 return ParseStatus::Failure;
9154
9155 Operands.push_back(
9156 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
9157 return ParseStatus::Success;
9158}
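// Illustrative dpp_ctrl operands accepted above (the values are examples):
//   quad_perm:[0,1,2,3]    row_shl:15    row_mirror
//   row_share:4 (gfx10+)   row_bcast:31 (VI/gfx9)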
9159
9160void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
9161 bool IsDPP8) {
9162 OptionalImmIndexMap OptionalIdx;
9163 unsigned Opc = Inst.getOpcode();
9164 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9165
9166 // MAC instructions are special because they have an 'old'
9167 // operand which is not tied to dst (but is assumed to be).
9168 // They also have a dummy, unused src2_modifiers operand.
9169 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
9170 int Src2ModIdx =
9171 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
9172 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
9173 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
9174
9175 unsigned I = 1;
9176 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9177 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9178 }
9179
9180 int Fi = 0;
9181 for (unsigned E = Operands.size(); I != E; ++I) {
9182
9183 if (IsMAC) {
9184 int NumOperands = Inst.getNumOperands();
9185 if (OldIdx == NumOperands) {
9186 // Handle old operand
9187 constexpr int DST_IDX = 0;
9188 Inst.addOperand(Inst.getOperand(DST_IDX));
9189 } else if (Src2ModIdx == NumOperands) {
9190 // Add unused dummy src2_modifiers
9191 Inst.addOperand(MCOperand::createImm(0));
9192 }
9193 }
9194
9195 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
9196 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
9197 Inst.addOperand(Inst.getOperand(0));
9198 }
9199
9200 bool IsVOP3CvtSrDpp =
9201 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9202 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9203 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9204 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
9205 if (IsVOP3CvtSrDpp) {
9206 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
9207 Inst.addOperand(MCOperand::createImm(0));
9208 Inst.addOperand(MCOperand::createReg(0));
9209 }
9210 }
9211
9212 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9213 MCOI::TIED_TO);
9214 if (TiedTo != -1) {
9215 assert((unsigned)TiedTo < Inst.getNumOperands());
9216 // handle tied old or src2 for MAC instructions
9217 Inst.addOperand(Inst.getOperand(TiedTo));
9218 }
9219 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9220 // Add the register arguments
9221 if (IsDPP8 && Op.isDppFI()) {
9222 Fi = Op.getImm();
9223 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9224 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9225 } else if (Op.isReg()) {
9226 Op.addRegOperands(Inst, 1);
9227 } else if (Op.isImm() &&
9228 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
9229 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
9230 Op.addImmOperands(Inst, 1);
9231 } else if (Op.isImm()) {
9232 OptionalIdx[Op.getImmTy()] = I;
9233 } else {
9234 llvm_unreachable("unhandled operand type");
9235 }
9236 }
9237
9238 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel))
9239 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9240 AMDGPUOperand::ImmTyByteSel);
9241
9242 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9243 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
9244
9245 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9246 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
9247
9248 if (Desc.TSFlags & SIInstrFlags::VOP3P)
9249 cvtVOP3P(Inst, Operands, OptionalIdx);
9250 else if (Desc.TSFlags & SIInstrFlags::VOP3)
9251 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
9252 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9253 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9254 }
9255
9256 if (IsDPP8) {
9257 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
9258 using namespace llvm::AMDGPU::DPP;
9259 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9260 } else {
9261 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
9262 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9263 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9264 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9265
9266 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
9267 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9268 AMDGPUOperand::ImmTyDppFI);
9269 }
9270}
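// Editor's sketch (not part of the upstream source): cvtVOP3DPP handles VOP3
// instructions written in DPP/DPP8 form, e.g. (GFX11+ style syntax, shown for
// illustration only):
//   v_add_f32_e64_dpp v0, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
//   v_add_f32_e64_dpp v0, v1, v2 dpp8:[7,6,5,4,3,2,1,0]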
9271
9272void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
9273 OptionalImmIndexMap OptionalIdx;
9274
9275 unsigned I = 1;
9276 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9277 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9278 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9279 }
9280
9281 int Fi = 0;
9282 for (unsigned E = Operands.size(); I != E; ++I) {
9283 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9284 MCOI::TIED_TO);
9285 if (TiedTo != -1) {
9286 assert((unsigned)TiedTo < Inst.getNumOperands());
9287 // handle tied old or src2 for MAC instructions
9288 Inst.addOperand(Inst.getOperand(TiedTo));
9289 }
9290 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9291 // Add the register arguments
9292 if (Op.isReg() && validateVccOperand(Op.getReg())) {
9293 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
9294 // Skip it.
9295 continue;
9296 }
9297
9298 if (IsDPP8) {
9299 if (Op.isDPP8()) {
9300 Op.addImmOperands(Inst, 1);
9301 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9302 Op.addRegWithFPInputModsOperands(Inst, 2);
9303 } else if (Op.isDppFI()) {
9304 Fi = Op.getImm();
9305 } else if (Op.isReg()) {
9306 Op.addRegOperands(Inst, 1);
9307 } else {
9308 llvm_unreachable("Invalid operand type");
9309 }
9310 } else {
9311 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9312 Op.addRegWithFPInputModsOperands(Inst, 2);
9313 } else if (Op.isReg()) {
9314 Op.addRegOperands(Inst, 1);
9315 } else if (Op.isDPPCtrl()) {
9316 Op.addImmOperands(Inst, 1);
9317 } else if (Op.isImm()) {
9318 // Handle optional arguments
9319 OptionalIdx[Op.getImmTy()] = I;
9320 } else {
9321 llvm_unreachable("Invalid operand type");
9322 }
9323 }
9324 }
9325
9326 if (IsDPP8) {
9327 using namespace llvm::AMDGPU::DPP;
9328 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9329 } else {
9330 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9331 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9332 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9333 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
9334 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9335 AMDGPUOperand::ImmTyDppFI);
9336 }
9337 }
9338}
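// Editor's sketch (not part of the upstream source): the IsDPP8 path above
// consumes a dpp8 lane-select list plus an optional fi (fetch-inactive) bit,
// e.g.:
//   v_mov_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
//   v_mov_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
// while the non-DPP8 path defaults row_mask and bank_mask to 0xf when omitted.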
9339
9340//===----------------------------------------------------------------------===//
9341// sdwa
9342//===----------------------------------------------------------------------===//
9343
9344ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
9345 StringRef Prefix,
9346 AMDGPUOperand::ImmTy Type) {
9347 using namespace llvm::AMDGPU::SDWA;
9348
9349 SMLoc S = getLoc();
9350 StringRef Value;
9351
9352 SMLoc StringLoc;
9353 ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc);
9354 if (!Res.isSuccess())
9355 return Res;
9356
9357 int64_t Int;
9358 Int = StringSwitch<int64_t>(Value)
9359 .Case("BYTE_0", SdwaSel::BYTE_0)
9360 .Case("BYTE_1", SdwaSel::BYTE_1)
9361 .Case("BYTE_2", SdwaSel::BYTE_2)
9362 .Case("BYTE_3", SdwaSel::BYTE_3)
9363 .Case("WORD_0", SdwaSel::WORD_0)
9364 .Case("WORD_1", SdwaSel::WORD_1)
9365 .Case("DWORD", SdwaSel::DWORD)
9366 .Default(0xffffffff);
9367
9368 if (Int == 0xffffffff)
9369 return Error(StringLoc, "invalid " + Twine(Prefix) + " value");
9370
9371 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
9372 return ParseStatus::Success;
9373}
9374
9375ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
9376 using namespace llvm::AMDGPU::SDWA;
9377
9378 SMLoc S = getLoc();
9379 StringRef Value;
9380
9381 SMLoc StringLoc;
9382 ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc);
9383 if (!Res.isSuccess())
9384 return Res;
9385
9386 int64_t Int;
9387 Int = StringSwitch<int64_t>(Value)
9388 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
9389 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
9390 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
9391 .Default(0xffffffff);
9392
9393 if (Int == 0xffffffff)
9394 return Error(StringLoc, "invalid dst_unused value");
9395
9396 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused));
9397 return ParseStatus::Success;
9398}
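// Editor's sketch (not part of the upstream source): the selector and
// dst_unused keywords accepted above, as they appear in SDWA assembly
// (VI/GFX9 style syntax, shown for illustration only):
//   v_mov_b32_sdwa v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1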
9399
9400void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
9401 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
9402}
9403
9404void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
9405 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
9406}
9407
9408void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
9409 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
9410}
9411
9412void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
9413 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
9414}
9415
9416void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
9417 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
9418}
9419
9420void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
9421 uint64_t BasicInstType,
9422 bool SkipDstVcc,
9423 bool SkipSrcVcc) {
9424 using namespace llvm::AMDGPU::SDWA;
9425
9426 OptionalImmIndexMap OptionalIdx;
9427 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
9428 bool SkippedVcc = false;
9429
9430 unsigned I = 1;
9431 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9432 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9433 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9434 }
9435
9436 for (unsigned E = Operands.size(); I != E; ++I) {
9437 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9438 if (SkipVcc && !SkippedVcc && Op.isReg() &&
9439 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
9440 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
9441 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
9442 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
9443 // Skip VCC only if we didn't skip it on previous iteration.
9444 // Note that src0 and src1 occupy 2 slots each because of modifiers.
9445 if (BasicInstType == SIInstrFlags::VOP2 &&
9446 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
9447 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
9448 SkippedVcc = true;
9449 continue;
9450 } else if (BasicInstType == SIInstrFlags::VOPC &&
9451 Inst.getNumOperands() == 0) {
9452 SkippedVcc = true;
9453 continue;
9454 }
9455 }
9456 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9457 Op.addRegOrImmWithInputModsOperands(Inst, 2);
9458 } else if (Op.isImm()) {
9459 // Handle optional arguments
9460 OptionalIdx[Op.getImmTy()] = I;
9461 } else {
9462 llvm_unreachable("Invalid operand type");
9463 }
9464 SkippedVcc = false;
9465 }
9466
9467 const unsigned Opc = Inst.getOpcode();
9468 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
9469 Opc != AMDGPU::V_NOP_sdwa_vi) {
9470 // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
9471 switch (BasicInstType) {
9472 case SIInstrFlags::VOP1:
9473 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9474 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9475 AMDGPUOperand::ImmTyClampSI, 0);
9476
9477 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9478 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9479 AMDGPUOperand::ImmTyOModSI, 0);
9480
9481 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
9482 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9483 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9484
9485 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
9486 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9487 AMDGPUOperand::ImmTySDWADstUnused,
9488 DstUnused::UNUSED_PRESERVE);
9489
9490 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9491 break;
9492
9493 case SIInstrFlags::VOP2:
9494 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9495
9496 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
9497 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
9498
9499 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9500 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
9501 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9502 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9503 break;
9504
9505 case SIInstrFlags::VOPC:
9506 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
9507 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9508 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9509 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9510 break;
9511
9512 default:
9513 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9514 }
9515 }
9516
9517 // special case v_mac_{f16, f32}:
9518 // it has src2 register operand that is tied to dst operand
9519 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9520 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
9521 auto it = Inst.begin();
9522 std::advance(
9523 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
9524 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
9525 }
9526}
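// Editor's sketch (not part of the upstream source): the v_mac special case
// above duplicates the destination register as the tied src2 operand, so an
// instruction along the lines of
//   v_mac_f32_sdwa v3, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// ends up with v3 inserted at the src2 position of the resulting MCInst.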
9527
9528/// Force static initialization.
9529 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
9530 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
9531 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
9532 }
9533
9534#define GET_REGISTER_MATCHER
9535#define GET_MATCHER_IMPLEMENTATION
9536#define GET_MNEMONIC_SPELL_CHECKER
9537#define GET_MNEMONIC_CHECKER
9538#include "AMDGPUGenAsmMatcher.inc"
9539
9540ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
9541 unsigned MCK) {
9542 switch (MCK) {
9543 case MCK_addr64:
9544 return parseTokenOp("addr64", Operands);
9545 case MCK_done:
9546 return parseTokenOp("done", Operands);
9547 case MCK_idxen:
9548 return parseTokenOp("idxen", Operands);
9549 case MCK_lds:
9550 return parseTokenOp("lds", Operands);
9551 case MCK_offen:
9552 return parseTokenOp("offen", Operands);
9553 case MCK_off:
9554 return parseTokenOp("off", Operands);
9555 case MCK_row_95_en:
9556 return parseTokenOp("row_en", Operands);
9557 case MCK_gds:
9558 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
9559 case MCK_tfe:
9560 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
9561 }
9562 return tryCustomParseOperand(Operands, MCK);
9563}
9564
9565// This function should be defined after auto-generated include so that we have
9566// MatchClassKind enum defined
9567unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
9568 unsigned Kind) {
9569 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
9570 // But MatchInstructionImpl() expects to see a token and fails to validate the
9571 // operand. This method checks whether we were given an immediate operand where
9572 // the corresponding token was expected.
9573 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9574 switch (Kind) {
9575 case MCK_addr64:
9576 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9577 case MCK_gds:
9578 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9579 case MCK_lds:
9580 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9581 case MCK_idxen:
9582 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9583 case MCK_offen:
9584 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9585 case MCK_tfe:
9586 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
9587 case MCK_SSrc_b32:
9588 // When operands have expression values, they will return true for isToken,
9589 // because it is not possible to distinguish between a token and an
9590 // expression at parse time. MatchInstructionImpl() will always try to
9591 // match an operand as a token, when isToken returns true, and when the
9592 // name of the expression is not a valid token, the match will fail,
9593 // so we need to handle it here.
9594 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
9595 case MCK_SSrc_f32:
9596 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
9597 case MCK_SOPPBrTarget:
9598 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
9599 case MCK_VReg32OrOff:
9600 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9601 case MCK_InterpSlot:
9602 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9603 case MCK_InterpAttr:
9604 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9605 case MCK_InterpAttrChan:
9606 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
9607 case MCK_SReg_64:
9608 case MCK_SReg_64_XEXEC:
9609 // Null is defined as a 32-bit register but
9610 // it should also be enabled with 64-bit operands.
9611 // The following code enables it for SReg_64 operands
9612 // used as source and destination. Remaining source
9613 // operands are handled in isInlinableImm.
9614 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9615 default:
9616 return Match_InvalidOperand;
9617 }
9618}
9619
9620//===----------------------------------------------------------------------===//
9621// endpgm
9622//===----------------------------------------------------------------------===//
9623
9624ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
9625 SMLoc S = getLoc();
9626 int64_t Imm = 0;
9627
9628 if (!parseExpr(Imm)) {
9629 // The operand is optional, if not present default to 0
9630 Imm = 0;
9631 }
9632
9633 if (!isUInt<16>(Imm))
9634 return Error(S, "expected a 16-bit value");
9635
9636 Operands.push_back(
9637 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9638 return ParseStatus::Success;
9639}
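// Editor's sketch (not part of the upstream source): the immediate after
// s_endpgm is optional and defaults to 0 when omitted, e.g.:
//   s_endpgm      // Imm = 0
//   s_endpgm 3    // Imm = 3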
9640
9641bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9642
9643//===----------------------------------------------------------------------===//
9644// Split Barrier
9645//===----------------------------------------------------------------------===//
9646
9647bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }