LLVM 23.0.0git
AMDGPUAsmParser.cpp
Go to the documentation of this file.
1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
16#include "SIDefines.h"
17#include "SIInstrInfo.h"
22#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/StringSet.h"
25#include "llvm/ADT/Twine.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/MC/MCInstrDesc.h"
38#include "llvm/MC/MCSymbol.h"
47#include <optional>
48
49using namespace llvm;
50using namespace llvm::AMDGPU;
51using namespace llvm::amdhsa;
52
53namespace {
54
55class AMDGPUAsmParser;
56
/// Register categories distinguished by the parser. The enumerator names
/// suggest VGPR / SGPR / AGPR / TTMP / special registers; IS_UNKNOWN is the
/// zero value.
enum RegisterKind {
  IS_UNKNOWN,
  IS_VGPR,
  IS_SGPR,
  IS_AGPR,
  IS_TTMP,
  IS_SPECIAL
};
58
59//===----------------------------------------------------------------------===//
60// Operand
61//===----------------------------------------------------------------------===//
62
63class AMDGPUOperand : public MCParsedAsmOperand {
/// Discriminator for the operand: it selects which member of the payload
/// union (token, immediate, register or expression) is meaningful.
enum KindTy { Token, Immediate, Register, Expression };

/// Kind of this operand; set once by the constructor.
KindTy Kind;
70
71 SMLoc StartLoc, EndLoc;
72 const AMDGPUAsmParser *AsmParser;
73
74public:
75 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
76 : Kind(Kind_), AsmParser(AsmParser_) {}
77
78 using Ptr = std::unique_ptr<AMDGPUOperand>;
79
80 struct Modifiers {
81 bool Abs = false;
82 bool Neg = false;
83 bool Sext = false;
84 LitModifier Lit = LitModifier::None;
85
86 bool hasFPModifiers() const { return Abs || Neg; }
87 bool hasIntModifiers() const { return Sext; }
88 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
89 bool isForcedLit() const { return Lit == LitModifier::Lit; }
90 bool isForcedLit64() const { return Lit == LitModifier::Lit64; }
91
92 int64_t getFPModifiersOperand() const {
93 int64_t Operand = 0;
94 Operand |= Abs ? SISrcMods::ABS : 0u;
95 Operand |= Neg ? SISrcMods::NEG : 0u;
96 return Operand;
97 }
98
99 int64_t getIntModifiersOperand() const {
100 int64_t Operand = 0;
101 Operand |= Sext ? SISrcMods::SEXT : 0u;
102 return Operand;
103 }
104
105 int64_t getModifiersOperand() const {
106 assert(!(hasFPModifiers() && hasIntModifiers())
107 && "fp and int modifiers should not be used simultaneously");
108 if (hasFPModifiers())
109 return getFPModifiersOperand();
110 if (hasIntModifiers())
111 return getIntModifiersOperand();
112 return 0;
113 }
114
115 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
116 };
117
/// Tag carried by every Immediate operand. ImmTyNone marks a plain literal
/// (see isImmLiteral()); every other value names the modifier or special
/// operand the immediate belongs to. Enumerators are implicitly numbered, so
/// their order must not change.
enum ImmTy {
  ImmTyNone,
  // Memory-instruction flags and offsets.
  ImmTyGDS, ImmTyLDS, ImmTyOffen, ImmTyIdxen, ImmTyAddr64,
  ImmTyOffset, ImmTyInstOffset, ImmTyOffset0, ImmTyOffset1,
  ImmTySMEMOffsetMod,
  ImmTyCPol, ImmTyTFE, ImmTyIsAsync, ImmTyD16,
  // Output modifiers.
  ImmTyClamp, ImmTyOModSI,
  // SDWA selectors.
  ImmTySDWADstSel, ImmTySDWASrc0Sel, ImmTySDWASrc1Sel, ImmTySDWADstUnused,
  // Image-instruction tags.
  ImmTyDMask, ImmTyDim, ImmTyUNorm, ImmTyDA, ImmTyR128A16, ImmTyA16, ImmTyLWE,
  // Export tags.
  ImmTyExpTgt, ImmTyExpCompr, ImmTyExpVM, ImmTyDone, ImmTyRowEn,
  ImmTyFORMAT,
  ImmTyHwreg,
  ImmTyOff,
  ImmTySendMsg,
  ImmTyWaitEvent,
  // Interpolation tags.
  ImmTyInterpSlot, ImmTyInterpAttr, ImmTyInterpAttrChan,
  // op_sel / neg_lo / neg_hi style modifiers.
  ImmTyOpSel, ImmTyOpSelHi, ImmTyNegLo, ImmTyNegHi,
  ImmTyIndexKey8bit, ImmTyIndexKey16bit, ImmTyIndexKey32bit,
  // DPP controls.
  ImmTyDPP8, ImmTyDppCtrl, ImmTyDppRowMask, ImmTyDppBankMask,
  ImmTyDppBoundCtrl, ImmTyDppFI,
  ImmTySwizzle,
  ImmTyGprIdxMode,
  ImmTyHigh,
  ImmTyBLGP, ImmTyCBSZ, ImmTyABID,
  ImmTyEndpgm,
  // Wait-counter tags.
  ImmTyWaitVDST, ImmTyWaitEXP, ImmTyWaitVAVDst, ImmTyWaitVMVSrc,
  ImmTyBitOp3,
  // Matrix-instruction tags.
  ImmTyMatrixAFMT, ImmTyMatrixBFMT,
  ImmTyMatrixAScale, ImmTyMatrixBScale,
  ImmTyMatrixAScaleFmt, ImmTyMatrixBScaleFmt,
  ImmTyMatrixAReuse, ImmTyMatrixBReuse,
  ImmTyScaleSel,
  ImmTyByteSel,
};
196
197private:
/// Token payload: the token text as a pointer/length pair (see getToken()).
struct TokOp {
  const char *Data; // First character of the token text.
  unsigned Length;  // Number of characters starting at Data.
};
202
203 struct ImmOp {
204 int64_t Val;
205 ImmTy Type;
206 bool IsFPImm;
207 Modifiers Mods;
208 };
209
210 struct RegOp {
211 MCRegister RegNo;
212 Modifiers Mods;
213 };
214
215 union {
216 TokOp Tok;
217 ImmOp Imm;
218 RegOp Reg;
219 const MCExpr *Expr;
220 };
221
222 // The index of the associated MCInst operand.
223 mutable int MCOpIdx = -1;
224
225public:
226 bool isToken() const override { return Kind == Token; }
227
228 bool isSymbolRefExpr() const {
229 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230 }
231
232 bool isImm() const override {
233 return Kind == Immediate;
234 }
235
236 bool isInlinableImm(MVT type) const;
237 bool isLiteralImm(MVT type) const;
238
239 bool isRegKind() const {
240 return Kind == Register;
241 }
242
243 bool isReg() const override {
244 return isRegKind() && !hasModifiers();
245 }
246
247 bool isRegOrInline(unsigned RCID, MVT type) const {
248 return isRegClass(RCID) || isInlinableImm(type);
249 }
250
251 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
252 return isRegOrInline(RCID, type) || isLiteralImm(type);
253 }
254
255 bool isRegOrImmWithInt16InputMods() const {
256 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
257 }
258
259 template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
261 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
262 }
263
264 bool isRegOrImmWithInt32InputMods() const {
265 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
266 }
267
268 bool isRegOrInlineImmWithInt16InputMods() const {
269 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
270 }
271
272 template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
273 return isRegOrInline(
274 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
275 }
276
277 bool isRegOrInlineImmWithInt32InputMods() const {
278 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
279 }
280
281 bool isRegOrImmWithInt64InputMods() const {
282 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
283 }
284
285 bool isRegOrImmWithFP16InputMods() const {
286 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
287 }
288
289 template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
291 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
292 }
293
294 bool isRegOrImmWithFP32InputMods() const {
295 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
296 }
297
298 bool isRegOrImmWithFP64InputMods() const {
299 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
300 }
301
302 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
303 return isRegOrInline(
304 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
305 }
306
307 bool isRegOrInlineImmWithFP32InputMods() const {
308 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
309 }
310
311 bool isRegOrInlineImmWithFP64InputMods() const {
312 return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
313 }
314
315 bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }
316
317 bool isVRegWithFP32InputMods() const {
318 return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
319 }
320
321 bool isVRegWithFP64InputMods() const {
322 return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
323 }
324
325 bool isPackedFP16InputMods() const {
326 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
327 }
328
329 bool isPackedVGPRFP32InputMods() const {
330 return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
331 }
332
333 bool isVReg() const {
334 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
335 isRegClass(AMDGPU::VReg_64RegClassID) ||
336 isRegClass(AMDGPU::VReg_96RegClassID) ||
337 isRegClass(AMDGPU::VReg_128RegClassID) ||
338 isRegClass(AMDGPU::VReg_160RegClassID) ||
339 isRegClass(AMDGPU::VReg_192RegClassID) ||
340 isRegClass(AMDGPU::VReg_256RegClassID) ||
341 isRegClass(AMDGPU::VReg_512RegClassID) ||
342 isRegClass(AMDGPU::VReg_1024RegClassID);
343 }
344
345 bool isVReg32() const {
346 return isRegClass(AMDGPU::VGPR_32RegClassID);
347 }
348
349 bool isVReg32OrOff() const {
350 return isOff() || isVReg32();
351 }
352
353 bool isNull() const {
354 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
355 }
356
357 bool isAV_LdSt_32_Align2_RegOp() const {
358 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
359 isRegClass(AMDGPU::AGPR_32RegClassID);
360 }
361
362 bool isVRegWithInputMods() const;
363 template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
364 template <bool IsFake16> bool isT16VRegWithInputMods() const;
365
366 bool isSDWAOperand(MVT type) const;
367 bool isSDWAFP16Operand() const;
368 bool isSDWAFP32Operand() const;
369 bool isSDWAInt16Operand() const;
370 bool isSDWAInt32Operand() const;
371
372 bool isImmTy(ImmTy ImmT) const {
373 return isImm() && Imm.Type == ImmT;
374 }
375
376 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
377
378 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
379
380 bool isImmModifier() const {
381 return isImm() && Imm.Type != ImmTyNone;
382 }
383
384 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
385 bool isDim() const { return isImmTy(ImmTyDim); }
386 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
387 bool isOff() const { return isImmTy(ImmTyOff); }
388 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
389 bool isOffen() const { return isImmTy(ImmTyOffen); }
390 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
391 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
392 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
393 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
394 bool isGDS() const { return isImmTy(ImmTyGDS); }
395 bool isLDS() const { return isImmTy(ImmTyLDS); }
396 bool isCPol() const { return isImmTy(ImmTyCPol); }
397 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
398 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
399 bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
400 bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
401 bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
402 bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
403 bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
404 bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
405 bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
406 bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
407 bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
408 bool isTFE() const { return isImmTy(ImmTyTFE); }
409 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
410 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
411 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
412 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
413 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
414 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
415 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
416 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
417 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
418 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
419 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
420 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
421 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
422 bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
423 bool isDone() const { return isImmTy(ImmTyDone); }
424 bool isRowEn() const { return isImmTy(ImmTyRowEn); }
425
426 bool isRegOrImm() const {
427 return isReg() || isImm();
428 }
429
430 bool isRegClass(unsigned RCID) const;
431
432 bool isInlineValue() const;
433
434 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
435 return isRegOrInline(RCID, type) && !hasModifiers();
436 }
437
438 bool isSCSrcB16() const {
439 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
440 }
441
442 bool isSCSrcV2B16() const {
443 return isSCSrcB16();
444 }
445
446 bool isSCSrc_b32() const {
447 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
448 }
449
450 bool isSCSrc_b64() const {
451 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
452 }
453
454 bool isBoolReg() const;
455
456 bool isSCSrcF16() const {
457 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
458 }
459
460 bool isSCSrcV2F16() const {
461 return isSCSrcF16();
462 }
463
464 bool isSCSrcF32() const {
465 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
466 }
467
468 bool isSCSrcF64() const {
469 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
470 }
471
472 bool isSSrc_b32() const {
473 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
474 }
475
476 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
477
478 bool isSSrcV2B16() const {
479 llvm_unreachable("cannot happen");
480 return isSSrc_b16();
481 }
482
483 bool isSSrc_b64() const {
484 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
485 // See isVSrc64().
486 return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
487 (((const MCTargetAsmParser *)AsmParser)
488 ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
489 isExpr());
490 }
491
492 bool isSSrc_f32() const {
493 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
494 }
495
496 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
497
498 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
499
500 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
501
502 bool isSSrcV2F16() const {
503 llvm_unreachable("cannot happen");
504 return isSSrc_f16();
505 }
506
507 bool isSSrcV2FP32() const {
508 llvm_unreachable("cannot happen");
509 return isSSrc_f32();
510 }
511
512 bool isSCSrcV2FP32() const {
513 llvm_unreachable("cannot happen");
514 return isSCSrcF32();
515 }
516
517 bool isSSrcV2INT32() const {
518 llvm_unreachable("cannot happen");
519 return isSSrc_b32();
520 }
521
522 bool isSCSrcV2INT32() const {
523 llvm_unreachable("cannot happen");
524 return isSCSrc_b32();
525 }
526
527 bool isSSrcOrLds_b32() const {
528 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
529 isLiteralImm(MVT::i32) || isExpr();
530 }
531
532 bool isVCSrc_b32() const {
533 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
534 }
535
536 bool isVCSrc_b32_Lo256() const {
537 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
538 }
539
540 bool isVCSrc_b64_Lo256() const {
541 return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
542 }
543
544 bool isVCSrc_b64() const {
545 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
546 }
547
548 bool isVCSrcT_b16() const {
549 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
550 }
551
552 bool isVCSrcTB16_Lo128() const {
553 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
554 }
555
556 bool isVCSrcFake16B16_Lo128() const {
557 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
558 }
559
560 bool isVCSrc_b16() const {
561 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
562 }
563
564 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
565
566 bool isVCSrc_f32() const {
567 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
568 }
569
570 bool isVCSrc_f64() const {
571 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
572 }
573
574 bool isVCSrcTBF16() const {
575 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
576 }
577
578 bool isVCSrcT_f16() const {
579 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
580 }
581
582 bool isVCSrcT_bf16() const {
583 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
584 }
585
586 bool isVCSrcTBF16_Lo128() const {
587 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
588 }
589
590 bool isVCSrcTF16_Lo128() const {
591 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
592 }
593
594 bool isVCSrcFake16BF16_Lo128() const {
595 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
596 }
597
598 bool isVCSrcFake16F16_Lo128() const {
599 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
600 }
601
602 bool isVCSrc_bf16() const {
603 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
604 }
605
606 bool isVCSrc_f16() const {
607 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
608 }
609
610 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
611
612 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
613
614 bool isVSrc_b32() const {
615 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
616 }
617
618 bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }
619
620 bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
621
622 bool isVSrcT_b16_Lo128() const {
623 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
624 }
625
626 bool isVSrcFake16_b16_Lo128() const {
627 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
628 }
629
630 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
631
632 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
633
634 bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
635
636 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
637
638 bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
639
640 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
641
642 bool isVSrc_f32() const {
643 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
644 }
645
646 bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }
647
648 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
649
650 bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
651
652 bool isVSrcT_bf16_Lo128() const {
653 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
654 }
655
656 bool isVSrcT_f16_Lo128() const {
657 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
658 }
659
660 bool isVSrcFake16_bf16_Lo128() const {
661 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
662 }
663
664 bool isVSrcFake16_f16_Lo128() const {
665 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
666 }
667
668 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
669
670 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
671
672 bool isVSrc_v2bf16() const {
673 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
674 }
675
676 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
677
678 bool isVSrc_v2f16_splat() const { return isVSrc_v2f16(); }
679
680 bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
681
682 bool isVISrcB32() const {
683 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
684 }
685
686 bool isVISrcB16() const {
687 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
688 }
689
690 bool isVISrcV2B16() const {
691 return isVISrcB16();
692 }
693
694 bool isVISrcF32() const {
695 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
696 }
697
698 bool isVISrcF16() const {
699 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
700 }
701
702 bool isVISrcV2F16() const {
703 return isVISrcF16() || isVISrcB32();
704 }
705
706 bool isVISrc_64_bf16() const {
707 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
708 }
709
710 bool isVISrc_64_f16() const {
711 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
712 }
713
714 bool isVISrc_64_b32() const {
715 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
716 }
717
718 bool isVISrc_64B64() const {
719 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
720 }
721
722 bool isVISrc_64_f64() const {
723 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
724 }
725
726 bool isVISrc_64V2FP32() const {
727 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
728 }
729
730 bool isVISrc_64V2INT32() const {
731 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
732 }
733
734 bool isVISrc_256_b32() const {
735 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
736 }
737
738 bool isVISrc_256_f32() const {
739 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
740 }
741
742 bool isVISrc_256B64() const {
743 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
744 }
745
746 bool isVISrc_256_f64() const {
747 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
748 }
749
750 bool isVISrc_512_f64() const {
751 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
752 }
753
754 bool isVISrc_128B16() const {
755 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
756 }
757
758 bool isVISrc_128V2B16() const {
759 return isVISrc_128B16();
760 }
761
762 bool isVISrc_128_b32() const {
763 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
764 }
765
766 bool isVISrc_128_f32() const {
767 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
768 }
769
770 bool isVISrc_256V2FP32() const {
771 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
772 }
773
774 bool isVISrc_256V2INT32() const {
775 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
776 }
777
778 bool isVISrc_512_b32() const {
779 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
780 }
781
782 bool isVISrc_512B16() const {
783 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
784 }
785
786 bool isVISrc_512V2B16() const {
787 return isVISrc_512B16();
788 }
789
790 bool isVISrc_512_f32() const {
791 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
792 }
793
794 bool isVISrc_512F16() const {
795 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
796 }
797
798 bool isVISrc_512V2F16() const {
799 return isVISrc_512F16() || isVISrc_512_b32();
800 }
801
802 bool isVISrc_1024_b32() const {
803 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
804 }
805
806 bool isVISrc_1024B16() const {
807 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
808 }
809
810 bool isVISrc_1024V2B16() const {
811 return isVISrc_1024B16();
812 }
813
814 bool isVISrc_1024_f32() const {
815 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
816 }
817
818 bool isVISrc_1024F16() const {
819 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
820 }
821
822 bool isVISrc_1024V2F16() const {
823 return isVISrc_1024F16() || isVISrc_1024_b32();
824 }
825
826 bool isAISrcB32() const {
827 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
828 }
829
830 bool isAISrcB16() const {
831 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
832 }
833
834 bool isAISrcV2B16() const {
835 return isAISrcB16();
836 }
837
838 bool isAISrcF32() const {
839 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
840 }
841
842 bool isAISrcF16() const {
843 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
844 }
845
846 bool isAISrcV2F16() const {
847 return isAISrcF16() || isAISrcB32();
848 }
849
850 bool isAISrc_64B64() const {
851 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
852 }
853
854 bool isAISrc_64_f64() const {
855 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
856 }
857
858 bool isAISrc_128_b32() const {
859 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
860 }
861
862 bool isAISrc_128B16() const {
863 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
864 }
865
866 bool isAISrc_128V2B16() const {
867 return isAISrc_128B16();
868 }
869
870 bool isAISrc_128_f32() const {
871 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
872 }
873
874 bool isAISrc_128F16() const {
875 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
876 }
877
878 bool isAISrc_128V2F16() const {
879 return isAISrc_128F16() || isAISrc_128_b32();
880 }
881
882 bool isVISrc_128_bf16() const {
883 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
884 }
885
886 bool isVISrc_128_f16() const {
887 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
888 }
889
890 bool isVISrc_128V2F16() const {
891 return isVISrc_128_f16() || isVISrc_128_b32();
892 }
893
894 bool isAISrc_256B64() const {
895 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
896 }
897
898 bool isAISrc_256_f64() const {
899 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
900 }
901
902 bool isAISrc_512_b32() const {
903 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
904 }
905
906 bool isAISrc_512B16() const {
907 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
908 }
909
910 bool isAISrc_512V2B16() const {
911 return isAISrc_512B16();
912 }
913
914 bool isAISrc_512_f32() const {
915 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
916 }
917
918 bool isAISrc_512F16() const {
919 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
920 }
921
922 bool isAISrc_512V2F16() const {
923 return isAISrc_512F16() || isAISrc_512_b32();
924 }
925
926 bool isAISrc_1024_b32() const {
927 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
928 }
929
930 bool isAISrc_1024B16() const {
931 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
932 }
933
934 bool isAISrc_1024V2B16() const {
935 return isAISrc_1024B16();
936 }
937
938 bool isAISrc_1024_f32() const {
939 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
940 }
941
942 bool isAISrc_1024F16() const {
943 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
944 }
945
946 bool isAISrc_1024V2F16() const {
947 return isAISrc_1024F16() || isAISrc_1024_b32();
948 }
949
950 bool isKImmFP32() const {
951 return isLiteralImm(MVT::f32);
952 }
953
954 bool isKImmFP16() const {
955 return isLiteralImm(MVT::f16);
956 }
957
958 bool isKImmFP64() const { return isLiteralImm(MVT::f64); }
959
960 bool isMem() const override {
961 return false;
962 }
963
964 bool isExpr() const {
965 return Kind == Expression;
966 }
967
968 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
969
970 bool isSWaitCnt() const;
971 bool isDepCtr() const;
972 bool isSDelayALU() const;
973 bool isHwreg() const;
974 bool isSendMsg() const;
975 bool isWaitEvent() const;
976 bool isSplitBarrier() const;
977 bool isSwizzle() const;
978 bool isSMRDOffset8() const;
979 bool isSMEMOffset() const;
980 bool isSMRDLiteralOffset() const;
981 bool isDPP8() const;
982 bool isDPPCtrl() const;
983 bool isBLGP() const;
984 bool isGPRIdxMode() const;
985 bool isS16Imm() const;
986 bool isU16Imm() const;
987 bool isEndpgm() const;
988
989 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
990 return [this, P]() { return P(*this); };
991 }
992
993 StringRef getToken() const {
994 assert(isToken());
995 return StringRef(Tok.Data, Tok.Length);
996 }
997
998 int64_t getImm() const {
999 assert(isImm());
1000 return Imm.Val;
1001 }
1002
1003 void setImm(int64_t Val) {
1004 assert(isImm());
1005 Imm.Val = Val;
1006 }
1007
1008 ImmTy getImmTy() const {
1009 assert(isImm());
1010 return Imm.Type;
1011 }
1012
1013 MCRegister getReg() const override {
1014 assert(isRegKind());
1015 return Reg.RegNo;
1016 }
1017
1018 SMLoc getStartLoc() const override {
1019 return StartLoc;
1020 }
1021
1022 SMLoc getEndLoc() const override {
1023 return EndLoc;
1024 }
1025
1026 SMRange getLocRange() const {
1027 return SMRange(StartLoc, EndLoc);
1028 }
1029
1030 int getMCOpIdx() const { return MCOpIdx; }
1031
1032 Modifiers getModifiers() const {
1033 assert(isRegKind() || isImmTy(ImmTyNone));
1034 return isRegKind() ? Reg.Mods : Imm.Mods;
1035 }
1036
1037 void setModifiers(Modifiers Mods) {
1038 assert(isRegKind() || isImmTy(ImmTyNone));
1039 if (isRegKind())
1040 Reg.Mods = Mods;
1041 else
1042 Imm.Mods = Mods;
1043 }
1044
1045 bool hasModifiers() const {
1046 return getModifiers().hasModifiers();
1047 }
1048
1049 bool hasFPModifiers() const {
1050 return getModifiers().hasFPModifiers();
1051 }
1052
1053 bool hasIntModifiers() const {
1054 return getModifiers().hasIntModifiers();
1055 }
1056
1057 bool isForcedLit() const {
1058 return isImmLiteral() && getModifiers().isForcedLit();
1059 }
1060
1061 bool isForcedLit64() const {
1062 return isImmLiteral() && getModifiers().isForcedLit64();
1063 }
1064
1065 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1066
1067 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1068
1069 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1070
1071 void addRegOperands(MCInst &Inst, unsigned N) const;
1072
1073 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1074 if (isRegKind())
1075 addRegOperands(Inst, N);
1076 else
1077 addImmOperands(Inst, N);
1078 }
1079
1080 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1081 Modifiers Mods = getModifiers();
1082 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1083 if (isRegKind()) {
1084 addRegOperands(Inst, N);
1085 } else {
1086 addImmOperands(Inst, N, false);
1087 }
1088 }
1089
1090 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1091 assert(!hasIntModifiers());
1092 addRegOrImmWithInputModsOperands(Inst, N);
1093 }
1094
1095 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1096 assert(!hasFPModifiers());
1097 addRegOrImmWithInputModsOperands(Inst, N);
1098 }
1099
1100 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1101 Modifiers Mods = getModifiers();
1102 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1103 assert(isRegKind());
1104 addRegOperands(Inst, N);
1105 }
1106
1107 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1108 assert(!hasIntModifiers());
1109 addRegWithInputModsOperands(Inst, N);
1110 }
1111
1112 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1113 assert(!hasFPModifiers());
1114 addRegWithInputModsOperands(Inst, N);
1115 }
1116
1117 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1118 // clang-format off
1119 switch (Type) {
1120 case ImmTyNone: OS << "None"; break;
1121 case ImmTyGDS: OS << "GDS"; break;
1122 case ImmTyLDS: OS << "LDS"; break;
1123 case ImmTyOffen: OS << "Offen"; break;
1124 case ImmTyIdxen: OS << "Idxen"; break;
1125 case ImmTyAddr64: OS << "Addr64"; break;
1126 case ImmTyOffset: OS << "Offset"; break;
1127 case ImmTyInstOffset: OS << "InstOffset"; break;
1128 case ImmTyOffset0: OS << "Offset0"; break;
1129 case ImmTyOffset1: OS << "Offset1"; break;
1130 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1131 case ImmTyCPol: OS << "CPol"; break;
1132 case ImmTyIndexKey8bit: OS << "index_key"; break;
1133 case ImmTyIndexKey16bit: OS << "index_key"; break;
1134 case ImmTyIndexKey32bit: OS << "index_key"; break;
1135 case ImmTyTFE: OS << "TFE"; break;
1136 case ImmTyIsAsync: OS << "IsAsync"; break;
1137 case ImmTyD16: OS << "D16"; break;
1138 case ImmTyFORMAT: OS << "FORMAT"; break;
1139 case ImmTyClamp: OS << "Clamp"; break;
1140 case ImmTyOModSI: OS << "OModSI"; break;
1141 case ImmTyDPP8: OS << "DPP8"; break;
1142 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1143 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1144 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1145 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1146 case ImmTyDppFI: OS << "DppFI"; break;
1147 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1148 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1149 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1150 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1151 case ImmTyDMask: OS << "DMask"; break;
1152 case ImmTyDim: OS << "Dim"; break;
1153 case ImmTyUNorm: OS << "UNorm"; break;
1154 case ImmTyDA: OS << "DA"; break;
1155 case ImmTyR128A16: OS << "R128A16"; break;
1156 case ImmTyA16: OS << "A16"; break;
1157 case ImmTyLWE: OS << "LWE"; break;
1158 case ImmTyOff: OS << "Off"; break;
1159 case ImmTyExpTgt: OS << "ExpTgt"; break;
1160 case ImmTyExpCompr: OS << "ExpCompr"; break;
1161 case ImmTyExpVM: OS << "ExpVM"; break;
1162 case ImmTyDone: OS << "Done"; break;
1163 case ImmTyRowEn: OS << "RowEn"; break;
1164 case ImmTyHwreg: OS << "Hwreg"; break;
1165 case ImmTySendMsg: OS << "SendMsg"; break;
1166 case ImmTyWaitEvent: OS << "WaitEvent"; break;
1167 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1168 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1169 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1170 case ImmTyOpSel: OS << "OpSel"; break;
1171 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1172 case ImmTyNegLo: OS << "NegLo"; break;
1173 case ImmTyNegHi: OS << "NegHi"; break;
1174 case ImmTySwizzle: OS << "Swizzle"; break;
1175 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1176 case ImmTyHigh: OS << "High"; break;
1177 case ImmTyBLGP: OS << "BLGP"; break;
1178 case ImmTyCBSZ: OS << "CBSZ"; break;
1179 case ImmTyABID: OS << "ABID"; break;
1180 case ImmTyEndpgm: OS << "Endpgm"; break;
1181 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1182 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1183 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1184 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1185 case ImmTyBitOp3: OS << "BitOp3"; break;
1186 case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
1187 case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
1188 case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
1189 case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
1190 case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
1191 case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
1192 case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
1193 case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
1194 case ImmTyScaleSel: OS << "ScaleSel" ; break;
1195 case ImmTyByteSel: OS << "ByteSel" ; break;
1196 }
1197 // clang-format on
1198 }
1199
1200 void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
1201 switch (Kind) {
1202 case Register:
1203 OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
1204 << " mods: " << Reg.Mods << '>';
1205 break;
1206 case Immediate:
1207 OS << '<' << getImm();
1208 if (getImmTy() != ImmTyNone) {
1209 OS << " type: "; printImmTy(OS, getImmTy());
1210 }
1211 OS << " mods: " << Imm.Mods << '>';
1212 break;
1213 case Token:
1214 OS << '\'' << getToken() << '\'';
1215 break;
1216 case Expression:
1217 OS << "<expr ";
1218 MAI.printExpr(OS, *Expr);
1219 OS << '>';
1220 break;
1221 }
1222 }
1223
1224 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1225 int64_t Val, SMLoc Loc,
1226 ImmTy Type = ImmTyNone,
1227 bool IsFPImm = false) {
1228 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1229 Op->Imm.Val = Val;
1230 Op->Imm.IsFPImm = IsFPImm;
1231 Op->Imm.Type = Type;
1232 Op->Imm.Mods = Modifiers();
1233 Op->StartLoc = Loc;
1234 Op->EndLoc = Loc;
1235 return Op;
1236 }
1237
1238 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1239 StringRef Str, SMLoc Loc,
1240 bool HasExplicitEncodingSize = true) {
1241 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1242 Res->Tok.Data = Str.data();
1243 Res->Tok.Length = Str.size();
1244 Res->StartLoc = Loc;
1245 Res->EndLoc = Loc;
1246 return Res;
1247 }
1248
1249 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1250 MCRegister Reg, SMLoc S, SMLoc E) {
1251 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1252 Op->Reg.RegNo = Reg;
1253 Op->Reg.Mods = Modifiers();
1254 Op->StartLoc = S;
1255 Op->EndLoc = E;
1256 return Op;
1257 }
1258
1259 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1260 const class MCExpr *Expr, SMLoc S) {
1261 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1262 Op->Expr = Expr;
1263 Op->StartLoc = S;
1264 Op->EndLoc = S;
1265 return Op;
1266 }
1267};
1268
1269raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1270 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1271 return OS;
1272}
1273
1274//===----------------------------------------------------------------------===//
1275// AsmParser
1276//===----------------------------------------------------------------------===//
1277
1278// TODO: define GET_SUBTARGET_FEATURE_NAME
1279#define GET_REGISTER_MATCHER
1280#include "AMDGPUGenAsmMatcher.inc"
1281#undef GET_REGISTER_MATCHER
1282#undef GET_SUBTARGET_FEATURE_NAME
1283
1284// Holds info related to the current kernel, e.g. count of SGPRs used.
1285// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1286// .amdgpu_hsa_kernel or at EOF.
1287class KernelScopeInfo {
1288 int SgprIndexUnusedMin = -1;
1289 int VgprIndexUnusedMin = -1;
1290 int AgprIndexUnusedMin = -1;
1291 MCContext *Ctx = nullptr;
1292 MCSubtargetInfo const *MSTI = nullptr;
1293
1294 void usesSgprAt(int i) {
1295 if (i >= SgprIndexUnusedMin) {
1296 SgprIndexUnusedMin = ++i;
1297 if (Ctx) {
1298 MCSymbol* const Sym =
1299 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1300 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1301 }
1302 }
1303 }
1304
1305 void usesVgprAt(int i) {
1306 if (i >= VgprIndexUnusedMin) {
1307 VgprIndexUnusedMin = ++i;
1308 if (Ctx) {
1309 MCSymbol* const Sym =
1310 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1311 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1312 VgprIndexUnusedMin);
1313 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1314 }
1315 }
1316 }
1317
1318 void usesAgprAt(int i) {
1319 // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
1320 if (!hasMAIInsts(*MSTI))
1321 return;
1322
1323 if (i >= AgprIndexUnusedMin) {
1324 AgprIndexUnusedMin = ++i;
1325 if (Ctx) {
1326 MCSymbol* const Sym =
1327 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1328 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1329
1330 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1331 MCSymbol* const vSym =
1332 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1333 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1334 VgprIndexUnusedMin);
1335 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1336 }
1337 }
1338 }
1339
1340public:
1341 KernelScopeInfo() = default;
1342
1343 void initialize(MCContext &Context) {
1344 Ctx = &Context;
1345 MSTI = Ctx->getSubtargetInfo();
1346
1347 usesSgprAt(SgprIndexUnusedMin = -1);
1348 usesVgprAt(VgprIndexUnusedMin = -1);
1349 if (hasMAIInsts(*MSTI)) {
1350 usesAgprAt(AgprIndexUnusedMin = -1);
1351 }
1352 }
1353
1354 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1355 unsigned RegWidth) {
1356 switch (RegKind) {
1357 case IS_SGPR:
1358 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1359 break;
1360 case IS_AGPR:
1361 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1362 break;
1363 case IS_VGPR:
1364 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1365 break;
1366 default:
1367 break;
1368 }
1369 }
1370};
1371
1372class AMDGPUAsmParser : public MCTargetAsmParser {
1373 MCAsmParser &Parser;
1374
1375 unsigned ForcedEncodingSize = 0;
1376 bool ForcedDPP = false;
1377 bool ForcedSDWA = false;
1378 KernelScopeInfo KernelScope;
1379 const unsigned HwMode;
1380
1381 /// @name Auto-generated Match Functions
1382 /// {
1383
1384#define GET_ASSEMBLER_HEADER
1385#include "AMDGPUGenAsmMatcher.inc"
1386
1387 /// }
1388
1389 /// Get size of register operand
1390 unsigned getRegOperandSize(const MCInstrDesc &Desc, unsigned OpNo) const {
1391 assert(OpNo < Desc.NumOperands);
1392 int16_t RCID = MII.getOpRegClassID(Desc.operands()[OpNo], HwMode);
1393 return getRegBitWidth(RCID) / 8;
1394 }
1395
1396 std::optional<AMDGPU::InfoSectionData> InfoData;
1397
1398private:
1399 void createConstantSymbol(StringRef Id, int64_t Val);
1400
1401 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1402 bool OutOfRangeError(SMRange Range);
1403 /// Calculate VGPR/SGPR blocks required for given target, reserved
1404 /// registers, and user-specified NextFreeXGPR values.
1405 ///
1406 /// \param Features [in] Target features, used for bug corrections.
1407 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1408 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1409 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1410 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1411 /// descriptor field, if valid.
1412 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1413 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1414 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1415 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1416 /// \param VGPRBlocks [out] Result VGPR block count.
1417 /// \param SGPRBlocks [out] Result SGPR block count.
1418 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1419 const MCExpr *FlatScrUsed, bool XNACKUsed,
1420 std::optional<bool> EnableWavefrontSize32,
1421 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1422 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1423 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
1424 bool ParseDirectiveAMDGCNTarget();
1425 bool ParseDirectiveAMDHSACodeObjectVersion();
1426 bool ParseDirectiveAMDHSAKernel();
1427 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1428 bool ParseDirectiveAMDKernelCodeT();
1429 // TODO: Possibly make subtargetHasRegister const.
1430 bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
1431 bool ParseDirectiveAMDGPUHsaKernel();
1432
1433 bool ParseDirectiveISAVersion();
1434 bool ParseDirectiveHSAMetadata();
1435 bool ParseDirectivePALMetadataBegin();
1436 bool ParseDirectivePALMetadata();
1437 bool ParseDirectiveAMDGPULDS();
1438 bool ParseDirectiveAMDGPUInfo();
1439
1440 /// Common code to parse out a block of text (typically YAML) between start and
1441 /// end directives.
1442 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1443 const char *AssemblerDirectiveEnd,
1444 std::string &CollectString);
1445
1446 bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
1447 RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
1448 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1449 unsigned &RegNum, unsigned &RegWidth,
1450 bool RestoreOnFailure = false);
1451 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1452 unsigned &RegNum, unsigned &RegWidth,
1453 SmallVectorImpl<AsmToken> &Tokens);
1454 MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1455 unsigned &RegWidth,
1456 SmallVectorImpl<AsmToken> &Tokens);
1457 MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1458 unsigned &RegWidth,
1459 SmallVectorImpl<AsmToken> &Tokens);
1460 MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1461 unsigned &RegWidth,
1462 SmallVectorImpl<AsmToken> &Tokens);
1463 bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
1464 MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
1465 unsigned SubReg, unsigned RegWidth, SMLoc Loc);
1466
1467 bool isRegister();
1468 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1469 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1470 void initializeGprCountSymbol(RegisterKind RegKind);
1471 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1472 unsigned RegWidth);
1473 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1474 bool IsAtomic);
1475
1476public:
1477 enum OperandMode {
1478 OperandMode_Default,
1479 OperandMode_NSA,
1480 };
1481
1482 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1483
/// Constructs the parser for subtarget \p STI.
///
/// Besides initializing the base class, Parser and HwMode, the constructor
/// computes the available features and predefines a number of assembler
/// symbols through createConstantSymbol(): the ISA version triple, every
/// entry returned by AMDGPU::UCVersion::getGFXVersions(), and the three
/// UC_VERSION_*_BIT constants.
///
/// NOTE(review): the embedded listing numbers jump from 1487 to 1489 right
/// after the initializer list, so one source line may be missing from this
/// listing — confirm against the upstream file before changing the body.
1484 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1485 const MCInstrInfo &MII)
1486 : MCTargetAsmParser(STI, MII), Parser(_Parser),
1487 HwMode(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {
1489
1490 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1491
// The ISA version is exposed under ".amdgcn.gfx_generation_*" names when the
// major version is at least 6 and the HSA ABI is in use, and under
// ".option.machine_version_*" names otherwise.
1492 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1493 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1494 createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1495 createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1496 createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1497 } else {
1498 createConstantSymbol(".option.machine_version_major", ISA.Major);
1499 createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1500 createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1501 }
// Same condition as above: either the GPR count symbols are initialized, or
// the KernelScope tracker is bound to the context.
1502 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1503 initializeGprCountSymbol(IS_VGPR);
1504 initializeGprCountSymbol(IS_SGPR);
1505 } else
1506 KernelScope.initialize(getContext());
1507
1508 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1509 createConstantSymbol(Symbol, Code);
1510
1511 createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1512 createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1513 createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1514 }
1515
1516 bool hasMIMG_R128() const {
1517 return AMDGPU::hasMIMG_R128(getSTI());
1518 }
1519
1520 bool hasPackedD16() const {
1521 return AMDGPU::hasPackedD16(getSTI());
1522 }
1523
1524 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1525
1526 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1527
1528 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1529
1530 bool isSI() const {
1531 return AMDGPU::isSI(getSTI());
1532 }
1533
1534 bool isCI() const {
1535 return AMDGPU::isCI(getSTI());
1536 }
1537
1538 bool isVI() const {
1539 return AMDGPU::isVI(getSTI());
1540 }
1541
1542 bool isGFX9() const {
1543 return AMDGPU::isGFX9(getSTI());
1544 }
1545
1546 // TODO: isGFX90A is also true for GFX940. We need to clean it.
1547 bool isGFX90A() const {
1548 return AMDGPU::isGFX90A(getSTI());
1549 }
1550
1551 bool isGFX940() const {
1552 return AMDGPU::isGFX940(getSTI());
1553 }
1554
1555 bool isGFX9Plus() const {
1556 return AMDGPU::isGFX9Plus(getSTI());
1557 }
1558
1559 bool isGFX10() const {
1560 return AMDGPU::isGFX10(getSTI());
1561 }
1562
1563 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1564
1565 bool isGFX11() const {
1566 return AMDGPU::isGFX11(getSTI());
1567 }
1568
1569 bool isGFX11Plus() const {
1570 return AMDGPU::isGFX11Plus(getSTI());
1571 }
1572
1573 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1574
1575 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1576
1577 bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }
1578
1579 bool isGFX1250Plus() const { return AMDGPU::isGFX1250Plus(getSTI()); }
1580
1581 bool isGFX13() const { return AMDGPU::isGFX13(getSTI()); }
1582
1583 bool isGFX13Plus() const { return AMDGPU::isGFX13Plus(getSTI()); }
1584
1585 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1586
1587 bool isGFX10_BEncoding() const {
1588 return AMDGPU::isGFX10_BEncoding(getSTI());
1589 }
1590
1591 bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }
1592
1593 bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }
1594
1595 bool hasInv2PiInlineImm() const {
1596 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1597 }
1598
1599 bool has64BitLiterals() const {
1600 return getFeatureBits()[AMDGPU::Feature64BitLiterals];
1601 }
1602
1603 bool hasFlatOffsets() const {
1604 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1605 }
1606
1607 bool hasTrue16Insts() const {
1608 return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
1609 }
1610
1611 bool hasArchitectedFlatScratch() const {
1612 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1613 }
1614
1615 bool hasSGPR102_SGPR103() const {
1616 return !isVI() && !isGFX9();
1617 }
1618
1619 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1620
1621 bool hasIntClamp() const {
1622 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1623 }
1624
1625 bool hasPartialNSAEncoding() const {
1626 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1627 }
1628
1629 bool hasGloballyAddressableScratch() const {
1630 return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
1631 }
1632
1633 unsigned getNSAMaxSize(bool HasSampler = false) const {
1634 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1635 }
1636
1637 unsigned getMaxNumUserSGPRs() const {
1638 return AMDGPU::getMaxNumUserSGPRs(getSTI());
1639 }
1640
1641 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1642
1643 AMDGPUTargetStreamer &getTargetStreamer() {
1644 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1645 return static_cast<AMDGPUTargetStreamer &>(TS);
1646 }
1647
1648 MCContext &getContext() const {
1649 // We need this const_cast because for some reason getContext() is not const
1650 // in MCAsmParser.
1651 return const_cast<AMDGPUAsmParser *>(this)->MCTargetAsmParser::getContext();
1652 }
1653
1654 const MCRegisterInfo *getMRI() const {
1655 return getContext().getRegisterInfo();
1656 }
1657
1658 const MCInstrInfo *getMII() const {
1659 return &MII;
1660 }
1661
1662 // FIXME: This should not be used. Instead, should use queries derived from
1663 // getAvailableFeatures().
1664 const FeatureBitset &getFeatureBits() const {
1665 return getSTI().getFeatureBits();
1666 }
1667
1668 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1669 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1670 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1671
1672 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1673 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1674 bool isForcedDPP() const { return ForcedDPP; }
1675 bool isForcedSDWA() const { return ForcedSDWA; }
1676 ArrayRef<unsigned> getMatchedVariants() const;
1677 StringRef getMatchedVariantName() const;
1678
1679 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1680 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1681 bool RestoreOnFailure);
1682 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1683 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1684 SMLoc &EndLoc) override;
1685 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1686 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1687 unsigned Kind) override;
1688 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1689 OperandVector &Operands, MCStreamer &Out,
1690 uint64_t &ErrorInfo,
1691 bool MatchingInlineAsm) override;
1692 bool ParseDirective(AsmToken DirectiveID) override;
1693 void onEndOfFile() override;
1694 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1695 OperandMode Mode = OperandMode_Default);
1696 StringRef parseMnemonicSuffix(StringRef Name);
1697 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1698 SMLoc NameLoc, OperandVector &Operands) override;
1699 //bool ProcessInstruction(MCInst &Inst);
1700
1701 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1702
1703 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1704
1705 ParseStatus
1706 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1707 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1708 std::function<bool(int64_t &)> ConvertResult = nullptr);
1709
1710 ParseStatus parseOperandArrayWithPrefix(
1711 const char *Prefix, OperandVector &Operands,
1712 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1713 bool (*ConvertResult)(int64_t &) = nullptr);
1714
1715 ParseStatus
1716 parseNamedBit(StringRef Name, OperandVector &Operands,
1717 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1718 bool IgnoreNegative = false);
1719 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1720 ParseStatus parseCPol(OperandVector &Operands);
1721 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1722 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1723 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1724 SMLoc &StringLoc);
1725 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1726 StringRef Name,
1727 ArrayRef<const char *> Ids,
1728 int64_t &IntVal);
1729 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1730 StringRef Name,
1731 ArrayRef<const char *> Ids,
1732 AMDGPUOperand::ImmTy Type);
1733
1734 bool isModifier();
1735 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1736 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1737 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1738 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1739 bool parseSP3NegModifier();
1740 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1741 LitModifier Lit = LitModifier::None);
1742 ParseStatus parseReg(OperandVector &Operands);
1743 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1744 LitModifier Lit = LitModifier::None);
1745 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1746 bool AllowImm = true);
1747 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1748 bool AllowImm = true);
1749 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1750 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1751 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1752 ParseStatus tryParseIndexKey(OperandVector &Operands,
1753 AMDGPUOperand::ImmTy ImmTy);
1754 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1755 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1756 ParseStatus parseIndexKey32bit(OperandVector &Operands);
1757 ParseStatus tryParseMatrixFMT(OperandVector &Operands, StringRef Name,
1758 AMDGPUOperand::ImmTy Type);
1759 ParseStatus parseMatrixAFMT(OperandVector &Operands);
1760 ParseStatus parseMatrixBFMT(OperandVector &Operands);
1761 ParseStatus tryParseMatrixScale(OperandVector &Operands, StringRef Name,
1762 AMDGPUOperand::ImmTy Type);
1763 ParseStatus parseMatrixAScale(OperandVector &Operands);
1764 ParseStatus parseMatrixBScale(OperandVector &Operands);
1765 ParseStatus tryParseMatrixScaleFmt(OperandVector &Operands, StringRef Name,
1766 AMDGPUOperand::ImmTy Type);
1767 ParseStatus parseMatrixAScaleFmt(OperandVector &Operands);
1768 ParseStatus parseMatrixBScaleFmt(OperandVector &Operands);
1769
1770 ParseStatus parseDfmtNfmt(int64_t &Format);
1771 ParseStatus parseUfmt(int64_t &Format);
1772 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1773 int64_t &Format);
1774 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1775 int64_t &Format);
1776 ParseStatus parseFORMAT(OperandVector &Operands);
1777 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1778 ParseStatus parseNumericFormat(int64_t &Format);
1779 ParseStatus parseFlatOffset(OperandVector &Operands);
1780 ParseStatus parseR128A16(OperandVector &Operands);
1781 ParseStatus parseBLGP(OperandVector &Operands);
1782 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1783 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1784
1785 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1786
1787 bool parseCnt(int64_t &IntVal);
1788 ParseStatus parseSWaitCnt(OperandVector &Operands);
1789
1790 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1791 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1792 ParseStatus parseDepCtr(OperandVector &Operands);
1793
1794 bool parseDelay(int64_t &Delay);
1795 ParseStatus parseSDelayALU(OperandVector &Operands);
1796
1797 ParseStatus parseHwreg(OperandVector &Operands);
1798
1799private:
1800 struct OperandInfoTy {
1801 SMLoc Loc;
1802 int64_t Val;
1803 bool IsSymbolic = false;
1804 bool IsDefined = false;
1805
1806 constexpr OperandInfoTy(int64_t Val) : Val(Val) {}
1807 };
1808
1809 struct StructuredOpField : OperandInfoTy {
1810 StringLiteral Id;
1811 StringLiteral Desc;
1812 unsigned Width;
1813 bool IsDefined = false;
1814
1815 constexpr StructuredOpField(StringLiteral Id, StringLiteral Desc,
1816 unsigned Width, int64_t Default)
1817 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1818 virtual ~StructuredOpField() = default;
1819
1820 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1821 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1822 return false;
1823 }
1824
1825 virtual bool validate(AMDGPUAsmParser &Parser) const {
1826 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1827 return Error(Parser, "not supported on this GPU");
1828 if (!isUIntN(Width, Val))
1829 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1830 return true;
1831 }
1832 };
1833
1834 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1835 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1836
1837 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1838 bool validateSendMsg(const OperandInfoTy &Msg,
1839 const OperandInfoTy &Op,
1840 const OperandInfoTy &Stream);
1841
1842 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1843 OperandInfoTy &Width);
1844
1845 const AMDGPUOperand &findMCOperand(const OperandVector &Operands,
1846 int MCOpIdx) const;
1847
1848 static SMLoc getLaterLoc(SMLoc a, SMLoc b);
1849
1850 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1851 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1852 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1853
1854 SMLoc getOperandLoc(const OperandVector &Operands, int MCOpIdx) const;
1855 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1856 const OperandVector &Operands) const;
1857 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type,
1858 const OperandVector &Operands) const;
1859 SMLoc getInstLoc(const OperandVector &Operands) const;
1860
1861 bool validateInstruction(const MCInst &Inst, SMLoc IDLoc,
1862 const OperandVector &Operands);
1863 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1864 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1865 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1866 bool validateSOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1867 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1868 std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
1869 bool AsVOPD3);
1870 bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
1871 bool tryVOPD(const MCInst &Inst);
1872 bool tryVOPD3(const MCInst &Inst);
1873 bool tryAnotherVOPDEncoding(const MCInst &Inst);
1874
1875 bool validateIntClampSupported(const MCInst &Inst);
1876 bool validateMIMGAtomicDMask(const MCInst &Inst);
1877 bool validateMIMGGatherDMask(const MCInst &Inst);
1878 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1879 bool validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc);
1880 bool validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc);
1881 bool validateMIMGD16(const MCInst &Inst);
1882 bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1883 bool validateTensorR128(const MCInst &Inst);
1884 bool validateMIMGMSAA(const MCInst &Inst);
1885 bool validateOpSel(const MCInst &Inst);
1886 bool validateTrue16OpSel(const MCInst &Inst);
1887 bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
1888 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1889 bool validateVccOperand(MCRegister Reg) const;
1890 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1891 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1892 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1893 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1894 bool validateAGPRLdSt(const MCInst &Inst) const;
1895 bool validateVGPRAlign(const MCInst &Inst) const;
1896 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1897 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1898 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1899 bool validateDivScale(const MCInst &Inst);
1900 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1901 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1902 SMLoc IDLoc);
1903 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1904 const unsigned CPol);
1905 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1906 bool validateLdsDirect(const MCInst &Inst, const OperandVector &Operands);
1907 bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
1908 unsigned getConstantBusLimit(unsigned Opcode) const;
1909 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1910 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1911 MCRegister findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1912
1913 bool isSupportedMnemo(StringRef Mnemo,
1914 const FeatureBitset &FBS);
1915 bool isSupportedMnemo(StringRef Mnemo,
1916 const FeatureBitset &FBS,
1917 ArrayRef<unsigned> Variants);
1918 bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);
1919
1920 bool isId(const StringRef Id) const;
1921 bool isId(const AsmToken &Token, const StringRef Id) const;
1922 bool isToken(const AsmToken::TokenKind Kind) const;
1923 StringRef getId() const;
1924 bool trySkipId(const StringRef Id);
1925 bool trySkipId(const StringRef Pref, const StringRef Id);
1926 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1927 bool trySkipToken(const AsmToken::TokenKind Kind);
1928 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1929 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1930 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1931
1932 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1933 AsmToken::TokenKind getTokenKind() const;
1934 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1935 bool parseExpr(OperandVector &Operands);
1936 StringRef getTokenStr() const;
1937 AsmToken peekToken(bool ShouldSkipSpace = true);
1938 AsmToken getToken() const;
1939 SMLoc getLoc() const;
1940 void lex();
1941
1942public:
1943 void onBeginOfFile() override;
1944 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1945
1946 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1947
1948 ParseStatus parseExpTgt(OperandVector &Operands);
1949 ParseStatus parseSendMsg(OperandVector &Operands);
1950 ParseStatus parseWaitEvent(OperandVector &Operands);
1951 ParseStatus parseInterpSlot(OperandVector &Operands);
1952 ParseStatus parseInterpAttr(OperandVector &Operands);
1953 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1954 ParseStatus parseBoolReg(OperandVector &Operands);
1955
1956 bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1957 const unsigned MaxVal, const Twine &ErrMsg,
1958 SMLoc &Loc);
1959 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1960 const unsigned MinVal,
1961 const unsigned MaxVal,
1962 const StringRef ErrMsg);
1963 ParseStatus parseSwizzle(OperandVector &Operands);
1964 bool parseSwizzleOffset(int64_t &Imm);
1965 bool parseSwizzleMacro(int64_t &Imm);
1966 bool parseSwizzleQuadPerm(int64_t &Imm);
1967 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1968 bool parseSwizzleBroadcast(int64_t &Imm);
1969 bool parseSwizzleSwap(int64_t &Imm);
1970 bool parseSwizzleReverse(int64_t &Imm);
1971 bool parseSwizzleFFT(int64_t &Imm);
1972 bool parseSwizzleRotate(int64_t &Imm);
1973
1974 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1975 int64_t parseGPRIdxMacro();
1976
1977 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1978 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1979
1980 ParseStatus parseOModSI(OperandVector &Operands);
1981
1982 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1983 OptionalImmIndexMap &OptionalIdx);
1984 void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
1985 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1986 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1987 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1988 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1989
1990 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1991 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1992 OptionalImmIndexMap &OptionalIdx);
1993 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1994 OptionalImmIndexMap &OptionalIdx);
1995
1996 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1997 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1998 void cvtOpSelHelper(MCInst &Inst, unsigned OpSel);
1999
2000 bool parseDimId(unsigned &Encoding);
2001 ParseStatus parseDim(OperandVector &Operands);
2002 bool convertDppBoundCtrl(int64_t &BoundCtrl);
2003 ParseStatus parseDPP8(OperandVector &Operands);
2004 ParseStatus parseDPPCtrl(OperandVector &Operands);
2005 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
2006 int64_t parseDPPCtrlSel(StringRef Ctrl);
2007 int64_t parseDPPCtrlPerm();
2008 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
2009 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
2010 cvtDPP(Inst, Operands, true);
2011 }
2012 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
2013 bool IsDPP8 = false);
2014 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
2015 cvtVOP3DPP(Inst, Operands, true);
2016 }
2017
2018 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
2019 AMDGPUOperand::ImmTy Type);
2020 ParseStatus parseSDWADstUnused(OperandVector &Operands);
2021 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
2022 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
2023 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
2024 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
2025 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
2026 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
2027 uint64_t BasicInstType,
2028 bool SkipDstVcc = false,
2029 bool SkipSrcVcc = false);
2030
2031 ParseStatus parseEndpgm(OperandVector &Operands);
2032
2033 ParseStatus parseVOPD(OperandVector &Operands);
2034};
2035
2036} // end anonymous namespace
2037
2038// May be called with integer type with equivalent bitwidth.
2039static const fltSemantics *getFltSemantics(unsigned Size) {
2040 switch (Size) {
2041 case 4:
2042 return &APFloat::IEEEsingle();
2043 case 8:
2044 return &APFloat::IEEEdouble();
2045 case 2:
2046 return &APFloat::IEEEhalf();
2047 default:
2048 llvm_unreachable("unsupported fp type");
2049 }
2050}
2051
2053 return getFltSemantics(VT.getSizeInBits() / 8);
2054}
2055
2057 switch (OperandType) {
2058 // When floating-point immediate is used as operand of type i16, the 32-bit
2059 // representation of the constant truncated to the 16 LSBs should be used.
2074 return &APFloat::IEEEsingle();
2081 return &APFloat::IEEEdouble();
2089 return &APFloat::IEEEhalf();
2094 return &APFloat::BFloat();
2095 default:
2096 llvm_unreachable("unsupported fp type");
2097 }
2098}
2099
2100//===----------------------------------------------------------------------===//
2101// Operand
2102//===----------------------------------------------------------------------===//
2103
2104static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
2105 bool Lost;
2106
2107 // Convert literal to single precision
2110 &Lost);
2111 // We allow precision lost but not overflow or underflow
2112 if (Status != APFloat::opOK &&
2113 Lost &&
2114 ((Status & APFloat::opOverflow) != 0 ||
2115 (Status & APFloat::opUnderflow) != 0)) {
2116 return false;
2117 }
2118
2119 return true;
2120}
2121
2122static bool isSafeTruncation(int64_t Val, unsigned Size) {
2123 return isUIntN(Size, Val) || isIntN(Size, Val);
2124}
2125
2126static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2127 if (VT.getScalarType() == MVT::i16)
2128 return isInlinableLiteral32(Val, HasInv2Pi);
2129
2130 if (VT.getScalarType() == MVT::f16)
2131 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2132
2133 assert(VT.getScalarType() == MVT::bf16);
2134
2135 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2136}
2137
2138bool AMDGPUOperand::isInlinableImm(MVT type) const {
2139
2140 // This is a hack to enable named inline values like
2141 // shared_base with both 32-bit and 64-bit operands.
2142 // Note that these values are defined as
2143 // 32-bit operands only.
2144 if (isInlineValue()) {
2145 return true;
2146 }
2147
2148 if (!isImmTy(ImmTyNone)) {
2149 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2150 return false;
2151 }
2152
2153 if (getModifiers().Lit != LitModifier::None)
2154 return false;
2155
2156 // TODO: We should avoid using host float here. It would be better to
2157 // check the float bit values which is what a few other places do.
2158 // We've had bot failures before due to weird NaN support on mips hosts.
2159
2160 APInt Literal(64, Imm.Val);
2161
2162 if (Imm.IsFPImm) { // We got fp literal token
2163 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2165 AsmParser->hasInv2PiInlineImm());
2166 }
2167
2168 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2169 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2170 return false;
2171
2172 if (type.getScalarSizeInBits() == 16) {
2173 bool Lost = false;
2174 switch (type.getScalarType().SimpleTy) {
2175 default:
2176 llvm_unreachable("unknown 16-bit type");
2177 case MVT::bf16:
2178 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2179 &Lost);
2180 break;
2181 case MVT::f16:
2182 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2183 &Lost);
2184 break;
2185 case MVT::i16:
2186 FPLiteral.convert(APFloatBase::IEEEsingle(),
2187 APFloat::rmNearestTiesToEven, &Lost);
2188 break;
2189 }
2190 // We need to use 32-bit representation here because when a floating-point
2191 // inline constant is used as an i16 operand, its 32-bit representation
2192 // will be used. We will need the 32-bit value to check if
2193 // it is FP inline constant.
2194 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2195 return isInlineableLiteralOp16(ImmVal, type,
2196 AsmParser->hasInv2PiInlineImm());
2197 }
2198
2199 // Check if single precision literal is inlinable
2201 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2202 AsmParser->hasInv2PiInlineImm());
2203 }
2204
2205 // We got int literal token.
2206 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2208 AsmParser->hasInv2PiInlineImm());
2209 }
2210
2211 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2212 return false;
2213 }
2214
2215 if (type.getScalarSizeInBits() == 16) {
2217 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2218 type, AsmParser->hasInv2PiInlineImm());
2219 }
2220
2222 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2223 AsmParser->hasInv2PiInlineImm());
2224}
2225
2226bool AMDGPUOperand::isLiteralImm(MVT type) const {
2227 // Check that this immediate can be added as literal
2228 if (!isImmTy(ImmTyNone)) {
2229 return false;
2230 }
2231
2232 bool Allow64Bit =
2233 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2234
2235 if (!Imm.IsFPImm) {
2236 // We got int literal token.
2237
2238 if (type == MVT::f64 && hasFPModifiers()) {
2239 // Cannot apply fp modifiers to int literals preserving the same semantics
2240 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
2241 // disable these cases.
2242 return false;
2243 }
2244
2245 unsigned Size = type.getSizeInBits();
2246 if (Size == 64) {
2247 if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Imm.Val, false))
2248 return true;
2249 Size = 32;
2250 }
2251
2252 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2253 // types.
2254 return isSafeTruncation(Imm.Val, Size);
2255 }
2256
2257 // We got fp literal token
2258 if (type == MVT::f64) { // Expected 64-bit fp operand
2259 // We would set low 64-bits of literal to zeroes but we accept this literals
2260 return true;
2261 }
2262
2263 if (type == MVT::i64) { // Expected 64-bit int operand
2264 // We don't allow fp literals in 64-bit integer instructions. It is
2265 // unclear how we should encode them.
2266 return false;
2267 }
2268
2269 // We allow fp literals with f16x2 operands assuming that the specified
2270 // literal goes into the lower half and the upper half is zero. We also
2271 // require that the literal may be losslessly converted to f16.
2272 //
2273 // For i16x2 operands, we assume that the specified literal is encoded as a
2274 // single-precision float. This is pretty odd, but it matches SP3 and what
2275 // happens in hardware.
2276 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2277 : (type == MVT::v2i16) ? MVT::f32
2278 : (type == MVT::v2f32) ? MVT::f32
2279 : type;
2280
2281 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2282 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2283}
2284
2285bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2286 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2287}
2288
2289bool AMDGPUOperand::isVRegWithInputMods() const {
2290 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2291 // GFX90A allows DPP on 64-bit operands.
2292 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2293 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2294}
2295
2296template <bool IsFake16>
2297bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2298 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2299 : AMDGPU::VGPR_16_Lo128RegClassID);
2300}
2301
2302template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2303 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2304 : AMDGPU::VGPR_16RegClassID);
2305}
2306
2307bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2308 if (AsmParser->isVI())
2309 return isVReg32();
2310 if (AsmParser->isGFX9Plus())
2311 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2312 return false;
2313}
2314
2315bool AMDGPUOperand::isSDWAFP16Operand() const {
2316 return isSDWAOperand(MVT::f16);
2317}
2318
2319bool AMDGPUOperand::isSDWAFP32Operand() const {
2320 return isSDWAOperand(MVT::f32);
2321}
2322
2323bool AMDGPUOperand::isSDWAInt16Operand() const {
2324 return isSDWAOperand(MVT::i16);
2325}
2326
2327bool AMDGPUOperand::isSDWAInt32Operand() const {
2328 return isSDWAOperand(MVT::i32);
2329}
2330
2331bool AMDGPUOperand::isBoolReg() const {
2332 return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
2333 (AsmParser->isWave32() && isSCSrc_b32()));
2334}
2335
2336uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2337{
2338 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2339 assert(Size == 2 || Size == 4 || Size == 8);
2340
2341 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2342
2343 if (Imm.Mods.Abs) {
2344 Val &= ~FpSignMask;
2345 }
2346 if (Imm.Mods.Neg) {
2347 Val ^= FpSignMask;
2348 }
2349
2350 return Val;
2351}
2352
2353void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2354 MCOpIdx = Inst.getNumOperands();
2355
2356 if (isExpr()) {
2358 return;
2359 }
2360
2361 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2362 Inst.getNumOperands())) {
2363 addLiteralImmOperand(Inst, Imm.Val,
2364 ApplyModifiers &
2365 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2366 } else {
2367 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2369 }
2370}
2371
2372void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2373 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2374 auto OpNum = Inst.getNumOperands();
2375 // Check that this operand accepts literals
2376 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2377
2378 if (ApplyModifiers) {
2379 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2380 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2381 Val = applyInputFPModifiers(Val, Size);
2382 }
2383
2384 APInt Literal(64, Val);
2385 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2386
2387 bool CanUse64BitLiterals =
2388 AsmParser->has64BitLiterals() &&
2389 !(InstDesc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));
2390 LitModifier Lit = getModifiers().Lit;
2391 MCContext &Ctx = AsmParser->getContext();
2392
2393 if (Imm.IsFPImm) { // We got fp literal token
2394 switch (OpTy) {
2400 if (Lit == LitModifier::None &&
2402 AsmParser->hasInv2PiInlineImm())) {
2403 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2404 return;
2405 }
2406
2407 // Non-inlineable
2408 if (AMDGPU::isSISrcFPOperand(InstDesc,
2409 OpNum)) { // Expected 64-bit fp operand
2410 bool HasMandatoryLiteral =
2411 AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm);
2412 // For fp operands we check if low 32 bits are zeros
2413 if (Literal.getLoBits(32) != 0 &&
2414 (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
2415 !HasMandatoryLiteral) {
2416 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
2417 Inst.getLoc(),
2418 "Can't encode literal as exact 64-bit floating-point operand. "
2419 "Low 32-bits will be set to zero");
2420 Val &= 0xffffffff00000000u;
2421 }
2422
2423 if ((OpTy == AMDGPU::OPERAND_REG_IMM_FP64 ||
2426 if (CanUse64BitLiterals && Lit == LitModifier::None &&
2427 (isInt<32>(Val) || isUInt<32>(Val))) {
2428 // The floating-point operand will be verbalized as an
2429 // integer one. If that integer happens to fit 32 bits, on
2430 // re-assembling it will be interpreted as the high half of
2431 // the actual value, so we have to wrap it into lit64().
2432 Lit = LitModifier::Lit64;
2433 } else if (Lit == LitModifier::Lit) {
2434 // For FP64 operands lit() specifies the high half of the value.
2435 Val = Hi_32(Val);
2436 }
2437 }
2438 break;
2439 }
2440
2441 // We don't allow fp literals in 64-bit integer instructions. It is
2442 // unclear how we should encode them. This case should be checked earlier
2443 // in predicate methods (isLiteralImm())
2444 llvm_unreachable("fp literal in 64-bit integer instruction.");
2445
2447 if (CanUse64BitLiterals && Lit == LitModifier::None &&
2448 (isInt<32>(Val) || isUInt<32>(Val)))
2449 Lit = LitModifier::Lit64;
2450 break;
2451
2456 if (Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
2457 Literal == 0x3fc45f306725feed) {
2458 // This is the 1/(2*pi) which is going to be truncated to bf16 with the
2459 // loss of precision. The constant represents the idiomatic fp32 value of
2460 // 1/(2*pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16
2461 // bits. Prevent rounding below.
2462 Inst.addOperand(MCOperand::createImm(0x3e22));
2463 return;
2464 }
2465 [[fallthrough]];
2466
2488 bool lost;
2489 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2490 // Convert literal to single precision
2491 FPLiteral.convert(*getOpFltSemantics(OpTy),
2492 APFloat::rmNearestTiesToEven, &lost);
2493 // We allow precision lost but not overflow or underflow. This should be
2494 // checked earlier in isLiteralImm()
2495
2496 Val = FPLiteral.bitcastToAPInt().getZExtValue();
2497 break;
2498 }
2499 default:
2500 llvm_unreachable("invalid operand size");
2501 }
2502
2503 if (Lit != LitModifier::None) {
2504 Inst.addOperand(
2506 } else {
2508 }
2509 return;
2510 }
2511
2512 // We got int literal token.
2513 // Only sign extend inline immediates.
2514 switch (OpTy) {
2529 break;
2530
2533 if (Lit == LitModifier::None &&
2534 AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2536 return;
2537 }
2538
2539 // When the 32 MSBs are not zero (effectively means it can't be safely
2540 // truncated to uint32_t), if the target doesn't support 64-bit literals, or
2541 // the lit modifier is explicitly used, we need to truncate it to the 32
2542 // LSBs.
2543 if (!AsmParser->has64BitLiterals() || Lit == LitModifier::Lit)
2544 Val = Lo_32(Val);
2545 break;
2546
2550 if (Lit == LitModifier::None &&
2551 AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2553 return;
2554 }
2555
2556 // If the target doesn't support 64-bit literals, we need to use the
2557 // constant as the high 32 MSBs of a double-precision floating point value.
2558 if (!AsmParser->has64BitLiterals()) {
2559 Val = static_cast<uint64_t>(Val) << 32;
2560 } else {
2561 // Now the target does support 64-bit literals, there are two cases
2562 // where we still want to use src_literal encoding:
2563 // 1) explicitly forced by using lit modifier;
2564 // 2) the value is a valid 32-bit representation (signed or unsigned),
2565 // meanwhile not forced by lit64 modifier.
2566 if (Lit == LitModifier::Lit ||
2567 (Lit != LitModifier::Lit64 && (isInt<32>(Val) || isUInt<32>(Val))))
2568 Val = static_cast<uint64_t>(Val) << 32;
2569 }
2570
2571 // For FP64 operands lit() specifies the high half of the value.
2572 if (Lit == LitModifier::Lit)
2573 Val = Hi_32(Val);
2574 break;
2575
2587 break;
2588
2590 if ((isInt<32>(Val) || isUInt<32>(Val)) && Lit != LitModifier::Lit64)
2591 Val <<= 32;
2592 break;
2593
2594 default:
2595 llvm_unreachable("invalid operand type");
2596 }
2597
2598 if (Lit != LitModifier::None) {
2599 Inst.addOperand(
2601 } else {
2603 }
2604}
2605
2606void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2607 MCOpIdx = Inst.getNumOperands();
2608 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2609}
2610
2611bool AMDGPUOperand::isInlineValue() const {
2612 return isRegKind() && ::isInlineValue(getReg());
2613}
2614
2615//===----------------------------------------------------------------------===//
2616// AsmParser
2617//===----------------------------------------------------------------------===//
2618
2619void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2620 // TODO: make those pre-defined variables read-only.
2621 // Currently there is no suitable machinery in the core llvm-mc for this.
2622 // MCSymbol::isRedefinable is intended for another purpose, and
2623 // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
2624 MCContext &Ctx = getContext();
2625 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2627}
2628
2629static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2630 if (Is == IS_VGPR) {
2631 switch (RegWidth) {
2632 default: return -1;
2633 case 32:
2634 return AMDGPU::VGPR_32RegClassID;
2635 case 64:
2636 return AMDGPU::VReg_64RegClassID;
2637 case 96:
2638 return AMDGPU::VReg_96RegClassID;
2639 case 128:
2640 return AMDGPU::VReg_128RegClassID;
2641 case 160:
2642 return AMDGPU::VReg_160RegClassID;
2643 case 192:
2644 return AMDGPU::VReg_192RegClassID;
2645 case 224:
2646 return AMDGPU::VReg_224RegClassID;
2647 case 256:
2648 return AMDGPU::VReg_256RegClassID;
2649 case 288:
2650 return AMDGPU::VReg_288RegClassID;
2651 case 320:
2652 return AMDGPU::VReg_320RegClassID;
2653 case 352:
2654 return AMDGPU::VReg_352RegClassID;
2655 case 384:
2656 return AMDGPU::VReg_384RegClassID;
2657 case 512:
2658 return AMDGPU::VReg_512RegClassID;
2659 case 1024:
2660 return AMDGPU::VReg_1024RegClassID;
2661 }
2662 } else if (Is == IS_TTMP) {
2663 switch (RegWidth) {
2664 default: return -1;
2665 case 32:
2666 return AMDGPU::TTMP_32RegClassID;
2667 case 64:
2668 return AMDGPU::TTMP_64RegClassID;
2669 case 128:
2670 return AMDGPU::TTMP_128RegClassID;
2671 case 256:
2672 return AMDGPU::TTMP_256RegClassID;
2673 case 512:
2674 return AMDGPU::TTMP_512RegClassID;
2675 }
2676 } else if (Is == IS_SGPR) {
2677 switch (RegWidth) {
2678 default: return -1;
2679 case 32:
2680 return AMDGPU::SGPR_32RegClassID;
2681 case 64:
2682 return AMDGPU::SGPR_64RegClassID;
2683 case 96:
2684 return AMDGPU::SGPR_96RegClassID;
2685 case 128:
2686 return AMDGPU::SGPR_128RegClassID;
2687 case 160:
2688 return AMDGPU::SGPR_160RegClassID;
2689 case 192:
2690 return AMDGPU::SGPR_192RegClassID;
2691 case 224:
2692 return AMDGPU::SGPR_224RegClassID;
2693 case 256:
2694 return AMDGPU::SGPR_256RegClassID;
2695 case 288:
2696 return AMDGPU::SGPR_288RegClassID;
2697 case 320:
2698 return AMDGPU::SGPR_320RegClassID;
2699 case 352:
2700 return AMDGPU::SGPR_352RegClassID;
2701 case 384:
2702 return AMDGPU::SGPR_384RegClassID;
2703 case 512:
2704 return AMDGPU::SGPR_512RegClassID;
2705 }
2706 } else if (Is == IS_AGPR) {
2707 switch (RegWidth) {
2708 default: return -1;
2709 case 32:
2710 return AMDGPU::AGPR_32RegClassID;
2711 case 64:
2712 return AMDGPU::AReg_64RegClassID;
2713 case 96:
2714 return AMDGPU::AReg_96RegClassID;
2715 case 128:
2716 return AMDGPU::AReg_128RegClassID;
2717 case 160:
2718 return AMDGPU::AReg_160RegClassID;
2719 case 192:
2720 return AMDGPU::AReg_192RegClassID;
2721 case 224:
2722 return AMDGPU::AReg_224RegClassID;
2723 case 256:
2724 return AMDGPU::AReg_256RegClassID;
2725 case 288:
2726 return AMDGPU::AReg_288RegClassID;
2727 case 320:
2728 return AMDGPU::AReg_320RegClassID;
2729 case 352:
2730 return AMDGPU::AReg_352RegClassID;
2731 case 384:
2732 return AMDGPU::AReg_384RegClassID;
2733 case 512:
2734 return AMDGPU::AReg_512RegClassID;
2735 case 1024:
2736 return AMDGPU::AReg_1024RegClassID;
2737 }
2738 }
2739 return -1;
2740}
2741
2744 .Case("exec", AMDGPU::EXEC)
2745 .Case("vcc", AMDGPU::VCC)
2746 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2747 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2748 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2749 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2750 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2751 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2752 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2753 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2754 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2755 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2756 .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
2757 .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
2758 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2759 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2760 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2761 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2762 .Case("m0", AMDGPU::M0)
2763 .Case("vccz", AMDGPU::SRC_VCCZ)
2764 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2765 .Case("execz", AMDGPU::SRC_EXECZ)
2766 .Case("src_execz", AMDGPU::SRC_EXECZ)
2767 .Case("scc", AMDGPU::SRC_SCC)
2768 .Case("src_scc", AMDGPU::SRC_SCC)
2769 .Case("tba", AMDGPU::TBA)
2770 .Case("tma", AMDGPU::TMA)
2771 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2772 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2773 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2774 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2775 .Case("vcc_lo", AMDGPU::VCC_LO)
2776 .Case("vcc_hi", AMDGPU::VCC_HI)
2777 .Case("exec_lo", AMDGPU::EXEC_LO)
2778 .Case("exec_hi", AMDGPU::EXEC_HI)
2779 .Case("tma_lo", AMDGPU::TMA_LO)
2780 .Case("tma_hi", AMDGPU::TMA_HI)
2781 .Case("tba_lo", AMDGPU::TBA_LO)
2782 .Case("tba_hi", AMDGPU::TBA_HI)
2783 .Case("pc", AMDGPU::PC_REG)
2784 .Case("null", AMDGPU::SGPR_NULL)
2785 .Default(AMDGPU::NoRegister);
2786}
2787
2788bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2789 SMLoc &EndLoc, bool RestoreOnFailure) {
2790 auto R = parseRegister();
2791 if (!R) return true;
2792 assert(R->isReg());
2793 RegNo = R->getReg();
2794 StartLoc = R->getStartLoc();
2795 EndLoc = R->getEndLoc();
2796 return false;
2797}
2798
2799bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2800 SMLoc &EndLoc) {
2801 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2802}
2803
2804ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2805 SMLoc &EndLoc) {
2806 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2807 bool PendingErrors = getParser().hasPendingError();
2808 getParser().clearPendingErrors();
2809 if (PendingErrors)
2810 return ParseStatus::Failure;
2811 if (Result)
2812 return ParseStatus::NoMatch;
2813 return ParseStatus::Success;
2814}
2815
2816bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2817 RegisterKind RegKind,
2818 MCRegister Reg1, SMLoc Loc) {
2819 switch (RegKind) {
2820 case IS_SPECIAL:
2821 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2822 Reg = AMDGPU::EXEC;
2823 RegWidth = 64;
2824 return true;
2825 }
2826 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2827 Reg = AMDGPU::FLAT_SCR;
2828 RegWidth = 64;
2829 return true;
2830 }
2831 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2832 Reg = AMDGPU::XNACK_MASK;
2833 RegWidth = 64;
2834 return true;
2835 }
2836 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2837 Reg = AMDGPU::VCC;
2838 RegWidth = 64;
2839 return true;
2840 }
2841 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2842 Reg = AMDGPU::TBA;
2843 RegWidth = 64;
2844 return true;
2845 }
2846 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2847 Reg = AMDGPU::TMA;
2848 RegWidth = 64;
2849 return true;
2850 }
2851 Error(Loc, "register does not fit in the list");
2852 return false;
2853 case IS_VGPR:
2854 case IS_SGPR:
2855 case IS_AGPR:
2856 case IS_TTMP:
2857 if (Reg1 != Reg + RegWidth / 32) {
2858 Error(Loc, "registers in a list must have consecutive indices");
2859 return false;
2860 }
2861 RegWidth += 32;
2862 return true;
2863 default:
2864 llvm_unreachable("unexpected register kind");
2865 }
2866}
2867
2868struct RegInfo {
2870 RegisterKind Kind;
2871};
2872
// Name prefixes of the regular (indexed) register files and the kind each one
// denotes. The lookup loop over this table returns the first entry whose Name
// is a prefix of the token, and callers strip Name.size() characters to get
// the index suffix. Order therefore matters: "acc" has to come before "a",
// otherwise "acc0" would match "a" and leave the non-numeric suffix "cc0".
2873static constexpr RegInfo RegularRegisters[] = {
2874 {{"v"}, IS_VGPR},
2875 {{"s"}, IS_SGPR},
2876 {{"ttmp"}, IS_TTMP},
2877 {{"acc"}, IS_AGPR},
2878 {{"a"}, IS_AGPR},
2879};
2880
2881static bool isRegularReg(RegisterKind Kind) {
2882 return Kind == IS_VGPR ||
2883 Kind == IS_SGPR ||
2884 Kind == IS_TTMP ||
2885 Kind == IS_AGPR;
2886}
2887
2889 for (const RegInfo &Reg : RegularRegisters)
2890 if (Str.starts_with(Reg.Name))
2891 return &Reg;
2892 return nullptr;
2893}
2894
2895static bool getRegNum(StringRef Str, unsigned& Num) {
2896 return !Str.getAsInteger(10, Num);
2897}
2898
2899bool
2900AMDGPUAsmParser::isRegister(const AsmToken &Token,
2901 const AsmToken &NextToken) const {
2902
2903 // A list of consecutive registers: [s0,s1,s2,s3]
2904 if (Token.is(AsmToken::LBrac))
2905 return true;
2906
2907 if (!Token.is(AsmToken::Identifier))
2908 return false;
2909
2910 // A single register like s0 or a range of registers like s[0:1]
2911
2912 StringRef Str = Token.getString();
2913 const RegInfo *Reg = getRegularRegInfo(Str);
2914 if (Reg) {
2915 StringRef RegName = Reg->Name;
2916 StringRef RegSuffix = Str.substr(RegName.size());
2917 if (!RegSuffix.empty()) {
2918 RegSuffix.consume_back(".l");
2919 RegSuffix.consume_back(".h");
2920 unsigned Num;
2921 // A single register with an index: rXX
2922 if (getRegNum(RegSuffix, Num))
2923 return true;
2924 } else {
2925 // A range of registers: r[XX:YY].
2926 if (NextToken.is(AsmToken::LBrac))
2927 return true;
2928 }
2929 }
2930
2931 return getSpecialRegForName(Str).isValid();
2932}
2933
2934bool
2935AMDGPUAsmParser::isRegister()
2936{
2937 return isRegister(getToken(), peekToken());
2938}
2939
2940MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2941 unsigned SubReg, unsigned RegWidth,
2942 SMLoc Loc) {
2943 assert(isRegularReg(RegKind));
2944
2945 unsigned AlignSize = 1;
2946 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2947 // SGPR and TTMP registers must be aligned.
2948 // Max required alignment is 4 dwords.
2949 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2950 }
2951
2952 if (RegNum % AlignSize != 0) {
2953 Error(Loc, "invalid register alignment");
2954 return MCRegister();
2955 }
2956
2957 unsigned RegIdx = RegNum / AlignSize;
2958 int RCID = getRegClass(RegKind, RegWidth);
2959 if (RCID == -1) {
2960 Error(Loc, "invalid or unsupported register size");
2961 return MCRegister();
2962 }
2963
2964 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2965 const MCRegisterClass RC = TRI->getRegClass(RCID);
2966 if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
2967 Error(Loc, "register index is out of range");
2968 return AMDGPU::NoRegister;
2969 }
2970
2971 if (RegKind == IS_VGPR && !isGFX1250Plus() && RegIdx + RegWidth / 32 > 256) {
2972 Error(Loc, "register index is out of range");
2973 return MCRegister();
2974 }
2975
2976 MCRegister Reg = RC.getRegister(RegIdx);
2977
2978 if (SubReg) {
2979 Reg = TRI->getSubReg(Reg, SubReg);
2980
2981 // Currently all regular registers have their .l and .h subregisters, so
2982 // we should never need to generate an error here.
2983 assert(Reg && "Invalid subregister!");
2984 }
2985
2986 return Reg;
2987}
2988
2989bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
2990 unsigned &SubReg) {
2991 int64_t RegLo, RegHi;
2992 if (!skipToken(AsmToken::LBrac, "missing register index"))
2993 return false;
2994
2995 SMLoc FirstIdxLoc = getLoc();
2996 SMLoc SecondIdxLoc;
2997
2998 if (!parseExpr(RegLo))
2999 return false;
3000
3001 if (trySkipToken(AsmToken::Colon)) {
3002 SecondIdxLoc = getLoc();
3003 if (!parseExpr(RegHi))
3004 return false;
3005 } else {
3006 RegHi = RegLo;
3007 }
3008
3009 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
3010 return false;
3011
3012 if (!isUInt<32>(RegLo)) {
3013 Error(FirstIdxLoc, "invalid register index");
3014 return false;
3015 }
3016
3017 if (!isUInt<32>(RegHi)) {
3018 Error(SecondIdxLoc, "invalid register index");
3019 return false;
3020 }
3021
3022 if (RegLo > RegHi) {
3023 Error(FirstIdxLoc, "first register index should not exceed second index");
3024 return false;
3025 }
3026
3027 if (RegHi == RegLo) {
3028 StringRef RegSuffix = getTokenStr();
3029 if (RegSuffix == ".l") {
3030 SubReg = AMDGPU::lo16;
3031 lex();
3032 } else if (RegSuffix == ".h") {
3033 SubReg = AMDGPU::hi16;
3034 lex();
3035 }
3036 }
3037
3038 Num = static_cast<unsigned>(RegLo);
3039 RegWidth = 32 * ((RegHi - RegLo) + 1);
3040
3041 return true;
3042}
3043
3044MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
3045 unsigned &RegNum,
3046 unsigned &RegWidth,
3047 SmallVectorImpl<AsmToken> &Tokens) {
3048 assert(isToken(AsmToken::Identifier));
3049 MCRegister Reg = getSpecialRegForName(getTokenStr());
3050 if (Reg) {
3051 RegNum = 0;
3052 RegWidth = 32;
3053 RegKind = IS_SPECIAL;
3054 Tokens.push_back(getToken());
3055 lex(); // skip register name
3056 }
3057 return Reg;
3058}
3059
3060MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3061 unsigned &RegNum,
3062 unsigned &RegWidth,
3063 SmallVectorImpl<AsmToken> &Tokens) {
3064 assert(isToken(AsmToken::Identifier));
3065 StringRef RegName = getTokenStr();
3066 auto Loc = getLoc();
3067
3068 const RegInfo *RI = getRegularRegInfo(RegName);
3069 if (!RI) {
3070 Error(Loc, "invalid register name");
3071 return MCRegister();
3072 }
3073
3074 Tokens.push_back(getToken());
3075 lex(); // skip register name
3076
3077 RegKind = RI->Kind;
3078 StringRef RegSuffix = RegName.substr(RI->Name.size());
3079 unsigned SubReg = NoSubRegister;
3080 if (!RegSuffix.empty()) {
3081 if (RegSuffix.consume_back(".l"))
3082 SubReg = AMDGPU::lo16;
3083 else if (RegSuffix.consume_back(".h"))
3084 SubReg = AMDGPU::hi16;
3085
3086 // Single 32-bit register: vXX.
3087 if (!getRegNum(RegSuffix, RegNum)) {
3088 Error(Loc, "invalid register index");
3089 return MCRegister();
3090 }
3091 RegWidth = 32;
3092 } else {
3093 // Range of registers: v[XX:YY]. ":YY" is optional.
3094 if (!ParseRegRange(RegNum, RegWidth, SubReg))
3095 return MCRegister();
3096 }
3097
3098 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
3099}
3100
3101MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3102 unsigned &RegNum, unsigned &RegWidth,
3103 SmallVectorImpl<AsmToken> &Tokens) {
3104 MCRegister Reg;
3105 auto ListLoc = getLoc();
3106
3107 if (!skipToken(AsmToken::LBrac,
3108 "expected a register or a list of registers")) {
3109 return MCRegister();
3110 }
3111
3112 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
3113
3114 auto Loc = getLoc();
3115 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
3116 return MCRegister();
3117 if (RegWidth != 32) {
3118 Error(Loc, "expected a single 32-bit register");
3119 return MCRegister();
3120 }
3121
3122 for (; trySkipToken(AsmToken::Comma); ) {
3123 RegisterKind NextRegKind;
3124 MCRegister NextReg;
3125 unsigned NextRegNum, NextRegWidth;
3126 Loc = getLoc();
3127
3128 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3129 NextRegNum, NextRegWidth,
3130 Tokens)) {
3131 return MCRegister();
3132 }
3133 if (NextRegWidth != 32) {
3134 Error(Loc, "expected a single 32-bit register");
3135 return MCRegister();
3136 }
3137 if (NextRegKind != RegKind) {
3138 Error(Loc, "registers in a list must be of the same kind");
3139 return MCRegister();
3140 }
3141 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
3142 return MCRegister();
3143 }
3144
3145 if (!skipToken(AsmToken::RBrac,
3146 "expected a comma or a closing square bracket")) {
3147 return MCRegister();
3148 }
3149
3150 if (isRegularReg(RegKind))
3151 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3152
3153 return Reg;
3154}
3155
3156bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3157 MCRegister &Reg, unsigned &RegNum,
3158 unsigned &RegWidth,
3159 SmallVectorImpl<AsmToken> &Tokens) {
3160 auto Loc = getLoc();
3161 Reg = MCRegister();
3162
3163 if (isToken(AsmToken::Identifier)) {
3164 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3165 if (!Reg)
3166 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3167 } else {
3168 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3169 }
3170
3171 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3172 if (!Reg) {
3173 assert(Parser.hasPendingError());
3174 return false;
3175 }
3176
3177 if (!subtargetHasRegister(*TRI, Reg)) {
3178 if (Reg == AMDGPU::SGPR_NULL) {
3179 Error(Loc, "'null' operand is not supported on this GPU");
3180 } else {
3182 " register not available on this GPU");
3183 }
3184 return false;
3185 }
3186
3187 return true;
3188}
3189
3190bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3191 MCRegister &Reg, unsigned &RegNum,
3192 unsigned &RegWidth,
3193 bool RestoreOnFailure /*=false*/) {
3194 Reg = MCRegister();
3195
3197 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3198 if (RestoreOnFailure) {
3199 while (!Tokens.empty()) {
3200 getLexer().UnLex(Tokens.pop_back_val());
3201 }
3202 }
3203 return true;
3204 }
3205 return false;
3206}
3207
3208std::optional<StringRef>
3209AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3210 switch (RegKind) {
3211 case IS_VGPR:
3212 return StringRef(".amdgcn.next_free_vgpr");
3213 case IS_SGPR:
3214 return StringRef(".amdgcn.next_free_sgpr");
3215 default:
3216 return std::nullopt;
3217 }
3218}
3219
3220void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3221 auto SymbolName = getGprCountSymbolName(RegKind);
3222 assert(SymbolName && "initializing invalid register kind");
3223 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3225 Sym->setRedefinable(true);
3226}
3227
3228bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3229 unsigned DwordRegIndex,
3230 unsigned RegWidth) {
3231 // Symbols are only defined for GCN targets
3232 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3233 return true;
3234
3235 auto SymbolName = getGprCountSymbolName(RegKind);
3236 if (!SymbolName)
3237 return true;
3238 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3239
3240 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3241 int64_t OldCount;
3242
3243 if (!Sym->isVariable())
3244 return !Error(getLoc(),
3245 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3246 if (!Sym->getVariableValue()->evaluateAsAbsolute(OldCount))
3247 return !Error(
3248 getLoc(),
3249 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3250
3251 if (OldCount <= NewMax)
3253
3254 return true;
3255}
3256
3257std::unique_ptr<AMDGPUOperand>
3258AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3259 const auto &Tok = getToken();
3260 SMLoc StartLoc = Tok.getLoc();
3261 SMLoc EndLoc = Tok.getEndLoc();
3262 RegisterKind RegKind;
3263 MCRegister Reg;
3264 unsigned RegNum, RegWidth;
3265
3266 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3267 return nullptr;
3268 }
3269 if (isHsaAbi(getSTI())) {
3270 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3271 return nullptr;
3272 } else
3273 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3274 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3275}
3276
3277ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3278 bool HasSP3AbsModifier, LitModifier Lit) {
3279 // TODO: add syntactic sugar for 1/(2*PI)
3280
3281 if (isRegister() || isModifier())
3282 return ParseStatus::NoMatch;
3283
3284 if (Lit == LitModifier::None) {
3285 if (trySkipId("lit"))
3286 Lit = LitModifier::Lit;
3287 else if (trySkipId("lit64"))
3288 Lit = LitModifier::Lit64;
3289
3290 if (Lit != LitModifier::None) {
3291 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3292 return ParseStatus::Failure;
3293 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, Lit);
3294 if (S.isSuccess() &&
3295 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3296 return ParseStatus::Failure;
3297 return S;
3298 }
3299 }
3300
3301 const auto& Tok = getToken();
3302 const auto& NextTok = peekToken();
3303 bool IsReal = Tok.is(AsmToken::Real);
3304 SMLoc S = getLoc();
3305 bool Negate = false;
3306
3307 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3308 lex();
3309 IsReal = true;
3310 Negate = true;
3311 }
3312
3313 AMDGPUOperand::Modifiers Mods;
3314 Mods.Lit = Lit;
3315
3316 if (IsReal) {
3317 // Floating-point expressions are not supported.
3318 // Can only allow floating-point literals with an
3319 // optional sign.
3320
3321 StringRef Num = getTokenStr();
3322 lex();
3323
3324 APFloat RealVal(APFloat::IEEEdouble());
3325 auto roundMode = APFloat::rmNearestTiesToEven;
3326 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3327 return ParseStatus::Failure;
3328 if (Negate)
3329 RealVal.changeSign();
3330
3331 Operands.push_back(
3332 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3333 AMDGPUOperand::ImmTyNone, true));
3334 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3335 Op.setModifiers(Mods);
3336
3337 return ParseStatus::Success;
3338
3339 } else {
3340 int64_t IntVal;
3341 const MCExpr *Expr;
3342 SMLoc S = getLoc();
3343
3344 if (HasSP3AbsModifier) {
3345 // This is a workaround for handling expressions
3346 // as arguments of SP3 'abs' modifier, for example:
3347 // |1.0|
3348 // |-1|
3349 // |1+x|
3350 // This syntax is not compatible with syntax of standard
3351 // MC expressions (due to the trailing '|').
3352 SMLoc EndLoc;
3353 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3354 return ParseStatus::Failure;
3355 } else {
3356 if (Parser.parseExpression(Expr))
3357 return ParseStatus::Failure;
3358 }
3359
3360 if (Expr->evaluateAsAbsolute(IntVal)) {
3361 if (Lit == LitModifier::Lit && !isInt<32>(IntVal) && !isUInt<32>(IntVal))
3362 return Error(S, "literal value out of range");
3363 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3364 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3365 Op.setModifiers(Mods);
3366 } else {
3367 if (Lit != LitModifier::None)
3368 return ParseStatus::NoMatch;
3369 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3370 }
3371
3372 return ParseStatus::Success;
3373 }
3374
3375 return ParseStatus::NoMatch;
3376}
3377
3378ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3379 if (!isRegister())
3380 return ParseStatus::NoMatch;
3381
3382 if (auto R = parseRegister()) {
3383 assert(R->isReg());
3384 Operands.push_back(std::move(R));
3385 return ParseStatus::Success;
3386 }
3387 return ParseStatus::Failure;
3388}
3389
3390ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3391 bool HasSP3AbsMod, LitModifier Lit) {
3392 ParseStatus Res = parseReg(Operands);
3393 if (!Res.isNoMatch())
3394 return Res;
3395 if (isModifier())
3396 return ParseStatus::NoMatch;
3397 return parseImm(Operands, HasSP3AbsMod, Lit);
3398}
3399
3400bool
3401AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3402 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3403 const auto &str = Token.getString();
3404 return str == "abs" || str == "neg" || str == "sext";
3405 }
3406 return false;
3407}
3408
3409bool
3410AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3411 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3412}
3413
3414bool
3415AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3416 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3417}
3418
3419bool
3420AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3421 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3422}
3423
3424// Check if this is an operand modifier or an opcode modifier
3425// which may look like an expression but it is not. We should
3426// avoid parsing these modifiers as expressions. Currently
3427// recognized sequences are:
3428// |...|
3429// abs(...)
3430// neg(...)
3431// sext(...)
3432// -reg
3433// -|...|
3434// -abs(...)
3435// name:...
3436//
3437bool
3438AMDGPUAsmParser::isModifier() {
3439
3440 AsmToken Tok = getToken();
3441 AsmToken NextToken[2];
3442 peekTokens(NextToken);
3443
3444 return isOperandModifier(Tok, NextToken[0]) ||
3445 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3446 isOpcodeModifierWithVal(Tok, NextToken[0]);
3447}
3448
3449// Check if the current token is an SP3 'neg' modifier.
3450// Currently this modifier is allowed in the following context:
3451//
3452// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3453// 2. Before an 'abs' modifier: -abs(...)
3454// 3. Before an SP3 'abs' modifier: -|...|
3455//
3456// In all other cases "-" is handled as a part
3457// of an expression that follows the sign.
3458//
3459// Note: When "-" is followed by an integer literal,
3460// this is interpreted as integer negation rather
3461// than a floating-point NEG modifier applied to N.
3462// Beside being contr-intuitive, such use of floating-point
3463// NEG modifier would have resulted in different meaning
3464// of integer literals used with VOP1/2/C and VOP3,
3465// for example:
3466// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3467// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3468// Negative fp literals with preceding "-" are
3469// handled likewise for uniformity
3470//
3471bool
3472AMDGPUAsmParser::parseSP3NegModifier() {
3473
3474 AsmToken NextToken[2];
3475 peekTokens(NextToken);
3476
3477 if (isToken(AsmToken::Minus) &&
3478 (isRegister(NextToken[0], NextToken[1]) ||
3479 NextToken[0].is(AsmToken::Pipe) ||
3480 isId(NextToken[0], "abs"))) {
3481 lex();
3482 return true;
3483 }
3484
3485 return false;
3486}
3487
3488ParseStatus
3489AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3490 bool AllowImm) {
3491 bool Neg, SP3Neg;
3492 bool Abs, SP3Abs;
3493 SMLoc Loc;
3494
3495 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3496 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3497 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3498
3499 SP3Neg = parseSP3NegModifier();
3500
3501 Loc = getLoc();
3502 Neg = trySkipId("neg");
3503 if (Neg && SP3Neg)
3504 return Error(Loc, "expected register or immediate");
3505 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3506 return ParseStatus::Failure;
3507
3508 Abs = trySkipId("abs");
3509 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3510 return ParseStatus::Failure;
3511
3512 LitModifier Lit = LitModifier::None;
3513 if (trySkipId("lit")) {
3514 Lit = LitModifier::Lit;
3515 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3516 return ParseStatus::Failure;
3517 } else if (trySkipId("lit64")) {
3518 Lit = LitModifier::Lit64;
3519 if (!skipToken(AsmToken::LParen, "expected left paren after lit64"))
3520 return ParseStatus::Failure;
3521 if (!has64BitLiterals())
3522 return Error(Loc, "lit64 is not supported on this GPU");
3523 }
3524
3525 Loc = getLoc();
3526 SP3Abs = trySkipToken(AsmToken::Pipe);
3527 if (Abs && SP3Abs)
3528 return Error(Loc, "expected register or immediate");
3529
3530 ParseStatus Res;
3531 if (AllowImm) {
3532 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3533 } else {
3534 Res = parseReg(Operands);
3535 }
3536 if (!Res.isSuccess())
3537 return (SP3Neg || Neg || SP3Abs || Abs || Lit != LitModifier::None)
3539 : Res;
3540
3541 if (Lit != LitModifier::None && !Operands.back()->isImm())
3542 Error(Loc, "expected immediate with lit modifier");
3543
3544 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3545 return ParseStatus::Failure;
3546 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3547 return ParseStatus::Failure;
3548 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3549 return ParseStatus::Failure;
3550 if (Lit != LitModifier::None &&
3551 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3552 return ParseStatus::Failure;
3553
3554 AMDGPUOperand::Modifiers Mods;
3555 Mods.Abs = Abs || SP3Abs;
3556 Mods.Neg = Neg || SP3Neg;
3557 Mods.Lit = Lit;
3558
3559 if (Mods.hasFPModifiers() || Lit != LitModifier::None) {
3560 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3561 if (Op.isExpr())
3562 return Error(Op.getStartLoc(), "expected an absolute expression");
3563 Op.setModifiers(Mods);
3564 }
3565 return ParseStatus::Success;
3566}
3567
3568ParseStatus
3569AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3570 bool AllowImm) {
3571 bool Sext = trySkipId("sext");
3572 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3573 return ParseStatus::Failure;
3574
3575 ParseStatus Res;
3576 if (AllowImm) {
3577 Res = parseRegOrImm(Operands);
3578 } else {
3579 Res = parseReg(Operands);
3580 }
3581 if (!Res.isSuccess())
3582 return Sext ? ParseStatus::Failure : Res;
3583
3584 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3585 return ParseStatus::Failure;
3586
3587 AMDGPUOperand::Modifiers Mods;
3588 Mods.Sext = Sext;
3589
3590 if (Mods.hasIntModifiers()) {
3591 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3592 if (Op.isExpr())
3593 return Error(Op.getStartLoc(), "expected an absolute expression");
3594 Op.setModifiers(Mods);
3595 }
3596
3597 return ParseStatus::Success;
3598}
3599
3600ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3601 return parseRegOrImmWithFPInputMods(Operands, false);
3602}
3603
3604ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3605 return parseRegOrImmWithIntInputMods(Operands, false);
3606}
3607
3608ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3609 auto Loc = getLoc();
3610 if (trySkipId("off")) {
3611 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3612 AMDGPUOperand::ImmTyOff, false));
3613 return ParseStatus::Success;
3614 }
3615
3616 if (!isRegister())
3617 return ParseStatus::NoMatch;
3618
3619 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3620 if (Reg) {
3621 Operands.push_back(std::move(Reg));
3622 return ParseStatus::Success;
3623 }
3624
3625 return ParseStatus::Failure;
3626}
3627
3628unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3629 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3630
3631 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3632 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3633 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3634 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3635 return Match_InvalidOperand;
3636
3637 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3638 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3639 // v_mac_f32/16 allow only dst_sel == DWORD;
3640 auto OpNum =
3641 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3642 const auto &Op = Inst.getOperand(OpNum);
3643 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3644 return Match_InvalidOperand;
3645 }
3646 }
3647
3648 // Asm can first try to match VOPD or VOPD3. By failing early here with
3649 // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3650 // Checking later during validateInstruction does not give a chance to retry
3651 // parsing as a different encoding.
3652 if (tryAnotherVOPDEncoding(Inst))
3653 return Match_InvalidOperand;
3654
3655 return Match_Success;
3656}
3657
3667
3668// What asm variants we should check
3669ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3670 if (isForcedDPP() && isForcedVOP3()) {
3671 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3672 return ArrayRef(Variants);
3673 }
3674 if (getForcedEncodingSize() == 32) {
3675 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3676 return ArrayRef(Variants);
3677 }
3678
3679 if (isForcedVOP3()) {
3680 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3681 return ArrayRef(Variants);
3682 }
3683
3684 if (isForcedSDWA()) {
3685 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3687 return ArrayRef(Variants);
3688 }
3689
3690 if (isForcedDPP()) {
3691 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3692 return ArrayRef(Variants);
3693 }
3694
3695 return getAllVariants();
3696}
3697
3698StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3699 if (isForcedDPP() && isForcedVOP3())
3700 return "e64_dpp";
3701
3702 if (getForcedEncodingSize() == 32)
3703 return "e32";
3704
3705 if (isForcedVOP3())
3706 return "e64";
3707
3708 if (isForcedSDWA())
3709 return "sdwa";
3710
3711 if (isForcedDPP())
3712 return "dpp";
3713
3714 return "";
3715}
3716
3717MCRegister
3718AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3719 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3720 for (MCPhysReg Reg : Desc.implicit_uses()) {
3721 switch (Reg) {
3722 case AMDGPU::FLAT_SCR:
3723 case AMDGPU::VCC:
3724 case AMDGPU::VCC_LO:
3725 case AMDGPU::VCC_HI:
3726 case AMDGPU::M0:
3727 return Reg;
3728 default:
3729 break;
3730 }
3731 }
3732 return MCRegister();
3733}
3734
3735// NB: This code is correct only when used to check constant
3736// bus limitations because GFX7 support no f16 inline constants.
3737// Note that there are no cases when a GFX7 opcode violates
3738// constant bus limitations due to the use of an f16 constant.
//
// Decides whether operand \p OpIdx of \p Inst holds a value that qualifies as
// an inline constant. The value is classified by the operand size in bytes
// reported by AMDGPU::getOperandSize: 8 and 4 byte operands use the 64/32-bit
// checks, and 2-byte operands are further distinguished by the operand type
// from the instruction description (I16, FP16 and BF16 checks are visible).
//
// NOTE(review): the embedded line numbering skips several lines of this
// function (the guard in front of the first `return false`, and what are
// presumably the `switch`/`case` labels on OperandType). The conditions that
// select each of those returns cannot be read from this listing - consult the
// complete source before modifying this function.
3739bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3740 unsigned OpIdx) const {
3741 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3742
3745 return false;
3746 }
3747
3748 const MCOperand &MO = Inst.getOperand(OpIdx);
3749
// An immediate is used as is; any other operand is treated as an expression
// and reduced to a value through getLitValue.
3750 int64_t Val = MO.isImm() ? MO.getImm() : getLitValue(MO.getExpr());
3751 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3752
3753 switch (OpSize) { // expected operand size
3754 case 8:
3755 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3756 case 4:
3757 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3758 case 2: {
// 16-bit operands: the check depends on the operand type.
3759 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3762 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3763
3767
3771
3774
3778
3781 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3782
3785 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3786
3788 return false;
3789
3790 llvm_unreachable("invalid operand type");
3791 }
3792 default:
// Only 8, 4 and 2 byte operands are expected here.
3793 llvm_unreachable("invalid operand size");
3794 }
3795}
3796
3797unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3798 if (!isGFX10Plus())
3799 return 1;
3800
3801 switch (Opcode) {
3802 // 64-bit shift instructions can use only one scalar value input
3803 case AMDGPU::V_LSHLREV_B64_e64:
3804 case AMDGPU::V_LSHLREV_B64_gfx10:
3805 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3806 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3807 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3808 case AMDGPU::V_LSHRREV_B64_e64:
3809 case AMDGPU::V_LSHRREV_B64_gfx10:
3810 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3811 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3812 case AMDGPU::V_ASHRREV_I64_e64:
3813 case AMDGPU::V_ASHRREV_I64_gfx10:
3814 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3815 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3816 case AMDGPU::V_LSHL_B64_e64:
3817 case AMDGPU::V_LSHR_B64_e64:
3818 case AMDGPU::V_ASHR_I64_e64:
3819 return 1;
3820 default:
3821 return 2;
3822 }
3823}
3824
3825constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3827
3828// Get regular operand indices in the same order as specified
3829// in the instruction (but append mandatory literals to the end).
3831 bool AddMandatoryLiterals = false) {
3832
3833 int16_t ImmIdx =
3834 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3835
3836 if (isVOPD(Opcode)) {
3837 int16_t ImmXIdx =
3838 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
3839
3840 return {getNamedOperandIdx(Opcode, OpName::src0X),
3841 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3842 getNamedOperandIdx(Opcode, OpName::vsrc2X),
3843 getNamedOperandIdx(Opcode, OpName::src0Y),
3844 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3845 getNamedOperandIdx(Opcode, OpName::vsrc2Y),
3846 ImmXIdx,
3847 ImmIdx};
3848 }
3849
3850 return {getNamedOperandIdx(Opcode, OpName::src0),
3851 getNamedOperandIdx(Opcode, OpName::src1),
3852 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3853}
3854
3855bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3856 const MCOperand &MO = Inst.getOperand(OpIdx);
3857 if (MO.isImm())
3858 return !isInlineConstant(Inst, OpIdx);
3859 if (MO.isReg()) {
3860 auto Reg = MO.getReg();
3861 if (!Reg)
3862 return false;
3863 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3864 auto PReg = mc2PseudoReg(Reg);
3865 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3866 }
3867 return true;
3868}
3869
3870// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3871// Writelane is special in that it can use SGPR and M0 (which would normally
3872// count as using the constant bus twice - but in this case it is allowed since
3873// the lane selector doesn't count as a use of the constant bus). However, it is
3874// still required to abide by the 1 SGPR rule.
3875static bool checkWriteLane(const MCInst &Inst) {
3876 const unsigned Opcode = Inst.getOpcode();
3877 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3878 return false;
3879 const MCOperand &LaneSelOp = Inst.getOperand(2);
3880 if (!LaneSelOp.isReg())
3881 return false;
3882 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3883 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3884}
3885
// Check that \p Inst does not read more scalar values over the constant bus
// than getConstantBusLimit allows for its opcode.
//
// The count is the number of distinct SGPRs read (one implicit read found by
// findImplicitSGPRReadInVOP plus the explicit source operands) added to the
// number of literal values. When the limit is exceeded an error is reported
// at the operand that pushed the count over, and false is returned.
//
// NOTE(review): the embedded line numbering skips lines in this function: the
// flag mask that follows `Desc.TSFlags &` and the declaration of `Size` are
// not visible in this listing. Consult the complete source before editing.
3886bool AMDGPUAsmParser::validateConstantBusLimitations(
3887 const MCInst &Inst, const OperandVector &Operands) {
3888 const unsigned Opcode = Inst.getOpcode();
3889 const MCInstrDesc &Desc = MII.get(Opcode);
3890 MCRegister LastSGPR;
3891 unsigned ConstantBusUseCount = 0;
3892 unsigned NumLiterals = 0;
// Left uninitialized on purpose: it is only read once NumLiterals is
// non-zero, and every path that makes NumLiterals non-zero assigns it first.
3893 unsigned LiteralSize;
3894
// Instructions outside the checked encodings are accepted unconditionally.
3895 if (!(Desc.TSFlags &
3898 !isVOPD(Opcode))
3899 return true;
3900
// The M0 lane-select form of writelane is exempt, see checkWriteLane.
3901 if (checkWriteLane(Inst))
3902 return true;
3903
3904 // Check special imm operands (used by madmk, etc)
3905 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3906 ++NumLiterals;
3907 LiteralSize = 4;
3908 }
3909
// Each distinct SGPR is counted once, starting with the implicit read.
3910 SmallDenseSet<MCRegister> SGPRsUsed;
3911 MCRegister SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3912 if (SGPRUsed) {
3913 SGPRsUsed.insert(SGPRUsed);
3914 ++ConstantBusUseCount;
3915 }
3916
3917 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3918
3919 unsigned ConstantBusLimit = getConstantBusLimit(Opcode);
3920
3921 for (int OpIdx : OpIndices) {
// -1 marks a source operand this opcode does not have.
3922 if (OpIdx == -1)
3923 continue;
3924
3925 const MCOperand &MO = Inst.getOperand(OpIdx);
3926 if (usesConstantBus(Inst, OpIdx)) {
3927 if (MO.isReg()) {
3928 LastSGPR = mc2PseudoReg(MO.getReg());
3929 // Pairs of registers with a partial intersections like these
3930 // s0, s[0:1]
3931 // flat_scratch_lo, flat_scratch
3932 // flat_scratch_lo, flat_scratch_hi
3933 // are theoretically valid but they are disabled anyway.
3934 // Note that this code mimics SIInstrInfo::verifyInstruction
3935 if (SGPRsUsed.insert(LastSGPR).second) {
3936 ++ConstantBusUseCount;
3937 }
3938 } else { // Expression or a literal
3939
3940 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3941 continue; // special operand like VINTERP attr_chan
3942
3943 // An instruction may use only one literal.
3944 // This has been validated on the previous step.
3945 // See validateVOPLiteral.
3946 // This literal may be used as more than one operand.
3947 // If all these operands are of the same size,
3948 // this literal counts as one scalar value.
3949 // Otherwise it counts as 2 scalar values.
3950 // See "GFX10 Shader Programming", section 3.6.2.3.
3951
// Sizes below 4 bytes are treated as 4 for the comparison that follows.
3953 if (Size < 4)
3954 Size = 4;
3955
3956 if (NumLiterals == 0) {
3957 NumLiterals = 1;
3958 LiteralSize = Size;
3959 } else if (LiteralSize != Size) {
3960 NumLiterals = 2;
3961 }
3962 }
3963 }
3964
// Checked after every operand so the error points at the operand that
// exceeded the limit.
3965 if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
3966 Error(getOperandLoc(Operands, OpIdx),
3967 "invalid operand (violates constant bus restrictions)");
3968 return false;
3969 }
3970 }
3971 return true;
3972}
3973
3974std::optional<unsigned>
3975AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
3976
3977 const unsigned Opcode = Inst.getOpcode();
3978 if (!isVOPD(Opcode))
3979 return {};
3980
3981 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3982
3983 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3984 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3985 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3986 ? Opr.getReg()
3987 : MCRegister();
3988 };
3989
3990 // On GFX1170+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
3991 // source-cache.
3992 bool SkipSrc =
3993 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1170 ||
3994 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
3995 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
3996 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx13 ||
3997 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250 ||
3998 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx13;
3999 bool AllowSameVGPR = isGFX12Plus();
4000
4001 if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
4002 for (auto OpName : {OpName::src0X, OpName::src0Y}) {
4003 int I = getNamedOperandIdx(Opcode, OpName);
4004 const MCOperand &Op = Inst.getOperand(I);
4005 if (!Op.isImm())
4006 continue;
4007 int64_t Imm = Op.getImm();
4008 if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
4009 !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
4010 return (unsigned)I;
4011 }
4012
4013 for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
4014 OpName::vsrc2Y, OpName::imm}) {
4015 int I = getNamedOperandIdx(Opcode, OpName);
4016 if (I == -1)
4017 continue;
4018 const MCOperand &Op = Inst.getOperand(I);
4019 if (Op.isImm())
4020 return (unsigned)I;
4021 }
4022 }
4023
4024 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
4025 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
4026 getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
4027
4028 return InvalidCompOprIdx;
4029}
4030
4031bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
4032 const OperandVector &Operands) {
4033
4034 unsigned Opcode = Inst.getOpcode();
4035 bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
4036
4037 if (AsVOPD3) {
4038 for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
4039 AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
4040 if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
4041 (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
4042 Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
4043 }
4044 }
4045
4046 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
4047 if (!InvalidCompOprIdx.has_value())
4048 return true;
4049
4050 auto CompOprIdx = *InvalidCompOprIdx;
4051 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
4052 auto ParsedIdx =
4053 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
4054 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
4055 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
4056
4057 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
4058 if (CompOprIdx == VOPD::Component::DST) {
4059 if (AsVOPD3)
4060 Error(Loc, "dst registers must be distinct");
4061 else
4062 Error(Loc, "one dst register must be even and the other odd");
4063 } else {
4064 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
4065 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
4066 " operands must use different VGPR banks");
4067 }
4068
4069 return false;
4070}
4071
4072// \returns true if \p Inst does not satisfy VOPD constraints, but can be
4073// potentially used as VOPD3 with the same operands.
4074bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
4075 // First check if it fits VOPD
4076 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
4077 if (!InvalidCompOprIdx.has_value())
4078 return false;
4079
4080 // Then if it fits VOPD3
4081 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
4082 if (InvalidCompOprIdx.has_value()) {
4083 // If failed operand is dst it is better to show error about VOPD3
4084 // instruction as it has more capabilities and error message will be
4085 // more informative. If the dst is not legal for VOPD3, then it is not
4086 // legal for VOPD either.
4087 if (*InvalidCompOprIdx == VOPD::Component::DST)
4088 return true;
4089
4090 // Otherwise prefer VOPD as we may find ourselves in an awkward situation
4091 // with a conflict in tied implicit src2 of fmac and no asm operand to
4092 // to point to.
4093 return false;
4094 }
4095 return true;
4096}
4097
4098// \returns true is a VOPD3 instruction can be also represented as a shorter
4099// VOPD encoding.
4100bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
4101 const unsigned Opcode = Inst.getOpcode();
4102 const auto &II = getVOPDInstInfo(Opcode, &MII);
4103 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
4104 if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
4105 !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
4106 return false;
4107
4108 // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
4109 // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
4110 // be parsed as VOPD which does not accept src2.
4111 if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
4112 II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
4113 return false;
4114
4115 // If any modifiers are set this cannot be VOPD.
4116 for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4117 OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4118 OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4119 int I = getNamedOperandIdx(Opcode, OpName);
4120 if (I == -1)
4121 continue;
4122 if (Inst.getOperand(I).getImm())
4123 return false;
4124 }
4125
4126 return !tryVOPD3(Inst);
4127}
4128
4129// VOPD3 has more relaxed register constraints than VOPD. We prefer shorter VOPD
4130// form but switch to VOPD3 otherwise.
4131bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4132 const unsigned Opcode = Inst.getOpcode();
4133 if (!isGFX1250Plus() || !isVOPD(Opcode))
4134 return false;
4135
4136 if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4137 return tryVOPD(Inst);
4138 return tryVOPD3(Inst);
4139}
4140
4141bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
4142
4143 const unsigned Opc = Inst.getOpcode();
4144 const MCInstrDesc &Desc = MII.get(Opc);
4145
4146 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
4147 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
4148 assert(ClampIdx != -1);
4149 return Inst.getOperand(ClampIdx).getImm() == 0;
4150 }
4151
4152 return true;
4153}
4154
4157
4158bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc) {
4159
4160 const unsigned Opc = Inst.getOpcode();
4161 const MCInstrDesc &Desc = MII.get(Opc);
4162
4163 if ((Desc.TSFlags & MIMGFlags) == 0)
4164 return true;
4165
4166 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
4167 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4168 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
4169
4170 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
4171 return true;
4172
4173 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
4174 return true;
4175
4176 unsigned VDataSize = getRegOperandSize(Desc, VDataIdx);
4177 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
4178 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4179 if (DMask == 0)
4180 DMask = 1;
4181
4182 bool IsPackedD16 = false;
4183 unsigned DataSize =
4184 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
4185 if (hasPackedD16()) {
4186 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4187 IsPackedD16 = D16Idx >= 0;
4188 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
4189 DataSize = (DataSize + 1) / 2;
4190 }
4191
4192 if ((VDataSize / 4) == DataSize + TFESize)
4193 return true;
4194
4195 StringRef Modifiers;
4196 if (isGFX90A())
4197 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
4198 else
4199 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
4200
4201 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
4202 return false;
4203}
4204
// Check that the address operands of an image instruction have the size
// expected for its dim and a16 settings. Only instructions carrying one of the
// MIMGFlags bits are checked, and only on GFX10+. On a mismatch an error is
// reported at IDLoc and false is returned.
4205bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc) {
4206 const unsigned Opc = Inst.getOpcode();
4207 const MCInstrDesc &Desc = MII.get(Opc);
4208
4209 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
4210 return true;
4211
4212 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4213
4214 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
// NOTE(review): listing line 4215 (the initializer of BaseOpcode) is absent
// from this extraction; restore it from the upstream source.
4216 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
// The resource operand is named srsrc when the MIMG flag is set and rsrc
// otherwise.
4217 AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
4218 ? AMDGPU::OpName::srsrc
4219 : AMDGPU::OpName::rsrc;
4220 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
4221 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4222 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
4223
4224 assert(VAddr0Idx != -1);
4225 assert(SrsrcIdx != -1);
4226 assert(SrsrcIdx > VAddr0Idx);
4227
4228 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
// BVH instructions are only checked for agreement between the a16 operand
// and the A16 property of the base opcode.
4229 if (BaseOpcode->BVH) {
4230 if (IsA16 == BaseOpcode->A16)
4231 return true;
4232 Error(IDLoc, "image address size does not match a16");
4233 return false;
4234 }
4235
4236 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4237 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
// More than one operand between vaddr0 and the resource operand means the
// address is given as separate operands (NSA form); the address size is then
// the operand count, otherwise the vaddr0 register size divided by 4.
4238 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4239 unsigned ActualAddrSize =
4240 IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(Desc, VAddr0Idx) / 4;
4241
4242 unsigned ExpectedAddrSize =
4243 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
4244
4245 if (IsNSA) {
4246 if (hasPartialNSAEncoding() &&
4247 ExpectedAddrSize >
// NOTE(review): listing line 4248 (the right-hand side of this comparison and
// the opening brace of the if body) is absent from this extraction.
// With partial NSA the last address operand may be wider than one register,
// so its size is added to the count of the preceding operands.
4249 int VAddrLastIdx = SrsrcIdx - 1;
4250 unsigned VAddrLastSize = getRegOperandSize(Desc, VAddrLastIdx) / 4;
4251
4252 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4253 }
4254 } else {
// Non-NSA: an expected size above 12 is rounded up to 16.
4255 if (ExpectedAddrSize > 12)
4256 ExpectedAddrSize = 16;
4257
4258 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
4259 // This provides backward compatibility for assembly created
4260 // before 160b/192b/224b types were directly supported.
4261 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4262 return true;
4263 }
4264
4265 if (ActualAddrSize == ExpectedAddrSize)
4266 return true;
4267
4268 Error(IDLoc, "image address size does not match dim and a16");
4269 return false;
4270}
4271
4272bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4273
4274 const unsigned Opc = Inst.getOpcode();
4275 const MCInstrDesc &Desc = MII.get(Opc);
4276
4277 if ((Desc.TSFlags & MIMGFlags) == 0)
4278 return true;
4279 if (!Desc.mayLoad() || !Desc.mayStore())
4280 return true; // Not atomic
4281
4282 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4283 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4284
4285 // This is an incomplete check because image_atomic_cmpswap
4286 // may only use 0x3 and 0xf while other atomic operations
4287 // may use 0x1 and 0x3. However these limitations are
4288 // verified when we check that dmask matches dst size.
4289 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4290}
4291
4292bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4293
4294 const unsigned Opc = Inst.getOpcode();
4295 const MCInstrDesc &Desc = MII.get(Opc);
4296
4297 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4298 return true;
4299
4300 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4301 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4302
4303 // GATHER4 instructions use dmask in a different fashion compared to
4304 // other MIMG instructions. The only useful DMASK values are
4305 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4306 // (red,red,red,red) etc.) The ISA document doesn't mention
4307 // this.
4308 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4309}
4310
// Check that an image instruction on GFX10+ was written with a dim operand.
// Returns true when the check does not apply (pre-GFX10, or none of the
// MIMGFlags bits set) or when one of the parsed operands is a dim operand.
4311bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4312 const OperandVector &Operands) {
4313 if (!isGFX10Plus())
4314 return true;
4315
4316 const unsigned Opc = Inst.getOpcode();
4317 const MCInstrDesc &Desc = MII.get(Opc);
4318
4319 if ((Desc.TSFlags & MIMGFlags) == 0)
4320 return true;
4321
4322 // image_bvh_intersect_ray instructions do not have dim
// NOTE(review): listing line 4323 (the condition guarding the return below) is
// absent from this extraction; restore it from the upstream source.
4324 return true;
4325
// Scan the parsed operands, starting at index 1, for a dim operand.
4326 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4327 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4328 if (Op.isDim())
4329 return true;
4330 }
4331 return false;
4332}
4333
// For image instructions whose base opcode has the MSAA property, check that
// the selected dim also has the MSAA property. Other instructions are accepted.
4334bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4335 const unsigned Opc = Inst.getOpcode();
4336 const MCInstrDesc &Desc = MII.get(Opc);
4337
4338 if ((Desc.TSFlags & MIMGFlags) == 0)
4339 return true;
4340
4341 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4342 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
// NOTE(review): listing line 4343 (the initializer of BaseOpcode) is absent
// from this extraction; restore it from the upstream source.
4344
4345 if (!BaseOpcode->MSAA)
4346 return true;
4347
4348 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4349 assert(DimIdx != -1);
4350
// The dim operand holds an encoding that is looked up in the dim info table.
4351 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4352 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4353
4354 return DimInfo->MSAA;
4355}
4356
4357static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4358{
4359 switch (Opcode) {
4360 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4361 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4362 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4363 return true;
4364 default:
4365 return false;
4366 }
4367}
4368
4369// movrels* opcodes should only allow VGPRS as src0.
4370// This is specified in .td description for vop1/vop3,
4371// but sdwa is handled differently. See isSDWAOperand.
4372bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4373 const OperandVector &Operands) {
4374
4375 const unsigned Opc = Inst.getOpcode();
4376 const MCInstrDesc &Desc = MII.get(Opc);
4377
4378 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4379 return true;
4380
4381 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4382 assert(Src0Idx != -1);
4383
4384 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4385 if (Src0.isReg()) {
4386 auto Reg = mc2PseudoReg(Src0.getReg());
4387 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4388 if (!isSGPR(Reg, TRI))
4389 return true;
4390 }
4391
4392 Error(getOperandLoc(Operands, Src0Idx), "source operand must be a VGPR");
4393 return false;
4394}
4395
4396bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4397 const OperandVector &Operands) {
4398
4399 const unsigned Opc = Inst.getOpcode();
4400
4401 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4402 return true;
4403
4404 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4405 assert(Src0Idx != -1);
4406
4407 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4408 if (!Src0.isReg())
4409 return true;
4410
4411 auto Reg = mc2PseudoReg(Src0.getReg());
4412 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4413 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4414 Error(getOperandLoc(Operands, Src0Idx),
4415 "source operand must be either a VGPR or an inline constant");
4416 return false;
4417 }
4418
4419 return true;
4420}
4421
4422bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4423 const OperandVector &Operands) {
4424 unsigned Opcode = Inst.getOpcode();
4425 const MCInstrDesc &Desc = MII.get(Opcode);
4426
4427 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4428 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4429 return true;
4430
4431 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4432 if (Src2Idx == -1)
4433 return true;
4434
4435 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4436 Error(getOperandLoc(Operands, Src2Idx),
4437 "inline constants are not allowed for this operand");
4438 return false;
4439 }
4440
4441 return true;
4442}
4443
4444bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4445 const OperandVector &Operands) {
4446 const unsigned Opc = Inst.getOpcode();
4447 const MCInstrDesc &Desc = MII.get(Opc);
4448
4449 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4450 return true;
4451
4452 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4453 if (BlgpIdx != -1) {
4454 if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
4455 int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
4456
4457 unsigned CBSZ = Inst.getOperand(CbszIdx).getImm();
4458 unsigned BLGP = Inst.getOperand(BlgpIdx).getImm();
4459
4460 // Validate the correct register size was used for the floating point
4461 // format operands
4462
4463 bool Success = true;
4464 if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) {
4465 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4466 Error(getOperandLoc(Operands, Src0Idx),
4467 "wrong register tuple size for cbsz value " + Twine(CBSZ));
4468 Success = false;
4469 }
4470
4471 if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(BLGP)) {
4472 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4473 Error(getOperandLoc(Operands, Src1Idx),
4474 "wrong register tuple size for blgp value " + Twine(BLGP));
4475 Success = false;
4476 }
4477
4478 return Success;
4479 }
4480 }
4481
4482 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4483 if (Src2Idx == -1)
4484 return true;
4485
4486 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4487 if (!Src2.isReg())
4488 return true;
4489
4490 MCRegister Src2Reg = Src2.getReg();
4491 MCRegister DstReg = Inst.getOperand(0).getReg();
4492 if (Src2Reg == DstReg)
4493 return true;
4494
4495 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4496 if (TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[0], HwMode))
4497 .getSizeInBits() <= 128)
4498 return true;
4499
4500 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4501 Error(getOperandLoc(Operands, Src2Idx),
4502 "source 2 operand must not partially overlap with dst");
4503 return false;
4504 }
4505
4506 return true;
4507}
4508
4509bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4510 switch (Inst.getOpcode()) {
4511 default:
4512 return true;
4513 case V_DIV_SCALE_F32_gfx6_gfx7:
4514 case V_DIV_SCALE_F32_vi:
4515 case V_DIV_SCALE_F32_gfx10:
4516 case V_DIV_SCALE_F64_gfx6_gfx7:
4517 case V_DIV_SCALE_F64_vi:
4518 case V_DIV_SCALE_F64_gfx10:
4519 break;
4520 }
4521
4522 // TODO: Check that src0 = src1 or src2.
4523
4524 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4525 AMDGPU::OpName::src2_modifiers,
4526 AMDGPU::OpName::src2_modifiers}) {
4527 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4528 .getImm() &
4530 return false;
4531 }
4532 }
4533
4534 return true;
4535}
4536
4537bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4538
4539 const unsigned Opc = Inst.getOpcode();
4540 const MCInstrDesc &Desc = MII.get(Opc);
4541
4542 if ((Desc.TSFlags & MIMGFlags) == 0)
4543 return true;
4544
4545 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4546 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4547 if (isCI() || isSI())
4548 return false;
4549 }
4550
4551 return true;
4552}
4553
4554bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4555 const unsigned Opc = Inst.getOpcode();
4556 const MCInstrDesc &Desc = MII.get(Opc);
4557
4558 if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0)
4559 return true;
4560
4561 int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
4562
4563 return R128Idx < 0 || !Inst.getOperand(R128Idx).getImm();
4564}
4565
4566static bool IsRevOpcode(const unsigned Opcode)
4567{
4568 switch (Opcode) {
4569 case AMDGPU::V_SUBREV_F32_e32:
4570 case AMDGPU::V_SUBREV_F32_e64:
4571 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4572 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4573 case AMDGPU::V_SUBREV_F32_e32_vi:
4574 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4575 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4576 case AMDGPU::V_SUBREV_F32_e64_vi:
4577
4578 case AMDGPU::V_SUBREV_CO_U32_e32:
4579 case AMDGPU::V_SUBREV_CO_U32_e64:
4580 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4581 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4582
4583 case AMDGPU::V_SUBBREV_U32_e32:
4584 case AMDGPU::V_SUBBREV_U32_e64:
4585 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4586 case AMDGPU::V_SUBBREV_U32_e32_vi:
4587 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4588 case AMDGPU::V_SUBBREV_U32_e64_vi:
4589
4590 case AMDGPU::V_SUBREV_U32_e32:
4591 case AMDGPU::V_SUBREV_U32_e64:
4592 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4593 case AMDGPU::V_SUBREV_U32_e32_vi:
4594 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4595 case AMDGPU::V_SUBREV_U32_e64_vi:
4596
4597 case AMDGPU::V_SUBREV_F16_e32:
4598 case AMDGPU::V_SUBREV_F16_e64:
4599 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4600 case AMDGPU::V_SUBREV_F16_e32_vi:
4601 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4602 case AMDGPU::V_SUBREV_F16_e64_vi:
4603
4604 case AMDGPU::V_SUBREV_U16_e32:
4605 case AMDGPU::V_SUBREV_U16_e64:
4606 case AMDGPU::V_SUBREV_U16_e32_vi:
4607 case AMDGPU::V_SUBREV_U16_e64_vi:
4608
4609 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4610 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4611 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4612
4613 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4614 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4615
4616 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4617 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4618
4619 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4620 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4621
4622 case AMDGPU::V_LSHRREV_B32_e32:
4623 case AMDGPU::V_LSHRREV_B32_e64:
4624 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4625 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4626 case AMDGPU::V_LSHRREV_B32_e32_vi:
4627 case AMDGPU::V_LSHRREV_B32_e64_vi:
4628 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4629 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4630
4631 case AMDGPU::V_ASHRREV_I32_e32:
4632 case AMDGPU::V_ASHRREV_I32_e64:
4633 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4634 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4635 case AMDGPU::V_ASHRREV_I32_e32_vi:
4636 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4637 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4638 case AMDGPU::V_ASHRREV_I32_e64_vi:
4639
4640 case AMDGPU::V_LSHLREV_B32_e32:
4641 case AMDGPU::V_LSHLREV_B32_e64:
4642 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4643 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4644 case AMDGPU::V_LSHLREV_B32_e32_vi:
4645 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4646 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4647 case AMDGPU::V_LSHLREV_B32_e64_vi:
4648
4649 case AMDGPU::V_LSHLREV_B16_e32:
4650 case AMDGPU::V_LSHLREV_B16_e64:
4651 case AMDGPU::V_LSHLREV_B16_e32_vi:
4652 case AMDGPU::V_LSHLREV_B16_e64_vi:
4653 case AMDGPU::V_LSHLREV_B16_gfx10:
4654
4655 case AMDGPU::V_LSHRREV_B16_e32:
4656 case AMDGPU::V_LSHRREV_B16_e64:
4657 case AMDGPU::V_LSHRREV_B16_e32_vi:
4658 case AMDGPU::V_LSHRREV_B16_e64_vi:
4659 case AMDGPU::V_LSHRREV_B16_gfx10:
4660
4661 case AMDGPU::V_ASHRREV_I16_e32:
4662 case AMDGPU::V_ASHRREV_I16_e64:
4663 case AMDGPU::V_ASHRREV_I16_e32_vi:
4664 case AMDGPU::V_ASHRREV_I16_e64_vi:
4665 case AMDGPU::V_ASHRREV_I16_gfx10:
4666
4667 case AMDGPU::V_LSHLREV_B64_e64:
4668 case AMDGPU::V_LSHLREV_B64_gfx10:
4669 case AMDGPU::V_LSHLREV_B64_vi:
4670
4671 case AMDGPU::V_LSHRREV_B64_e64:
4672 case AMDGPU::V_LSHRREV_B64_gfx10:
4673 case AMDGPU::V_LSHRREV_B64_vi:
4674
4675 case AMDGPU::V_ASHRREV_I64_e64:
4676 case AMDGPU::V_ASHRREV_I64_gfx10:
4677 case AMDGPU::V_ASHRREV_I64_vi:
4678
4679 case AMDGPU::V_PK_LSHLREV_B16:
4680 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4681 case AMDGPU::V_PK_LSHLREV_B16_vi:
4682
4683 case AMDGPU::V_PK_LSHRREV_B16:
4684 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4685 case AMDGPU::V_PK_LSHRREV_B16_vi:
4686 case AMDGPU::V_PK_ASHRREV_I16:
4687 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4688 case AMDGPU::V_PK_ASHRREV_I16_vi:
4689 return true;
4690 default:
4691 return false;
4692 }
4693}
4694
4695bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst,
4696 const OperandVector &Operands) {
4697 using namespace SIInstrFlags;
4698 const unsigned Opcode = Inst.getOpcode();
4699 const MCInstrDesc &Desc = MII.get(Opcode);
4700
4701 // lds_direct register is defined so that it can be used
4702 // with 9-bit operands only. Ignore encodings which do not accept these.
4703 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4704 if ((Desc.TSFlags & Enc) == 0)
4705 return true;
4706
4707 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4708 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4709 if (SrcIdx == -1)
4710 break;
4711 const auto &Src = Inst.getOperand(SrcIdx);
4712 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4713
4714 if (isGFX90A() || isGFX11Plus()) {
4715 Error(getOperandLoc(Operands, SrcIdx),
4716 "lds_direct is not supported on this GPU");
4717 return false;
4718 }
4719
4720 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) {
4721 Error(getOperandLoc(Operands, SrcIdx),
4722 "lds_direct cannot be used with this instruction");
4723 return false;
4724 }
4725
4726 if (SrcName != OpName::src0) {
4727 Error(getOperandLoc(Operands, SrcIdx),
4728 "lds_direct may be used as src0 only");
4729 return false;
4730 }
4731 }
4732 }
4733
4734 return true;
4735}
4736
4737SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4738 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4739 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4740 if (Op.isFlatOffset())
4741 return Op.getStartLoc();
4742 }
4743 return getLoc();
4744}
4745
4746bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4747 const OperandVector &Operands) {
4748 auto Opcode = Inst.getOpcode();
4749 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4750 if (OpNum == -1)
4751 return true;
4752
4753 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4754 if ((TSFlags & SIInstrFlags::FLAT))
4755 return validateFlatOffset(Inst, Operands);
4756
4757 if ((TSFlags & SIInstrFlags::SMRD))
4758 return validateSMEMOffset(Inst, Operands);
4759
4760 const auto &Op = Inst.getOperand(OpNum);
4761 // GFX12+ buffer ops: InstOffset is signed 24, but must not be a negative.
4762 if (isGFX12Plus() &&
4763 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4764 const unsigned OffsetSize = 24;
4765 if (!isUIntN(OffsetSize - 1, Op.getImm())) {
4766 Error(getFlatOffsetLoc(Operands),
4767 Twine("expected a ") + Twine(OffsetSize - 1) +
4768 "-bit unsigned offset for buffer ops");
4769 return false;
4770 }
4771 } else {
4772 const unsigned OffsetSize = 16;
4773 if (!isUIntN(OffsetSize, Op.getImm())) {
4774 Error(getFlatOffsetLoc(Operands),
4775 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4776 return false;
4777 }
4778 }
4779 return true;
4780}
4781
// Validate the offset operand of a FLAT instruction. Instructions without the
// FLAT flag are accepted. A non-zero offset is rejected when the subtarget
// has no flat offsets; otherwise the offset must fit the subtarget's flat
// offset width and must be non-negative unless negative offsets are allowed.
4782bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4783 const OperandVector &Operands) {
4784 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4785 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4786 return true;
4787
4788 auto Opcode = Inst.getOpcode();
4789 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4790 assert(OpNum != -1);
4791
4792 const auto &Op = Inst.getOperand(OpNum);
4793 if (!hasFlatOffsets() && Op.getImm() != 0) {
4794 Error(getFlatOffsetLoc(Operands),
4795 "flat offset modifier is not supported on this GPU");
4796 return false;
4797 }
4798
4799 // For pre-GFX12 FLAT instructions the offset must be positive;
4800 // MSB is ignored and forced to zero.
4801 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4802 bool AllowNegative =
// NOTE(review): listing line 4803 (the first part of the AllowNegative
// initializer) is absent from this extraction; restore it from upstream.
4804 isGFX12Plus();
// The message names a signed OffsetSize-bit range when negatives are allowed
// and an unsigned (OffsetSize - 1)-bit range otherwise.
4805 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4806 Error(getFlatOffsetLoc(Operands),
4807 Twine("expected a ") +
4808 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4809 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4810 return false;
4811 }
4812
4813 return true;
4814}
4815
4816SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4817 // Start with second operand because SMEM Offset cannot be dst or src0.
4818 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4819 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4820 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4821 return Op.getStartLoc();
4822 }
4823 return getLoc();
4824}
4825
// Validate the immediate offset of an SMRD (scalar memory) instruction.
// The check is skipped on SI/CI, for instructions without the SMRD flag, and
// when there is no offset operand or it is not an immediate. On failure an
// error naming the expected offset range is reported at the offset operand.
4826bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4827 const OperandVector &Operands) {
4828 if (isCI() || isSI())
4829 return true;
4830
4831 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4832 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4833 return true;
4834
4835 auto Opcode = Inst.getOpcode();
4836 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4837 if (OpNum == -1)
4838 return true;
4839
4840 const auto &Op = Inst.getOperand(OpNum);
4841 if (!Op.isImm())
4842 return true;
4843
4844 uint64_t Offset = Op.getImm();
4845 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
// NOTE(review): listing lines 4846-4847 (the legality condition guarding the
// return below) are absent from this extraction; restore them from upstream.
4848 return true;
4849
// The expected range in the message depends on the subtarget generation and
// on whether the opcode is a buffer access.
4850 Error(getSMEMOffsetLoc(Operands),
4851 isGFX12Plus() && IsBuffer
4852 ? "expected a 23-bit unsigned offset for buffer ops"
4853 : isGFX12Plus() ? "expected a 24-bit signed offset"
4854 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4855 : "expected a 21-bit signed offset");
4856
4857 return false;
4858}
4859
// For SOP2/SOPC instructions, check that src0 and src1 together use at most
// one unique literal. Unresolved expressions each count as a literal. On
// failure an error is reported at src1.
4860bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
4861 const OperandVector &Operands) {
4862 unsigned Opcode = Inst.getOpcode();
4863 const MCInstrDesc &Desc = MII.get(Opcode);
4864 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4865 return true;
4866
4867 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4868 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4869
4870 const int OpIndices[] = { Src0Idx, Src1Idx };
4871
4872 unsigned NumExprs = 0;
4873 unsigned NumLiterals = 0;
// LiteralValue is only read once NumLiterals is non-zero (see the check at
// listing line 4901).
4874 int64_t LiteralValue;
4875
4876 for (int OpIdx : OpIndices) {
4877 if (OpIdx == -1) break;
4878
4879 const MCOperand &MO = Inst.getOperand(OpIdx);
4880 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
// NOTE(review): listing line 4881 (presumably the condition opening the block
// that the surplus closing brace below belongs to) is absent from this
// extraction; restore it from upstream.
4882 bool IsLit = false;
4883 std::optional<int64_t> Imm;
// Imm stays empty for expressions that are not literal expressions; those
// are counted in NumExprs below. Operands that are neither immediates nor
// expressions are skipped.
4884 if (MO.isImm()) {
4885 Imm = MO.getImm();
4886 } else if (MO.isExpr()) {
4887 if (isLitExpr(MO.getExpr())) {
4888 IsLit = true;
4889 Imm = getLitValue(MO.getExpr());
4890 }
4891 } else {
4892 continue;
4893 }
4894
4895 if (!Imm.has_value()) {
4896 ++NumExprs;
4897 } else if (!isInlineConstant(Inst, OpIdx)) {
4898 auto OpType = static_cast<AMDGPU::OperandType>(
4899 Desc.operands()[OpIdx].OperandType);
4900 int64_t Value = encode32BitLiteral(*Imm, OpType, IsLit);
// A repeated literal value is not counted again.
4901 if (NumLiterals == 0 || LiteralValue != Value) {
// NOTE(review): listing line 4902 is absent from this extraction; presumably
// it records Value in LiteralValue — confirm against upstream.
4903 ++NumLiterals;
4904 }
4905 }
4906 }
4907 }
4908
4909 if (NumLiterals + NumExprs <= 1)
4910 return true;
4911
4912 Error(getOperandLoc(Operands, Src1Idx),
4913 "only one unique literal operand is allowed");
4914 return false;
4915}
4916
4917bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4918 const unsigned Opc = Inst.getOpcode();
4919 if (isPermlane16(Opc)) {
4920 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4921 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4922
4923 if (OpSel & ~3)
4924 return false;
4925 }
4926
4927 uint64_t TSFlags = MII.get(Opc).TSFlags;
4928
4929 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4930 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4931 if (OpSelIdx != -1) {
4932 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4933 return false;
4934 }
4935 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4936 if (OpSelHiIdx != -1) {
4937 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4938 return false;
4939 }
4940 }
4941
4942 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4943 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4944 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4945 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4946 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4947 if (OpSel & 3)
4948 return false;
4949 }
4950
4951 // Packed math FP32 instructions typically accept SGPRs or VGPRs as source
4952 // operands. On gfx12+, if a source operand uses SGPRs, the HW can only read
4953 // the first SGPR and use it for both the low and high operations.
4954 if (isPackedFP32Inst(Opc) && isGFX12Plus()) {
4955 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4956 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4957 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4958 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4959
4960 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4961 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4962 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4963 unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
4964
4965 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4966
4967 auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
4968 unsigned Mask = 1U << Index;
4969 return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
4970 };
4971
4972 if (Src0.isReg() && isSGPR(Src0.getReg(), TRI) &&
4973 !VerifyOneSGPR(/*Index=*/0))
4974 return false;
4975 if (Src1.isReg() && isSGPR(Src1.getReg(), TRI) &&
4976 !VerifyOneSGPR(/*Index=*/1))
4977 return false;
4978
4979 int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4980 if (Src2Idx != -1) {
4981 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4982 if (Src2.isReg() && isSGPR(Src2.getReg(), TRI) &&
4983 !VerifyOneSGPR(/*Index=*/2))
4984 return false;
4985 }
4986 }
4987
4988 return true;
4989}
4990
4991bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
4992 if (!hasTrue16Insts())
4993 return true;
4994 const MCRegisterInfo *MRI = getMRI();
4995 const unsigned Opc = Inst.getOpcode();
4996 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4997 if (OpSelIdx == -1)
4998 return true;
4999 unsigned OpSelOpValue = Inst.getOperand(OpSelIdx).getImm();
5000 // If the value is 0 we could have a default OpSel Operand, so conservatively
5001 // allow it.
5002 if (OpSelOpValue == 0)
5003 return true;
5004 unsigned OpCount = 0;
5005 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
5006 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
5007 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), OpName);
5008 if (OpIdx == -1)
5009 continue;
5010 const MCOperand &Op = Inst.getOperand(OpIdx);
5011 if (Op.isReg() &&
5012 MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(Op.getReg())) {
5013 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Op.getReg(), *MRI);
5014 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
5015 if (OpSelOpIsHi != VGPRSuffixIsHi)
5016 return false;
5017 }
5018 ++OpCount;
5019 }
5020
5021 return true;
5022}
5023
5024bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
5025 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
5026
5027 const unsigned Opc = Inst.getOpcode();
5028 uint64_t TSFlags = MII.get(Opc).TSFlags;
5029
5030 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
5031 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
5032 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
5033 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
5034 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
5035 !(TSFlags & SIInstrFlags::IsSWMMAC))
5036 return true;
5037
5038 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
5039 if (NegIdx == -1)
5040 return true;
5041
5042 unsigned Neg = Inst.getOperand(NegIdx).getImm();
5043
5044 // Instructions that have neg_lo or neg_hi operand but neg modifier is allowed
5045 // on some src operands but not allowed on other.
5046 // It is convenient that such instructions don't have src_modifiers operand
5047 // for src operands that don't allow neg because they also don't allow opsel.
5048
5049 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
5050 AMDGPU::OpName::src1_modifiers,
5051 AMDGPU::OpName::src2_modifiers};
5052
5053 for (unsigned i = 0; i < 3; ++i) {
5054 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
5055 if (Neg & (1 << i))
5056 return false;
5057 }
5058 }
5059
5060 return true;
5061}
5062
5063bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
5064 const OperandVector &Operands) {
5065 const unsigned Opc = Inst.getOpcode();
5066 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
5067 if (DppCtrlIdx >= 0) {
5068 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
5069
5070 if (!AMDGPU::isLegalDPALU_DPPControl(getSTI(), DppCtrl) &&
5071 AMDGPU::isDPALU_DPP(MII.get(Opc), MII, getSTI())) {
5072 // DP ALU DPP is supported for row_newbcast only on GFX9* and row_share
5073 // only on GFX12.
5074 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
5075 Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
5076 : "DP ALU dpp only supports row_newbcast");
5077 return false;
5078 }
5079 }
5080
5081 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
5082 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5083
5084 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
5085 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
5086 if (Src1Idx >= 0) {
5087 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
5088 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5089 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
5090 Error(getOperandLoc(Operands, Src1Idx),
5091 "invalid operand for instruction");
5092 return false;
5093 }
5094 if (Src1.isImm()) {
5095 Error(getInstLoc(Operands),
5096 "src1 immediate operand invalid for instruction");
5097 return false;
5098 }
5099 }
5100 }
5101
5102 return true;
5103}
5104
5105// Check if VCC register matches wavefront size
5106bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
5107 return (Reg == AMDGPU::VCC && isWave64()) ||
5108 (Reg == AMDGPU::VCC_LO && isWave32());
5109}
5110
5111// One unique literal can be used. VOP3 literal is only allowed in GFX10+
//
// Validates literal source operands. Only VOP3/VOP3P instructions, VOPD
// instructions and instructions with a named `imm` operand (a mandatory
// literal) are checked; everything else is accepted immediately.
// Emits a diagnostic and returns false when a literal cannot be used in its
// operand slot, when literals are used without FeatureVOP3Literal on an
// instruction that has no mandatory literal, or when more than one distinct
// literal is used. Returns true otherwise.
5112bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
5113 const OperandVector &Operands) {
5114 unsigned Opcode = Inst.getOpcode();
5115 const MCInstrDesc &Desc = MII.get(Opcode);
5116 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
5117 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
5118 !HasMandatoryLiteral && !isVOPD(Opcode))
5119 return true;
5120
5121 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
5122
// Index of the operand that introduced the most recent distinct literal, and
// the literal value used for the uniqueness comparison below.
5123 std::optional<unsigned> LiteralOpIdx;
5124 std::optional<uint64_t> LiteralValue;
5125
5126 for (int OpIdx : OpIndices) {
// -1 marks an operand slot this opcode does not have.
5127 if (OpIdx == -1)
5128 continue;
5129
// Only immediates and expressions in source-operand slots are considered.
5130 const MCOperand &MO = Inst.getOperand(OpIdx);
5131 if (!MO.isImm() && !MO.isExpr())
5132 continue;
5133 if (!isSISrcOperand(Desc, OpIdx))
5134 continue;
5135
// Imm stays empty for an expression that is not a literal expression.
5136 std::optional<int64_t> Imm;
5137 if (MO.isImm())
5138 Imm = MO.getImm();
5139 else if (MO.isExpr() && isLitExpr(MO.getExpr()))
5140 Imm = getLitValue(MO.getExpr());
5141
5142 bool IsAnotherLiteral = false;
5143 bool IsForcedLit = findMCOperand(Operands, OpIdx).isForcedLit();
5144 bool IsForcedLit64 = findMCOperand(Operands, OpIdx).isForcedLit64();
5145 if (!Imm.has_value()) {
5146 // Literal value not known, so we conservatively assume it's different.
5147 IsAnotherLiteral = true;
5148 } else if (IsForcedLit || IsForcedLit64 || !isInlineConstant(Inst, OpIdx)) {
5149 uint64_t Value = *Imm;
// NOTE(review): listing line 5152 is absent from this view, so the
// initializer of IsForcedFP64 below is incomplete as shown.
5150 bool IsForcedFP64 =
5151 Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
5153 HasMandatoryLiteral);
5154 bool IsFP64 = (IsForcedFP64 || AMDGPU::isSISrcFPOperand(Desc, OpIdx)) &&
5155 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
5156 bool IsValid32Op =
5157 IsForcedLit || AMDGPU::isValid32BitLiteral(Value, IsFP64);
5158
// Reject a value that does not fit a 32-bit literal (or an explicit lit64 on
// an instruction without a mandatory literal) unless the subtarget has
// 64-bit literals and the instruction descriptor size is 4.
5159 if (((!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) &&
5160 !IsForcedFP64) ||
5161 (IsForcedLit64 && !HasMandatoryLiteral)) &&
5162 (!has64BitLiterals() || Desc.getSize() != 4)) {
5163 Error(getOperandLoc(Operands, OpIdx),
5164 "invalid operand for instruction");
5165 return false;
5166 }
5167
5168 // Only src0 can use lit64 in VOP* encoding.
5169 if (!IsForcedFP64 && (IsForcedLit64 || !IsValid32Op) &&
5170 OpIdx != getNamedOperandIdx(Opcode, OpName::src0)) {
5171 Error(getOperandLoc(Operands, OpIdx),
5172 "invalid operand for instruction");
5173 return false;
5174 }
5175
// For an FP64 operand given as a valid 32-bit literal, the upper 32 bits of
// the value are what gets compared for uniqueness.
5176 if (IsFP64 && IsValid32Op && !IsForcedFP64)
5177 Value = Hi_32(Value);
5178
// NOTE(review): listing line 5180 is absent from this view; the visible code
// never assigns LiteralValue, so presumably that line records Value - verify
// against the full source.
5179 IsAnotherLiteral = !LiteralValue || *LiteralValue != Value;
5181 }
5182
5183 if (IsAnotherLiteral && !HasMandatoryLiteral &&
5184 !getFeatureBits()[FeatureVOP3Literal]) {
5185 Error(getOperandLoc(Operands, OpIdx),
5186 "literal operands are not supported");
5187 return false;
5188 }
5189
// A second distinct literal is an error; it is reported at whichever of the
// two operand locations getLaterLoc selects.
5190 if (LiteralOpIdx && IsAnotherLiteral) {
5191 Error(getLaterLoc(getOperandLoc(Operands, OpIdx),
5192 getOperandLoc(Operands, *LiteralOpIdx)),
5193 "only one unique literal operand is allowed");
5194 return false;
5195 }
5196
5197 if (IsAnotherLiteral)
5198 LiteralOpIdx = OpIdx;
5199 }
5200
5201 return true;
5202}
5203
5204// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
5205static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
5206 const MCRegisterInfo *MRI) {
5207 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name);
5208 if (OpIdx < 0)
5209 return -1;
5210
5211 const MCOperand &Op = Inst.getOperand(OpIdx);
5212 if (!Op.isReg())
5213 return -1;
5214
5215 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5216 auto Reg = Sub ? Sub : Op.getReg();
5217 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5218 return AGPR32.contains(Reg) ? 1 : 0;
5219}
5220
// Check AGPR/VGPR usage of the destination and data operands of memory
// instructions. Instructions whose TSFlags carry none of the tested flags
// are accepted. Returns false on a violation; no diagnostic is emitted here.
5221bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
5222 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
// NOTE(review): listing line 5224 is absent from this view, so the flag mask
// below is incomplete as shown (FLAT, MUBUF and DS are the visible members).
5223 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
5225 SIInstrFlags::DS)) == 0)
5226 return true;
5227
// DS instructions name their data operand data0; the others use vdata.
5228 AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
5229 ? AMDGPU::OpName::data0
5230 : AMDGPU::OpName::vdata;
5231
// IsAGPROperand yields -1 (absent / not a register), 0 (not AGPR) or 1 (AGPR).
5232 const MCRegisterInfo *MRI = getMRI();
5233 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
5234 int DataAreg = IsAGPROperand(Inst, DataName, MRI);
5235
// For DS, when both data0 and data1 are registers they must be of the same
// kind.
5236 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
5237 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
5238 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5239 return false;
5240 }
5241
// With FeatureGFX90AInsts, dst and data must be of the same kind when both
// are registers; a missing one imposes no constraint.
5242 auto FB = getFeatureBits();
5243 if (FB[AMDGPU::FeatureGFX90AInsts]) {
5244 if (DataAreg < 0 || DstAreg < 0)
5245 return true;
5246 return DstAreg == DataAreg;
5247 }
5248
// Otherwise neither operand may be an AGPR.
5249 return DstAreg < 1 && DataAreg < 1;
5250}
5251
5252bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
5253 auto FB = getFeatureBits();
5254 if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
5255 return true;
5256
5257 unsigned Opc = Inst.getOpcode();
5258 const MCRegisterInfo *MRI = getMRI();
5259 // DS_READ_B96_TR_B6 is the only DS instruction in GFX950, that allows
5260 // unaligned VGPR. All others only allow even aligned VGPRs.
5261 if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5262 return true;
5263
5264 if (FB[AMDGPU::FeatureGFX1250Insts]) {
5265 switch (Opc) {
5266 default:
5267 break;
5268 case AMDGPU::DS_LOAD_TR6_B96:
5269 case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5270 // DS_LOAD_TR6_B96 is the only DS instruction in GFX1250, that
5271 // allows unaligned VGPR. All others only allow even aligned VGPRs.
5272 return true;
5273 case AMDGPU::GLOBAL_LOAD_TR6_B96:
5274 case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5275 // GLOBAL_LOAD_TR6_B96 is the only GLOBAL instruction in GFX1250, that
5276 // allows unaligned VGPR for vdst, but other operands still only allow
5277 // even aligned VGPRs.
5278 int VAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
5279 if (VAddrIdx != -1) {
5280 const MCOperand &Op = Inst.getOperand(VAddrIdx);
5281 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5282 if ((Sub - AMDGPU::VGPR0) & 1)
5283 return false;
5284 }
5285 return true;
5286 }
5287 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5288 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
5289 return true;
5290 }
5291 }
5292
5293 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5294 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5295 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
5296 const MCOperand &Op = Inst.getOperand(I);
5297 if (!Op.isReg())
5298 continue;
5299
5300 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5301 if (!Sub)
5302 continue;
5303
5304 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
5305 return false;
5306 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
5307 return false;
5308 }
5309
5310 return true;
5311}
5312
5313SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
5314 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5315 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5316 if (Op.isBLGP())
5317 return Op.getStartLoc();
5318 }
5319 return SMLoc();
5320}
5321
5322bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
5323 const OperandVector &Operands) {
5324 unsigned Opc = Inst.getOpcode();
5325 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
5326 if (BlgpIdx == -1)
5327 return true;
5328 SMLoc BLGPLoc = getBLGPLoc(Operands);
5329 if (!BLGPLoc.isValid())
5330 return true;
5331 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
5332 auto FB = getFeatureBits();
5333 bool UsesNeg = false;
5334 if (FB[AMDGPU::FeatureGFX940Insts]) {
5335 switch (Opc) {
5336 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5337 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5338 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5339 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5340 UsesNeg = true;
5341 }
5342 }
5343
5344 if (IsNeg == UsesNeg)
5345 return true;
5346
5347 Error(BLGPLoc,
5348 UsesNeg ? "invalid modifier: blgp is not supported"
5349 : "invalid modifier: neg is not supported");
5350
5351 return false;
5352}
5353
5354bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
5355 const OperandVector &Operands) {
5356 if (!isGFX11Plus())
5357 return true;
5358
5359 unsigned Opc = Inst.getOpcode();
5360 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5361 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5362 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5363 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5364 return true;
5365
5366 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
5367 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
5368 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
5369 if (Reg == AMDGPU::SGPR_NULL)
5370 return true;
5371
5372 Error(getOperandLoc(Operands, Src0Idx), "src0 must be null");
5373 return false;
5374}
5375
5376bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
5377 const OperandVector &Operands) {
5378 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5379 if ((TSFlags & SIInstrFlags::DS) == 0)
5380 return true;
5381 if (TSFlags & SIInstrFlags::GWS)
5382 return validateGWS(Inst, Operands);
5383 // Only validate GDS for non-GWS instructions.
5384 if (hasGDS())
5385 return true;
5386 int GDSIdx =
5387 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
5388 if (GDSIdx < 0)
5389 return true;
5390 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
5391 if (GDS) {
5392 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5393 Error(S, "gds modifier is not supported on this GPU");
5394 return false;
5395 }
5396 return true;
5397}
5398
5399// gfx90a has an undocumented limitation:
5400// DS_GWS opcodes must use even aligned registers.
5401bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
5402 const OperandVector &Operands) {
5403 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5404 return true;
5405
5406 int Opc = Inst.getOpcode();
5407 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5408 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5409 return true;
5410
5411 const MCRegisterInfo *MRI = getMRI();
5412 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5413 int Data0Pos =
5414 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
5415 assert(Data0Pos != -1);
5416 auto Reg = Inst.getOperand(Data0Pos).getReg();
5417 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5418 if (RegIdx & 1) {
5419 Error(getOperandLoc(Operands, Data0Pos), "vgpr must be even aligned");
5420 return false;
5421 }
5422
5423 return true;
5424}
5425
// Validate the cache-policy (cpol) operand of Inst. Instructions without a
// cpol operand are accepted. On GFX12+ the TH/scope checks are delegated to
// validateTHAndScopeBits; otherwise the SMRD, scc and glc/sc0 rules below
// apply. IDLoc is used for diagnostics that are not tied to a specific
// modifier in the source text.
// Several diagnostics point at a modifier name by searching for it in the
// source text starting at the location getImmLoc returns for ImmTyCPol.
5426bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
5427 const OperandVector &Operands,
5428 SMLoc IDLoc) {
5429 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
5430 AMDGPU::OpName::cpol);
5431 if (CPolPos == -1)
5432 return true;
5433
5434 unsigned CPol = Inst.getOperand(CPolPos).getImm();
5435
// NOTE(review): the three diagnostics below (two here, one after this block)
// call Error() but do not return false, so validation continues afterwards.
// Confirm this is intended.
5436 if (!isGFX1250Plus()) {
5437 if (CPol & CPol::SCAL) {
5438 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5439 StringRef CStr(S.getPointer());
5440 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5441 Error(S, "scale_offset is not supported on this GPU");
5442 }
5443 if (CPol & CPol::NV) {
5444 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5445 StringRef CStr(S.getPointer());
5446 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
5447 Error(S, "nv is not supported on this GPU");
5448 }
5449 }
5450
5451 if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Inst.getOpcode())) {
5452 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5453 StringRef CStr(S.getPointer());
5454 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5455 Error(S, "scale_offset is not supported for this instruction");
5456 }
5457
5458 if (isGFX12Plus())
5459 return validateTHAndScopeBits(Inst, Operands, CPol);
5460
// SMRD: no cache policy at all on SI/CI; elsewhere only GLC and DLC bits.
5461 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5462 if (TSFlags & SIInstrFlags::SMRD) {
5463 if (CPol && (isSI() || isCI())) {
5464 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5465 Error(S, "cache policy is not supported for SMRD instructions");
5466 return false;
5467 }
5468 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
5469 Error(IDLoc, "invalid cache policy for SMEM instruction");
5470 return false;
5471 }
5472 }
5473
// NOTE(review): listing lines 5476-5477 are absent from this view, so the
// AllowSCCModifier initializer below is incomplete as shown.
5474 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
5475 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
5478 if (!(TSFlags & AllowSCCModifier)) {
5479 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5480 StringRef CStr(S.getPointer());
5481 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
5482 Error(S,
5483 "scc modifier is not supported for this instruction on this GPU");
5484 return false;
5485 }
5486 }
5487
// NOTE(review): listing line 5488 (the condition guarding this return) is
// absent from this view.
5489 return true;
5490
// Returning atomics other than MIMG must set GLC; the remaining instructions
// that reach this point must not. The bit is spelled "sc0" on GFX940 and
// "glc" elsewhere.
5491 if (TSFlags & SIInstrFlags::IsAtomicRet) {
5492 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
5493 Error(IDLoc, isGFX940() ? "instruction must use sc0"
5494 : "instruction must use glc");
5495 return false;
5496 }
5497 } else {
5498 if (CPol & CPol::GLC) {
5499 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5500 StringRef CStr(S.getPointer());
// NOTE(review): listing line 5501 is absent from this view; the next line is
// the tail of a statement whose beginning is not shown.
5502 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
5503 Error(S, isGFX940() ? "instruction must not use sc0"
5504 : "instruction must not use glc");
5505 return false;
5506 }
5507 }
5508
5509 return true;
5510}
5511
// Validate the temporal-hint (TH) and scope fields of a cache-policy value.
// The only caller visible in this part of the file is validateCoherencyBits,
// on its GFX12+ path. Emits a diagnostic at the cpol operand location and
// returns false on an invalid combination; returns true otherwise.
// NOTE(review): several listing lines of this function are absent from this
// view (5528, 5532-5533, 5546, 5548), so some conditions below are incomplete
// as shown.
5512bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
5513 const OperandVector &Operands,
5514 const unsigned CPol) {
5515 const unsigned TH = CPol & AMDGPU::CPol::TH;
5516 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
5517
5518 const unsigned Opcode = Inst.getOpcode();
5519 const MCInstrDesc &TID = MII.get(Opcode);
5520
// Reports Msg at the cpol operand location and yields false, so callers can
// write `return PrintError(...)`.
5521 auto PrintError = [&](StringRef Msg) {
5522 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5523 Error(S, Msg);
5524 return false;
5525 };
5526
// (Second half of this condition is on the missing line 5528.)
5527 if ((TH & AMDGPU::CPol::TH_ATOMIC_RETURN) &&
5529 return PrintError("th:TH_ATOMIC_RETURN requires a destination operand");
5530
// (Remainder of this condition is on the missing lines 5532-5533.)
5531 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5534 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5535
// No temporal hint given: nothing further to check.
5536 if (TH == 0)
5537 return true;
5538
5539 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5540 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5541 (TH == AMDGPU::CPol::TH_NT_HT)))
5542 return PrintError("invalid th value for SMEM instruction");
5543
// (Parts of this condition are on the missing lines 5546 and 5548.)
5544 if (TH == AMDGPU::CPol::TH_BYPASS) {
5545 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5547 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5549 return PrintError("scope and th combination is not valid");
5550 }
5551
// The cache-policy value must carry the TH type bit matching the kind of
// instruction reported by getTemporalHintType (atomic, store, or otherwise
// load).
5552 unsigned THType = AMDGPU::getTemporalHintType(TID);
5553 if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
5554 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5555 return PrintError("invalid th value for atomic instructions");
5556 } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
5557 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5558 return PrintError("invalid th value for store instructions");
5559 } else {
5560 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5561 return PrintError("invalid th value for load instructions");
5562 }
5563
5564 return true;
5565}
5566
// Reject a TFE modifier on store instructions. Emits a diagnostic and returns
// false when the check fires; returns true otherwise.
5567bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5568 const OperandVector &Operands) {
5569 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
// NOTE(review): listing line 5571 (the rest of this condition) is absent from
// this view.
5570 if (Desc.mayStore() &&
// The diagnostic is only issued when the TFE location differs from the
// instruction location - presumably getImmLoc falls back to the instruction
// location when the modifier was not written; verify against getImmLoc.
5572 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5573 if (Loc != getInstLoc(Operands)) {
5574 Error(Loc, "TFE modifier has no meaning for store instructions");
5575 return false;
5576 }
5577 }
5578
5579 return true;
5580}
5581
// Check that the register tuple sizes of src0 and src1 are consistent with
// the matrix_a_fmt and matrix_b_fmt operands, respectively. Emits a
// diagnostic and returns false on a mismatch; a format operand the opcode
// does not have is not checked.
5582bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
5583 const OperandVector &Operands) {
5584 unsigned Opc = Inst.getOpcode();
5585 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5586 const MCInstrDesc &Desc = MII.get(Opc);
5587
// Validates one (format operand, source operand) pair.
5588 auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) -> bool {
5589 int FmtIdx = AMDGPU::getNamedOperandIdx(Opc, FmtOp);
5590 if (FmtIdx == -1)
5591 return true;
5592 unsigned Fmt = Inst.getOperand(FmtIdx).getImm();
// Size in bits of the register class declared for the source operand.
// SrcIdx is used without a -1 check, so the source operand is assumed to
// exist whenever the format operand does.
5593 int SrcIdx = AMDGPU::getNamedOperandIdx(Opc, SrcOp);
5594 unsigned RegSize =
5595 TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[SrcIdx], HwMode))
5596 .getSizeInBits();
5597
// NOTE(review): listing line 5598 (the condition guarding this return,
// presumably comparing RegSize with the size required by Fmt) is absent from
// this view.
5599 return true;
5600
5601 Error(getOperandLoc(Operands, SrcIdx),
5602 "wrong register tuple size for " +
5603 Twine(WMMAMods::ModMatrixFmt[Fmt]));
5604 return false;
5605 };
5606
5607 return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
5608 validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
5609}
5610
5611bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, SMLoc IDLoc,
5612 const OperandVector &Operands) {
5613 if (!validateLdsDirect(Inst, Operands))
5614 return false;
5615 if (!validateTrue16OpSel(Inst)) {
5616 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5617 "op_sel operand conflicts with 16-bit operand suffix");
5618 return false;
5619 }
5620 if (!validateSOPLiteral(Inst, Operands))
5621 return false;
5622 if (!validateVOPLiteral(Inst, Operands)) {
5623 return false;
5624 }
5625 if (!validateConstantBusLimitations(Inst, Operands)) {
5626 return false;
5627 }
5628 if (!validateVOPD(Inst, Operands)) {
5629 return false;
5630 }
5631 if (!validateIntClampSupported(Inst)) {
5632 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5633 "integer clamping is not supported on this GPU");
5634 return false;
5635 }
5636 if (!validateOpSel(Inst)) {
5637 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5638 "invalid op_sel operand");
5639 return false;
5640 }
5641 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5642 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5643 "invalid neg_lo operand");
5644 return false;
5645 }
5646 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5647 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5648 "invalid neg_hi operand");
5649 return false;
5650 }
5651 if (!validateDPP(Inst, Operands)) {
5652 return false;
5653 }
5654 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5655 if (!validateMIMGD16(Inst)) {
5656 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5657 "d16 modifier is not supported on this GPU");
5658 return false;
5659 }
5660 if (!validateMIMGDim(Inst, Operands)) {
5661 Error(IDLoc, "missing dim operand");
5662 return false;
5663 }
5664 if (!validateTensorR128(Inst)) {
5665 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5666 "instruction must set modifier r128=0");
5667 return false;
5668 }
5669 if (!validateMIMGMSAA(Inst)) {
5670 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5671 "invalid dim; must be MSAA type");
5672 return false;
5673 }
5674 if (!validateMIMGDataSize(Inst, IDLoc)) {
5675 return false;
5676 }
5677 if (!validateMIMGAddrSize(Inst, IDLoc))
5678 return false;
5679 if (!validateMIMGAtomicDMask(Inst)) {
5680 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5681 "invalid atomic image dmask");
5682 return false;
5683 }
5684 if (!validateMIMGGatherDMask(Inst)) {
5685 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5686 "invalid image_gather dmask: only one bit must be set");
5687 return false;
5688 }
5689 if (!validateMovrels(Inst, Operands)) {
5690 return false;
5691 }
5692 if (!validateOffset(Inst, Operands)) {
5693 return false;
5694 }
5695 if (!validateMAIAccWrite(Inst, Operands)) {
5696 return false;
5697 }
5698 if (!validateMAISrc2(Inst, Operands)) {
5699 return false;
5700 }
5701 if (!validateMFMA(Inst, Operands)) {
5702 return false;
5703 }
5704 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5705 return false;
5706 }
5707
5708 if (!validateAGPRLdSt(Inst)) {
5709 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5710 ? "invalid register class: data and dst should be all VGPR or AGPR"
5711 : "invalid register class: agpr loads and stores not supported on this GPU"
5712 );
5713 return false;
5714 }
5715 if (!validateVGPRAlign(Inst)) {
5716 Error(IDLoc,
5717 "invalid register class: vgpr tuples must be 64 bit aligned");
5718 return false;
5719 }
5720 if (!validateDS(Inst, Operands)) {
5721 return false;
5722 }
5723
5724 if (!validateBLGP(Inst, Operands)) {
5725 return false;
5726 }
5727
5728 if (!validateDivScale(Inst)) {
5729 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5730 return false;
5731 }
5732 if (!validateWaitCnt(Inst, Operands)) {
5733 return false;
5734 }
5735 if (!validateTFE(Inst, Operands)) {
5736 return false;
5737 }
5738 if (!validateWMMA(Inst, Operands)) {
5739 return false;
5740 }
5741
5742 return true;
5743}
5744
5746 const FeatureBitset &FBS,
5747 unsigned VariantID = 0);
5748
// Forward declaration; the definition is not in this part of the file.
// isSupportedMnemo() calls it once per assembler variant to test whether
// Mnemonic is accepted under AvailableFeatures for that variant.
5749static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5750 const FeatureBitset &AvailableFeatures,
5751 unsigned VariantID);
5752
5753bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5754 const FeatureBitset &FBS) {
5755 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5756}
5757
5758bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5759 const FeatureBitset &FBS,
5760 ArrayRef<unsigned> Variants) {
5761 for (auto Variant : Variants) {
5762 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5763 return true;
5764 }
5765
5766 return false;
5767}
5768
5769bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5770 SMLoc IDLoc) {
5771 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5772
5773 // Check if requested instruction variant is supported.
5774 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5775 return false;
5776
5777 // This instruction is not supported.
5778 // Clear any other pending errors because they are no longer relevant.
5779 getParser().clearPendingErrors();
5780
5781 // Requested instruction variant is not supported.
5782 // Check if any other variants are supported.
5783 StringRef VariantName = getMatchedVariantName();
5784 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5785 return Error(IDLoc,
5786 Twine(VariantName,
5787 " variant of this instruction is not supported"));
5788 }
5789
5790 // Check if this instruction may be used with a different wavesize.
5791 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5792 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5793 // FIXME: Use getAvailableFeatures, and do not manually recompute
5794 FeatureBitset FeaturesWS32 = getFeatureBits();
5795 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5796 .flip(AMDGPU::FeatureWavefrontSize32);
5797 FeatureBitset AvailableFeaturesWS32 =
5798 ComputeAvailableFeatures(FeaturesWS32);
5799
5800 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5801 return Error(IDLoc, "instruction requires wavesize=32");
5802 }
5803
5804 // Finally check if this instruction is supported on any other GPU.
5805 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5806 return Error(IDLoc, "instruction not supported on this GPU (" +
5807 getSTI().getCPU() + ")" + ": " + Mnemo);
5808 }
5809
5810 // Instruction not supported on any GPU. Probably a typo.
5811 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5812 return Error(IDLoc, "invalid instruction" + Suggestion);
5813}
5814
5815static bool isInvalidVOPDY(const OperandVector &Operands,
5816 uint64_t InvalidOprIdx) {
5817 assert(InvalidOprIdx < Operands.size());
5818 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5819 if (Op.isToken() && InvalidOprIdx > 1) {
5820 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5821 return PrevOp.isToken() && PrevOp.getToken() == "::";
5822 }
5823 return false;
5824}
5825
5826bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5827 OperandVector &Operands,
5828 MCStreamer &Out,
5829 uint64_t &ErrorInfo,
5830 bool MatchingInlineAsm) {
5831 MCInst Inst;
5832 Inst.setLoc(IDLoc);
5833 unsigned Result = Match_Success;
5834 for (auto Variant : getMatchedVariants()) {
5835 uint64_t EI;
5836 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5837 Variant);
5838 // We order match statuses from least to most specific. We use most specific
5839 // status as resulting
5840 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5841 if (R == Match_Success || R == Match_MissingFeature ||
5842 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5843 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5844 Result != Match_MissingFeature)) {
5845 Result = R;
5846 ErrorInfo = EI;
5847 }
5848 if (R == Match_Success)
5849 break;
5850 }
5851
5852 if (Result == Match_Success) {
5853 if (!validateInstruction(Inst, IDLoc, Operands)) {
5854 return true;
5855 }
5856 Out.emitInstruction(Inst, getSTI());
5857 return false;
5858 }
5859
5860 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5861 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5862 return true;
5863 }
5864
5865 switch (Result) {
5866 default: break;
5867 case Match_MissingFeature:
5868 // It has been verified that the specified instruction
5869 // mnemonic is valid. A match was found but it requires
5870 // features which are not supported on this GPU.
5871 return Error(IDLoc, "operands are not valid for this GPU or mode");
5872
5873 case Match_InvalidOperand: {
5874 SMLoc ErrorLoc = IDLoc;
5875 if (ErrorInfo != ~0ULL) {
5876 if (ErrorInfo >= Operands.size()) {
5877 return Error(IDLoc, "too few operands for instruction");
5878 }
5879 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5880 if (ErrorLoc == SMLoc())
5881 ErrorLoc = IDLoc;
5882
5883 if (isInvalidVOPDY(Operands, ErrorInfo))
5884 return Error(ErrorLoc, "invalid VOPDY instruction");
5885 }
5886 return Error(ErrorLoc, "invalid operand for instruction");
5887 }
5888
5889 case Match_MnemonicFail:
5890 llvm_unreachable("Invalid instructions should have been handled already");
5891 }
5892 llvm_unreachable("Implement any new match types added!");
5893}
5894
5895bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5896 int64_t Tmp = -1;
5897 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5898 return true;
5899 }
5900 if (getParser().parseAbsoluteExpression(Tmp)) {
5901 return true;
5902 }
5903 Ret = static_cast<uint32_t>(Tmp);
5904 return false;
5905}
5906
5907bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5908 if (!getSTI().getTargetTriple().isAMDGCN())
5909 return TokError("directive only supported for amdgcn architecture");
5910
5911 std::string TargetIDDirective;
5912 SMLoc TargetStart = getTok().getLoc();
5913 if (getParser().parseEscapedString(TargetIDDirective))
5914 return true;
5915
5916 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5917 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5918 return getParser().Error(TargetRange.Start,
5919 (Twine(".amdgcn_target directive's target id ") +
5920 Twine(TargetIDDirective) +
5921 Twine(" does not match the specified target id ") +
5922 Twine(getTargetStreamer().getTargetID()->toString())).str());
5923
5924 return false;
5925}
5926
5927bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5928 return Error(Range.Start, "value out of range", Range);
5929}
5930
// Compute the VGPR and SGPR block-count expressions from the next-free
// register expressions.
// On success stores the results in VGPRBlocks / SGPRBlocks and returns false.
// Returns the result of OutOfRangeError(SGPRRange) when the SGPR count can be
// evaluated and exceeds the addressable number of SGPRs.
// VGPRRange is not referenced in the visible body.
5931bool AMDGPUAsmParser::calculateGPRBlocks(
5932 const FeatureBitset &Features, const MCExpr *VCCUsed,
5933 const MCExpr *FlatScrUsed, bool XNACKUsed,
5934 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
5935 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5936 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
5937 // TODO(scott.linder): These calculations are duplicated from
5938 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5939 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5940 MCContext &Ctx = getContext();
5941
5942 const MCExpr *NumSGPRs = NextFreeSGPR;
5943 int64_t EvaluatedSGPRs;
5944
// NOTE(review): listing line 5946 (the statement executed for
// Version.Major >= 10) is absent from this view.
5945 if (Version.Major >= 10)
5947 else {
5948 unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(getSTI());
5949
// Major version 8 and later without the SGPR init bug: the limit applies to
// the count before the extra SGPRs are added.
5950 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
5951 !Features.test(FeatureSGPRInitBug) &&
5952 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5953 return OutOfRangeError(SGPRRange);
5954
// Add the extra SGPRs implied by VCC / flat scratch / XNACK usage.
5955 const MCExpr *ExtraSGPRs =
5956 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
5957 NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
5958
// Major version 7 and earlier, or with the SGPR init bug: the limit applies
// to the count including the extra SGPRs.
5959 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5960 (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5961 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5962 return OutOfRangeError(SGPRRange);
5963
// NOTE(review): listing line 5966 (the value assigned to NumSGPRs when the
// SGPR init bug is present) is absent from this view.
5964 if (Features.test(FeatureSGPRInitBug))
5965 NumSGPRs =
5967 }
5968
5969 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
5970 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
5971 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
5972 unsigned Granule) -> const MCExpr * {
5973 const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
5974 const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
5975 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
5976 const MCExpr *AlignToGPR =
5977 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
5978 const MCExpr *DivGPR =
5979 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
5980 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
5981 return SubGPR;
5982 };
5983
5984 VGPRBlocks = GetNumGPRBlocks(
5985 NextFreeVGPR,
5986 IsaInfo::getVGPREncodingGranule(getSTI(), EnableWavefrontSize32));
5987 SGPRBlocks =
5988 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(getSTI()));
5989
5990 return false;
5991}
5992
/// Parse a `.amdhsa_kernel <name>` block up to `.end_amdhsa_kernel`.
///
/// Each nested `.amdhsa_*` directive is parsed as `<directive> <expression>`,
/// validated, and recorded either in the kernel descriptor \c KD or in a local
/// that is consumed after the loop (register counts, user SGPR counts, preload
/// length/offset, ...). After the loop the required directives are checked,
/// the granulated register block counts are computed via calculateGPRBlocks(),
/// cross-field constraints are verified, and the descriptor is handed to
/// EmitAmdhsaKernelDescriptor().
///
/// \returns false on success; every failure path returns either `true` or the
/// result of Error()/TokError()/OutOfRangeError().
// NOTE(review): several statements of this function (mostly the first line of
// multi-line macro invocations and some guard conditions) are not visible in
// this listing -- confirm against the upstream file before editing the code.
5993bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5994 if (!getSTI().getTargetTriple().isAMDGCN())
5995 return TokError("directive only supported for amdgcn architecture");
5996
5997 if (!isHsaAbi(getSTI()))
5998 return TokError("directive only supported for amdhsa OS");
5999
6000 StringRef KernelName;
6001 if (getParser().parseIdentifier(KernelName))
6002 return true;
6003
6004 AMDGPU::MCKernelDescriptor KD =
6006 &getSTI(), getContext());
6007
// Names of the nested directives already parsed; used to reject repeats and,
// after the loop, to check that required directives were present.
6008 StringSet<> Seen;
6009
6010 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
6011
6012 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
6013 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
6014
6015 SMRange VGPRRange;
6016 const MCExpr *NextFreeVGPR = ZeroExpr;
6017 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
6018 const MCExpr *NamedBarCnt = ZeroExpr;
6019 uint64_t SharedVGPRCount = 0;
6020 uint64_t PreloadLength = 0;
6021 uint64_t PreloadOffset = 0;
6022 SMRange SGPRRange;
6023 const MCExpr *NextFreeSGPR = ZeroExpr;
6024
6025 // Count the number of user SGPRs implied from the enabled feature bits.
6026 unsigned ImpliedUserSGPRCount = 0;
6027
6028 // Track if the asm explicitly contains the directive for the user SGPR
6029 // count.
6030 std::optional<unsigned> ExplicitUserSGPRCount;
6031 const MCExpr *ReserveVCC = OneExpr;
6032 const MCExpr *ReserveFlatScr = OneExpr;
6033 std::optional<bool> EnableWavefrontSize32;
6034
6035 while (true) {
6036 while (trySkipToken(AsmToken::EndOfStatement));
6037
6038 StringRef ID;
6039 SMRange IDRange = getTok().getLocRange();
6040 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
6041 return true;
6042
6043 if (ID == ".end_amdhsa_kernel")
6044 break;
6045
6046 if (!Seen.insert(ID).second)
6047 return TokError(".amdhsa_ directives cannot be repeated");
6048
6049 SMLoc ValStart = getLoc();
6050 const MCExpr *ExprVal;
6051 if (getParser().parseExpression(ExprVal))
6052 return true;
6053 SMLoc ValEnd = getLoc();
6054 SMRange ValRange = SMRange(ValStart, ValEnd);
6055
// Val holds the evaluated operand when the expression is already absolute and
// stays 0 otherwise, so the width/range checks below trivially pass for
// expressions that cannot be resolved yet. Negative values are rejected.
6056 int64_t IVal = 0;
6057 uint64_t Val = IVal;
6058 bool EvaluatableExpr;
6059 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
6060 if (IVal < 0)
6061 return OutOfRangeError(ValRange);
6062 Val = IVal;
6063 }
6064
6065#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
6066 if (!isUInt<ENTRY##_WIDTH>(Val)) \
6067 return OutOfRangeError(RANGE); \
6068 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
6069 getContext());
6070
6071// Some fields use the parsed value immediately which requires the expression to
6072// be solvable.
6073#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6074 if (!(RESOLVED)) \
6075 return Error(IDRange.Start, "directive should have resolvable expression", \
6076 IDRange);
6077
6078 if (ID == ".amdhsa_group_segment_fixed_size") {
6080 CHAR_BIT>(Val))
6081 return OutOfRangeError(ValRange);
6082 KD.group_segment_fixed_size = ExprVal;
6083 } else if (ID == ".amdhsa_private_segment_fixed_size") {
6085 CHAR_BIT>(Val))
6086 return OutOfRangeError(ValRange);
6087 KD.private_segment_fixed_size = ExprVal;
6088 } else if (ID == ".amdhsa_kernarg_size") {
6089 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
6090 return OutOfRangeError(ValRange);
6091 KD.kernarg_size = ExprVal;
6092 } else if (ID == ".amdhsa_user_sgpr_count") {
6093 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6094 ExplicitUserSGPRCount = Val;
6095 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
6096 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6098 return Error(IDRange.Start,
6099 "directive is not supported with architected flat scratch",
6100 IDRange);
6102 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6103 ExprVal, ValRange);
6104 if (Val)
6105 ImpliedUserSGPRCount += 4;
6106 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
6107 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6108 if (!hasKernargPreload())
6109 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6110
6111 if (Val > getMaxNumUserSGPRs())
6112 return OutOfRangeError(ValRange);
6113 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
6114 ValRange);
6115 if (Val) {
6116 ImpliedUserSGPRCount += Val;
6117 PreloadLength = Val;
6118 }
6119 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
6120 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6121 if (!hasKernargPreload())
6122 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6123
6124 if (Val >= 1024)
6125 return OutOfRangeError(ValRange);
6126 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
6127 ValRange);
6128 if (Val)
6129 PreloadOffset = Val;
6130 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
6131 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6133 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6134 ValRange);
6135 if (Val)
6136 ImpliedUserSGPRCount += 2;
6137 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
6138 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6140 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6141 ValRange);
6142 if (Val)
6143 ImpliedUserSGPRCount += 2;
6144 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
6145 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6147 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6148 ExprVal, ValRange);
6149 if (Val)
6150 ImpliedUserSGPRCount += 2;
6151 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
6152 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6154 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6155 ValRange);
6156 if (Val)
6157 ImpliedUserSGPRCount += 2;
6158 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
6160 return Error(IDRange.Start,
6161 "directive is not supported with architected flat scratch",
6162 IDRange);
6163 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6165 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6166 ExprVal, ValRange);
6167 if (Val)
6168 ImpliedUserSGPRCount += 2;
6169 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
6170 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6172 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6173 ExprVal, ValRange);
6174 if (Val)
6175 ImpliedUserSGPRCount += 1;
6176 } else if (ID == ".amdhsa_wavefront_size32") {
6177 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6178 if (IVersion.Major < 10)
6179 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6180 EnableWavefrontSize32 = Val;
6182 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6183 ValRange);
6184 } else if (ID == ".amdhsa_uses_dynamic_stack") {
6186 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6187 ValRange);
6188 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6190 return Error(IDRange.Start,
6191 "directive is not supported with architected flat scratch",
6192 IDRange);
6194 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6195 ValRange);
6196 } else if (ID == ".amdhsa_enable_private_segment") {
6198 return Error(
6199 IDRange.Start,
6200 "directive is not supported without architected flat scratch",
6201 IDRange);
6203 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6204 ValRange);
6205 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
6207 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6208 ValRange);
6209 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
6211 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6212 ValRange);
6213 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
6215 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6216 ValRange);
6217 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
6219 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6220 ValRange);
6221 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
6223 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6224 ValRange);
6225 } else if (ID == ".amdhsa_next_free_vgpr") {
6226 VGPRRange = ValRange;
6227 NextFreeVGPR = ExprVal;
6228 } else if (ID == ".amdhsa_next_free_sgpr") {
6229 SGPRRange = ValRange;
6230 NextFreeSGPR = ExprVal;
6231 } else if (ID == ".amdhsa_accum_offset") {
6232 if (!isGFX90A())
6233 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6234 AccumOffset = ExprVal;
6235 } else if (ID == ".amdhsa_named_barrier_count") {
6236 if (!isGFX1250Plus())
6237 return Error(IDRange.Start, "directive requires gfx1250+", IDRange);
6238 NamedBarCnt = ExprVal;
6239 } else if (ID == ".amdhsa_reserve_vcc") {
6240 if (EvaluatableExpr && !isUInt<1>(Val))
6241 return OutOfRangeError(ValRange);
6242 ReserveVCC = ExprVal;
6243 } else if (ID == ".amdhsa_reserve_flat_scratch") {
6244 if (IVersion.Major < 7)
6245 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
6247 return Error(IDRange.Start,
6248 "directive is not supported with architected flat scratch",
6249 IDRange);
6250 if (EvaluatableExpr && !isUInt<1>(Val))
6251 return OutOfRangeError(ValRange);
6252 ReserveFlatScr = ExprVal;
6253 } else if (ID == ".amdhsa_reserve_xnack_mask") {
6254 if (IVersion.Major < 8)
6255 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
// NOTE(review): unlike the reserve_vcc/reserve_flat_scratch branches, this one
// neither checks EvaluatableExpr nor stores ExprVal; an unresolved expression
// leaves Val == 0 and is compared against the target id as if it were 0.
6256 if (!isUInt<1>(Val))
6257 return OutOfRangeError(ValRange);
6258 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6259 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
6260 IDRange);
6261 } else if (ID == ".amdhsa_float_round_mode_32") {
6263 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6264 ValRange);
6265 } else if (ID == ".amdhsa_float_round_mode_16_64") {
6267 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6268 ValRange);
6269 } else if (ID == ".amdhsa_float_denorm_mode_32") {
6271 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6272 ValRange);
6273 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
6275 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6276 ValRange);
6277 } else if (ID == ".amdhsa_dx10_clamp") {
6278 if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
6279 return Error(IDRange.Start, "directive unsupported on gfx1170+",
6280 IDRange);
6282 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6283 ValRange);
6284 } else if (ID == ".amdhsa_ieee_mode") {
6285 if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
6286 return Error(IDRange.Start, "directive unsupported on gfx1170+",
6287 IDRange);
6289 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6290 ValRange);
6291 } else if (ID == ".amdhsa_fp16_overflow") {
6292 if (IVersion.Major < 9)
6293 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
6295 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6296 ValRange);
6297 } else if (ID == ".amdhsa_tg_split") {
6298 if (!isGFX90A())
6299 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6300 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
6301 ExprVal, ValRange);
6302 } else if (ID == ".amdhsa_workgroup_processor_mode") {
6303 if (!supportsWGP(getSTI()))
6304 return Error(IDRange.Start,
6305 "directive unsupported on " + getSTI().getCPU(), IDRange);
6307 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6308 ValRange);
6309 } else if (ID == ".amdhsa_memory_ordered") {
6310 if (IVersion.Major < 10)
6311 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6313 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6314 ValRange);
6315 } else if (ID == ".amdhsa_forward_progress") {
6316 if (IVersion.Major < 10)
6317 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6319 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6320 ValRange);
6321 } else if (ID == ".amdhsa_shared_vgpr_count") {
6322 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6323 if (IVersion.Major < 10 || IVersion.Major >= 12)
6324 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
6325 IDRange);
6326 SharedVGPRCount = Val;
6328 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6329 ValRange);
6330 } else if (ID == ".amdhsa_inst_pref_size") {
6331 if (IVersion.Major < 11)
6332 return Error(IDRange.Start, "directive requires gfx11+", IDRange);
6333 if (IVersion.Major == 11) {
6335 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6336 ValRange);
6337 } else {
6339 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6340 ValRange);
6341 }
6342 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
6345 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6346 ExprVal, ValRange);
6347 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
6349 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6350 ExprVal, ValRange);
6351 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
6354 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6355 ExprVal, ValRange);
6356 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
6358 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6359 ExprVal, ValRange);
6360 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
6362 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6363 ExprVal, ValRange);
6364 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
6366 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6367 ExprVal, ValRange);
6368 } else if (ID == ".amdhsa_exception_int_div_zero") {
6370 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6371 ExprVal, ValRange);
6372 } else if (ID == ".amdhsa_round_robin_scheduling") {
6373 if (IVersion.Major < 12)
6374 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
6376 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6377 ValRange);
6378 } else {
6379 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
6380 }
6381
6382#undef PARSE_BITS_ENTRY
6383 }
6384
// Both register-count directives are mandatory.
6385 if (!Seen.contains(".amdhsa_next_free_vgpr"))
6386 return TokError(".amdhsa_next_free_vgpr directive is required");
6387
6388 if (!Seen.contains(".amdhsa_next_free_sgpr"))
6389 return TokError(".amdhsa_next_free_sgpr directive is required");
6390
// An explicit .amdhsa_user_sgpr_count takes precedence over the count implied
// by the individual user-SGPR enable directives.
6391 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6392 if (UserSGPRCount > getMaxNumUserSGPRs())
6393 return TokError("too many user SGPRs enabled, found " +
6394 Twine(UserSGPRCount) + ", but only " +
6395 Twine(getMaxNumUserSGPRs()) + " are supported.");
6396
6397 // Consider the case where the total number of UserSGPRs with trailing
6398 // allocated preload SGPRs, is greater than the number of explicitly
6399 // referenced SGPRs.
6400 if (PreloadLength) {
6401 MCContext &Ctx = getContext();
6402 NextFreeSGPR = AMDGPUMCExpr::createMax(
6403 {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx);
6404 }
6405
6406 const MCExpr *VGPRBlocks;
6407 const MCExpr *SGPRBlocks;
6408 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6409 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6410 EnableWavefrontSize32, NextFreeVGPR,
6411 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6412 SGPRBlocks))
6413 return true;
6414
// EvaluatedVGPRBlocks is only meaningful when VGPRBlocksEvaluatable is true;
// every later use is guarded by that flag.
6415 int64_t EvaluatedVGPRBlocks;
6416 bool VGPRBlocksEvaluatable =
6417 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6418 if (VGPRBlocksEvaluatable &&
6420 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
6421 return OutOfRangeError(VGPRRange);
6422 }
6424 KD.compute_pgm_rsrc1, VGPRBlocks,
6425 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6426 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
6427
6428 int64_t EvaluatedSGPRBlocks;
6429 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6431 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
6432 return OutOfRangeError(SGPRRange);
6434 KD.compute_pgm_rsrc1, SGPRBlocks,
6435 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6436 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
6437
// NOTE(review): the diagnostic below says "amdgpu_user_sgpr_count" although
// the directive parsed above is spelled ".amdhsa_user_sgpr_count".
6438 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6439 return TokError("amdgpu_user_sgpr_count smaller than implied by "
6440 "enabled user SGPRs");
6441
// The user SGPR count field uses a different shift/mask pair on GFX1250+.
6442 if (isGFX1250Plus()) {
6445 MCConstantExpr::create(UserSGPRCount, getContext()),
6446 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6447 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());
6448 } else {
6451 MCConstantExpr::create(UserSGPRCount, getContext()),
6452 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6453 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());
6454 }
6455
// The preload check needs a concrete kernarg size. PreloadLength and
// PreloadOffset are multiplied by 4 before being compared with it; a kernarg
// size of 0 skips the check.
6456 int64_t IVal = 0;
6457 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
6458 return TokError("Kernarg size should be resolvable");
6459 uint64_t kernarg_size = IVal;
6460 if (PreloadLength && kernarg_size &&
6461 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6462 return TokError("Kernarg preload length + offset is larger than the "
6463 "kernarg segment size");
6464
6465 if (isGFX90A()) {
6466 if (!Seen.contains(".amdhsa_accum_offset"))
6467 return TokError(".amdhsa_accum_offset directive is required");
// NOTE(review): UEvaluatedAccum is copied from EvaluatedAccum before
// AccumEvaluatable is tested, so it reads an uninitialized value when the
// expression is not evaluatable; it is only *used* under AccumEvaluatable.
6468 int64_t EvaluatedAccum;
6469 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
6470 uint64_t UEvaluatedAccum = EvaluatedAccum;
6471 if (AccumEvaluatable &&
6472 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6473 return TokError("accum_offset should be in range [4..256] in "
6474 "increments of 4");
6475
6476 int64_t EvaluatedNumVGPR;
6477 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
6478 AccumEvaluatable &&
6479 UEvaluatedAccum >
6480 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
6481 return TokError("accum_offset exceeds total VGPR allocation");
6482 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
6484 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
6487 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6488 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6489 getContext());
6490 }
6491
6492 if (isGFX1250Plus())
6494 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6495 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
6496 getContext());
6497
6498 if (IVersion.Major >= 10 && IVersion.Major < 12) {
6499 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
6500 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6501 return TokError("shared_vgpr_count directive not valid on "
6502 "wavefront size 32");
6503 }
6504
6505 if (VGPRBlocksEvaluatable &&
6506 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
6507 63)) {
6508 return TokError("shared_vgpr_count*2 + "
6509 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
6510 "exceed 63\n");
6511 }
6512 }
6513
6514 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
6515 NextFreeVGPR, NextFreeSGPR,
6516 ReserveVCC, ReserveFlatScr);
6517 return false;
6518}
6519
6520bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6521 uint32_t Version;
6522 if (ParseAsAbsoluteExpression(Version))
6523 return true;
6524
6525 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
6526 return false;
6527}
6528
6529bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6530 AMDGPUMCKernelCodeT &C) {
6531 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6532 // assembly for backwards compatibility.
6533 if (ID == "max_scratch_backing_memory_byte_size") {
6534 Parser.eatToEndOfStatement();
6535 return false;
6536 }
6537
6538 SmallString<40> ErrStr;
6539 raw_svector_ostream Err(ErrStr);
6540 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
6541 return TokError(Err.str());
6542 }
6543 Lex();
6544
6545 if (ID == "enable_wavefront_size32") {
6546 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6547 if (!isGFX10Plus())
6548 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
6549 if (!isWave32())
6550 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
6551 } else {
6552 if (!isWave64())
6553 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
6554 }
6555 }
6556
6557 if (ID == "wavefront_size") {
6558 if (C.wavefront_size == 5) {
6559 if (!isGFX10Plus())
6560 return TokError("wavefront_size=5 is only allowed on GFX10+");
6561 if (!isWave32())
6562 return TokError("wavefront_size=5 requires +WavefrontSize32");
6563 } else if (C.wavefront_size == 6) {
6564 if (!isWave64())
6565 return TokError("wavefront_size=6 requires +WavefrontSize64");
6566 }
6567 }
6568
6569 return false;
6570}
6571
6572bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6573 AMDGPUMCKernelCodeT KernelCode;
6574 KernelCode.initDefault(getSTI(), getContext());
6575
6576 while (true) {
6577 // Lex EndOfStatement. This is in a while loop, because lexing a comment
6578 // will set the current token to EndOfStatement.
6579 while(trySkipToken(AsmToken::EndOfStatement));
6580
6581 StringRef ID;
6582 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
6583 return true;
6584
6585 if (ID == ".end_amd_kernel_code_t")
6586 break;
6587
6588 if (ParseAMDKernelCodeTValue(ID, KernelCode))
6589 return true;
6590 }
6591
6592 KernelCode.validate(&getSTI(), getContext());
6593 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6594
6595 return false;
6596}
6597
/// Parse the symbol-name operand of the `.amdgpu_hsa_kernel` directive, pass
/// it to the target streamer's EmitAMDGPUSymbolType(), and (re)initialize
/// KernelScope for the current context.
///
/// \returns true if no symbol name could be parsed, false otherwise.
// NOTE(review): the second argument of EmitAMDGPUSymbolType() is not visible
// in this listing -- confirm against the upstream file.
6598bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6599 StringRef KernelName;
6600 if (!parseId(KernelName, "expected symbol name"))
6601 return true;
6602
6603 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6605
6606 KernelScope.initialize(getContext());
6607 return false;
6608}
6609
6610bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6611 if (!getSTI().getTargetTriple().isAMDGCN()) {
6612 return Error(getLoc(),
6613 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6614 "architectures");
6615 }
6616
6617 auto TargetIDDirective = getLexer().getTok().getStringContents();
6618 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6619 return Error(getParser().getTok().getLoc(), "target id must match options");
6620
6621 getTargetStreamer().EmitISAVersion();
6622 Lex();
6623
6624 return false;
6625}
6626
6627bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6628 assert(isHsaAbi(getSTI()));
6629
6630 std::string HSAMetadataString;
6631 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6632 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6633 return true;
6634
6635 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6636 return Error(getLoc(), "invalid HSA metadata");
6637
6638 return false;
6639}
6640
6641/// Common code to parse out a block of text (typically YAML) between start and
6642/// end directives.
6643bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6644 const char *AssemblerDirectiveEnd,
6645 std::string &CollectString) {
6646
6647 raw_string_ostream CollectStream(CollectString);
6648
6649 getLexer().setSkipSpace(false);
6650
6651 bool FoundEnd = false;
6652 while (!isToken(AsmToken::Eof)) {
6653 while (isToken(AsmToken::Space)) {
6654 CollectStream << getTokenStr();
6655 Lex();
6656 }
6657
6658 if (trySkipId(AssemblerDirectiveEnd)) {
6659 FoundEnd = true;
6660 break;
6661 }
6662
6663 CollectStream << Parser.parseStringToEndOfStatement()
6664 << getContext().getAsmInfo().getSeparatorString();
6665
6666 Parser.eatToEndOfStatement();
6667 }
6668
6669 getLexer().setSkipSpace(true);
6670
6671 if (isToken(AsmToken::Eof) && !FoundEnd) {
6672 return TokError(Twine("expected directive ") +
6673 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6674 }
6675
6676 return false;
6677}
6678
6679/// Parse the assembler directive for new MsgPack-format PAL metadata.
///
/// The text up to the end directive is collected with ParseToEndDirective()
/// and passed to the streamer's PAL metadata object via setFromString().
///
/// \returns true if collecting the text fails, the result of Error() if
/// setFromString() rejects it, and false otherwise.
// NOTE(review): the end-directive argument of ParseToEndDirective() is not
// visible in this listing -- confirm against the upstream file.
6680bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6681 std::string String;
6682 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6684 return true;
6685
6686 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6687 if (!PALMetadata->setFromString(String))
6688 return Error(getLoc(), "invalid PAL metadata");
6689 return false;
6690}
6691
6692/// Parse the assembler directive for old linear-format PAL metadata.
///
/// The operand is a comma-separated list of absolute expressions consumed in
/// (key, value) pairs; each pair is recorded with setRegister(). The directive
/// is rejected when the target OS is not AMDPAL.
///
/// \returns false on success, or the result of Error()/TokError() on failure.
// NOTE(review): the tails of the three TokError() message expressions are not
// visible in this listing -- confirm against the upstream file.
6693bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6694 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6695 return Error(getLoc(),
6696 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6697 "not available on non-amdpal OSes")).str());
6698 }
6699
6700 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6701 PALMetadata->setLegacy();
// Each iteration consumes one `key, value` pair; the loop ends when the pair
// is not followed by another comma.
6702 for (;;) {
6703 uint32_t Key, Value;
6704 if (ParseAsAbsoluteExpression(Key)) {
6705 return TokError(Twine("invalid value in ") +
6707 }
6708 if (!trySkipToken(AsmToken::Comma)) {
6709 return TokError(Twine("expected an even number of values in ") +
6711 }
6712 if (ParseAsAbsoluteExpression(Value)) {
6713 return TokError(Twine("invalid value in ") +
6715 }
6716 PALMetadata->setRegister(Key, Value);
6717 if (!trySkipToken(AsmToken::Comma))
6718 break;
6719 }
6720 return false;
6721}
6722
6723/// ParseDirectiveAMDGPULDS
6724/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
6725bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6726 if (getParser().checkForValidSection())
6727 return true;
6728
6729 StringRef Name;
6730 SMLoc NameLoc = getLoc();
6731 if (getParser().parseIdentifier(Name))
6732 return TokError("expected identifier in directive");
6733
6734 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6735 if (getParser().parseComma())
6736 return true;
6737
6738 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(getSTI());
6739
6740 int64_t Size;
6741 SMLoc SizeLoc = getLoc();
6742 if (getParser().parseAbsoluteExpression(Size))
6743 return true;
6744 if (Size < 0)
6745 return Error(SizeLoc, "size must be non-negative");
6746 if (Size > LocalMemorySize)
6747 return Error(SizeLoc, "size is too large");
6748
6749 int64_t Alignment = 4;
6750 if (trySkipToken(AsmToken::Comma)) {
6751 SMLoc AlignLoc = getLoc();
6752 if (getParser().parseAbsoluteExpression(Alignment))
6753 return true;
6754 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6755 return Error(AlignLoc, "alignment must be a power of two");
6756
6757 // Alignment larger than the size of LDS is possible in theory, as long
6758 // as the linker manages to place to symbol at address 0, but we do want
6759 // to make sure the alignment fits nicely into a 32-bit integer.
6760 if (Alignment >= 1u << 31)
6761 return Error(AlignLoc, "alignment is too large");
6762 }
6763
6764 if (parseEOL())
6765 return true;
6766
6767 Symbol->redefineIfPossible();
6768 if (!Symbol->isUndefined())
6769 return Error(NameLoc, "invalid symbol redefinition");
6770
6771 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6772 return false;
6773}
6774
6775bool AMDGPUAsmParser::ParseDirectiveAMDGPUInfo() {
6776 if (getParser().checkForValidSection())
6777 return true;
6778
6779 StringRef FuncName;
6780 if (getParser().parseIdentifier(FuncName))
6781 return TokError("expected symbol name after .amdgpu_info");
6782
6783 MCSymbol *FuncSym = getContext().getOrCreateSymbol(FuncName);
6784 AMDGPU::InfoSectionData ParsedInfoData;
6785 AMDGPU::FuncInfo FI;
6786 FI.Sym = FuncSym;
6787 bool HasScalarAttrs = false;
6788
6789 while (true) {
6790 while (trySkipToken(AsmToken::EndOfStatement))
6791 ;
6792
6793 StringRef ID;
6794 SMLoc IDLoc = getLoc();
6795 if (!parseId(ID, "expected directive or .end_amdgpu_info"))
6796 return true;
6797
6798 if (ID == ".end_amdgpu_info")
6799 break;
6800
6801 // Every per-entry directive shares the `.amdgpu_` namespace prefix; strip
6802 // it once and dispatch on the distinguishing suffix below. The unstripped
6803 // ID is preserved for diagnostics.
6804 StringRef Dir = ID;
6805 if (!Dir.consume_front(".amdgpu_"))
6806 return Error(IDLoc, "unknown .amdgpu_info directive '" + ID + "'");
6807
6808 if (Dir == "flags") {
6809 int64_t Val;
6810 if (getParser().parseAbsoluteExpression(Val))
6811 return true;
6812 auto Flags = static_cast<AMDGPU::FuncInfoFlags>(Val);
6813 FI.UsesVCC = !!(Flags & AMDGPU::FuncInfoFlags::FUNC_USES_VCC);
6814 FI.UsesFlatScratch =
6815 !!(Flags & AMDGPU::FuncInfoFlags::FUNC_USES_FLAT_SCRATCH);
6816 FI.HasDynStack = !!(Flags & AMDGPU::FuncInfoFlags::FUNC_HAS_DYN_STACK);
6817 HasScalarAttrs = true;
6818 } else if (Dir == "num_sgpr") {
6819 int64_t Val;
6820 if (getParser().parseAbsoluteExpression(Val))
6821 return true;
6822 FI.NumSGPR = static_cast<uint32_t>(Val);
6823 HasScalarAttrs = true;
6824 } else if (Dir == "num_vgpr") {
6825 int64_t Val;
6826 if (getParser().parseAbsoluteExpression(Val))
6827 return true;
6828 FI.NumArchVGPR = static_cast<uint32_t>(Val);
6829 HasScalarAttrs = true;
6830 } else if (Dir == "num_agpr") {
6831 int64_t Val;
6832 if (getParser().parseAbsoluteExpression(Val))
6833 return true;
6834 FI.NumAccVGPR = static_cast<uint32_t>(Val);
6835 HasScalarAttrs = true;
6836 } else if (Dir == "private_segment_size") {
6837 int64_t Val;
6838 if (getParser().parseAbsoluteExpression(Val))
6839 return true;
6840 FI.PrivateSegmentSize = static_cast<uint32_t>(Val);
6841 HasScalarAttrs = true;
6842 } else if (Dir == "use") {
6843 StringRef ResName;
6844 if (getParser().parseIdentifier(ResName))
6845 return TokError("expected resource symbol for .amdgpu_use");
6846 ParsedInfoData.Uses.push_back(
6847 {FuncSym, getContext().getOrCreateSymbol(ResName)});
6848 } else if (Dir == "call") {
6849 StringRef DstName;
6850 if (getParser().parseIdentifier(DstName))
6851 return TokError("expected callee symbol for .amdgpu_call");
6852 ParsedInfoData.Calls.push_back(
6853 {FuncSym, getContext().getOrCreateSymbol(DstName)});
6854 } else if (Dir == "indirect_call") {
6855 std::string TypeId;
6856 if (getParser().parseEscapedString(TypeId))
6857 return TokError("expected type ID string for .amdgpu_indirect_call");
6858 ParsedInfoData.IndirectCalls.push_back({FuncSym, std::move(TypeId)});
6859 } else if (Dir == "typeid") {
6860 std::string TypeId;
6861 if (getParser().parseEscapedString(TypeId))
6862 return TokError("expected type ID string for .amdgpu_typeid");
6863 ParsedInfoData.TypeIds.push_back({FuncSym, std::move(TypeId)});
6864 } else {
6865 return Error(IDLoc, "unknown .amdgpu_info directive '" + ID + "'");
6866 }
6867 }
6868
6869 if (HasScalarAttrs)
6870 ParsedInfoData.Funcs.push_back(std::move(FI));
6871
6872 AMDGPU::InfoSectionData &Data = InfoData ? *InfoData : InfoData.emplace();
6873 for (AMDGPU::FuncInfo &Func : ParsedInfoData.Funcs)
6874 Data.Funcs.push_back(std::move(Func));
6875 for (std::pair<MCSymbol *, MCSymbol *> &Use : ParsedInfoData.Uses)
6876 Data.Uses.push_back(Use);
6877 for (std::pair<MCSymbol *, MCSymbol *> &Call : ParsedInfoData.Calls)
6878 Data.Calls.push_back(Call);
6879 for (std::pair<MCSymbol *, std::string> &IndirectCall :
6880 ParsedInfoData.IndirectCalls)
6881 Data.IndirectCalls.push_back(std::move(IndirectCall));
6882 for (std::pair<MCSymbol *, std::string> &TypeId : ParsedInfoData.TypeIds)
6883 Data.TypeIds.push_back(std::move(TypeId));
6884
6885 return false;
6886}
6887
6888void AMDGPUAsmParser::onEndOfFile() {
6889 if (InfoData)
6890 getTargetStreamer().emitAMDGPUInfo(*InfoData);
6891}
6892
/// Dispatch a target-specific assembler directive by name.
///
/// Directives are first matched against an ABI-dependent set (HSA ABI vs.
/// everything else) and then against the directives accepted regardless of
/// ABI. A matching directive returns the result of its dedicated parser.
///
/// \returns the handler's result, or true when no directive matched --
/// presumably signalling "not handled here" to the generic parser; confirm
/// against the MCTargetAsmParser::ParseDirective contract.
// NOTE(review): the conditions guarding ParseDirectiveHSAMetadata() and the
// "not available on non-amdhsa OSes" error are not visible in this listing --
// confirm against the upstream file.
6893bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6894 StringRef IDVal = DirectiveID.getString();
6895
6896 if (isHsaAbi(getSTI())) {
6897 if (IDVal == ".amdhsa_kernel")
6898 return ParseDirectiveAMDHSAKernel();
6899
6900 if (IDVal == ".amdhsa_code_object_version")
6901 return ParseDirectiveAMDHSACodeObjectVersion();
6902
6903 // TODO: Restructure/combine with PAL metadata directive.
6905 return ParseDirectiveHSAMetadata();
6906 } else {
6907 if (IDVal == ".amd_kernel_code_t")
6908 return ParseDirectiveAMDKernelCodeT();
6909
6910 if (IDVal == ".amdgpu_hsa_kernel")
6911 return ParseDirectiveAMDGPUHsaKernel();
6912
6913 if (IDVal == ".amd_amdgpu_isa")
6914 return ParseDirectiveISAVersion();
6915
6917 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6918 Twine(" directive is "
6919 "not available on non-amdhsa OSes"))
6920 .str());
6921 }
6922 }
6923
// Directives below are accepted independently of the ABI check above.
6924 if (IDVal == ".amdgcn_target")
6925 return ParseDirectiveAMDGCNTarget();
6926
6927 if (IDVal == ".amdgpu_lds")
6928 return ParseDirectiveAMDGPULDS();
6929
6930 if (IDVal == ".amdgpu_info")
6931 return ParseDirectiveAMDGPUInfo();
6932
6933 if (IDVal == PALMD::AssemblerDirectiveBegin)
6934 return ParseDirectivePALMetadataBegin();
6935
6936 if (IDVal == PALMD::AssemblerDirective)
6937 return ParseDirectivePALMetadata();
6938
6939 return true;
6940}
6941
6942bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6943 MCRegister Reg) {
6944 if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
6945 return isGFX9Plus();
6946
6947 // GFX10+ has 2 more SGPRs 104 and 105.
6948 if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
6949 return hasSGPR104_SGPR105();
6950
6951 switch (Reg.id()) {
6952 case SRC_SHARED_BASE_LO:
6953 case SRC_SHARED_BASE:
6954 case SRC_SHARED_LIMIT_LO:
6955 case SRC_SHARED_LIMIT:
6956 case SRC_PRIVATE_BASE_LO:
6957 case SRC_PRIVATE_BASE:
6958 case SRC_PRIVATE_LIMIT_LO:
6959 case SRC_PRIVATE_LIMIT:
6960 return isGFX9Plus();
6961 case SRC_FLAT_SCRATCH_BASE_LO:
6962 case SRC_FLAT_SCRATCH_BASE_HI:
6963 return hasGloballyAddressableScratch();
6964 case SRC_POPS_EXITING_WAVE_ID:
6965 return isGFX9Plus() && !isGFX11Plus();
6966 case TBA:
6967 case TBA_LO:
6968 case TBA_HI:
6969 case TMA:
6970 case TMA_LO:
6971 case TMA_HI:
6972 return !isGFX9Plus();
6973 case XNACK_MASK:
6974 case XNACK_MASK_LO:
6975 case XNACK_MASK_HI:
6976 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6977 case SGPR_NULL:
6978 return isGFX10Plus();
6979 case SRC_EXECZ:
6980 case SRC_VCCZ:
6981 return !isGFX11Plus();
6982 default:
6983 break;
6984 }
6985
6986 if (isCI())
6987 return true;
6988
6989 if (isSI() || isGFX10Plus()) {
6990 // No flat_scr on SI.
6991 // On GFX10Plus flat scratch is not a valid register operand and can only be
6992 // accessed with s_setreg/s_getreg.
6993 switch (Reg.id()) {
6994 case FLAT_SCR:
6995 case FLAT_SCR_LO:
6996 case FLAT_SCR_HI:
6997 return false;
6998 default:
6999 return true;
7000 }
7001 }
7002
7003 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
7004 // SI/CI have.
7005 if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
7006 return hasSGPR102_SGPR103();
7007
7008 return true;
7009}
7010
7011ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
7012 StringRef Mnemonic,
7013 OperandMode Mode) {
7014 ParseStatus Res = parseVOPD(Operands);
7015 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
7016 return Res;
7017
7018 // Try to parse with a custom parser
7019 Res = MatchOperandParserImpl(Operands, Mnemonic);
7020
7021 // If we successfully parsed the operand or if there as an error parsing,
7022 // we are done.
7023 //
7024 // If we are parsing after we reach EndOfStatement then this means we
7025 // are appending default values to the Operands list. This is only done
7026 // by custom parser, so we shouldn't continue on to the generic parsing.
7027 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
7028 return Res;
7029
7030 SMLoc RBraceLoc;
7031 SMLoc LBraceLoc = getLoc();
7032 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
7033 unsigned Prefix = Operands.size();
7034
7035 for (;;) {
7036 auto Loc = getLoc();
7037 Res = parseReg(Operands);
7038 if (Res.isNoMatch())
7039 Error(Loc, "expected a register");
7040 if (!Res.isSuccess())
7041 return ParseStatus::Failure;
7042
7043 RBraceLoc = getLoc();
7044 if (trySkipToken(AsmToken::RBrac))
7045 break;
7046
7047 if (!skipToken(AsmToken::Comma,
7048 "expected a comma or a closing square bracket"))
7049 return ParseStatus::Failure;
7050 }
7051
7052 if (Operands.size() - Prefix > 1) {
7053 Operands.insert(Operands.begin() + Prefix,
7054 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
7055 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
7056 }
7057
7058 return ParseStatus::Success;
7059 }
7060
7061 return parseRegOrImm(Operands);
7062}
7063
7064StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
7065 // Clear any forced encodings from the previous instruction.
7066 setForcedEncodingSize(0);
7067 setForcedDPP(false);
7068 setForcedSDWA(false);
7069
7070 if (Name.consume_back("_e64_dpp")) {
7071 setForcedDPP(true);
7072 setForcedEncodingSize(64);
7073 return Name;
7074 }
7075 if (Name.consume_back("_e64")) {
7076 setForcedEncodingSize(64);
7077 return Name;
7078 }
7079 if (Name.consume_back("_e32")) {
7080 setForcedEncodingSize(32);
7081 return Name;
7082 }
7083 if (Name.consume_back("_dpp")) {
7084 setForcedDPP(true);
7085 return Name;
7086 }
7087 if (Name.consume_back("_sdwa")) {
7088 setForcedSDWA(true);
7089 return Name;
7090 }
7091 return Name;
7092}
7093
7094static void applyMnemonicAliases(StringRef &Mnemonic,
7095 const FeatureBitset &Features,
7096 unsigned VariantID);
7097
7098bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
7099 StringRef Name, SMLoc NameLoc,
7100 OperandVector &Operands) {
7101 // Add the instruction mnemonic
7102 Name = parseMnemonicSuffix(Name);
7103
7104 // If the target architecture uses MnemonicAlias, call it here to parse
7105 // operands correctly.
7106 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
7107
7108 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
7109
7110 bool IsMIMG = Name.starts_with("image_");
7111
7112 while (!trySkipToken(AsmToken::EndOfStatement)) {
7113 OperandMode Mode = OperandMode_Default;
7114 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
7115 Mode = OperandMode_NSA;
7116 ParseStatus Res = parseOperand(Operands, Name, Mode);
7117
7118 if (!Res.isSuccess()) {
7119 checkUnsupportedInstruction(Name, NameLoc);
7120 if (!Parser.hasPendingError()) {
7121 // FIXME: use real operand location rather than the current location.
7122 StringRef Msg = Res.isFailure() ? "failed parsing operand."
7123 : "not a valid operand.";
7124 Error(getLoc(), Msg);
7125 }
7126 while (!trySkipToken(AsmToken::EndOfStatement)) {
7127 lex();
7128 }
7129 return true;
7130 }
7131
7132 // Eat the comma or space if there is one.
7133 trySkipToken(AsmToken::Comma);
7134 }
7135
7136 return false;
7137}
7138
7139//===----------------------------------------------------------------------===//
7140// Utility functions
7141//===----------------------------------------------------------------------===//
7142
7143ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
7144 OperandVector &Operands) {
7145 SMLoc S = getLoc();
7146 if (!trySkipId(Name))
7147 return ParseStatus::NoMatch;
7148
7149 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
7150 return ParseStatus::Success;
7151}
7152
7153ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
7154 int64_t &IntVal) {
7155
7156 if (!trySkipId(Prefix, AsmToken::Colon))
7157 return ParseStatus::NoMatch;
7158
7160}
7161
7162ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
7163 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7164 std::function<bool(int64_t &)> ConvertResult) {
7165 SMLoc S = getLoc();
7166 int64_t Value = 0;
7167
7168 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
7169 if (!Res.isSuccess())
7170 return Res;
7171
7172 if (ConvertResult && !ConvertResult(Value)) {
7173 Error(S, "invalid " + StringRef(Prefix) + " value.");
7174 }
7175
7176 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
7177 return ParseStatus::Success;
7178}
7179
7180ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
7181 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7182 bool (*ConvertResult)(int64_t &)) {
7183 SMLoc S = getLoc();
7184 if (!trySkipId(Prefix, AsmToken::Colon))
7185 return ParseStatus::NoMatch;
7186
7187 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
7188 return ParseStatus::Failure;
7189
7190 unsigned Val = 0;
7191 const unsigned MaxSize = 4;
7192
7193 // FIXME: How to verify the number of elements matches the number of src
7194 // operands?
7195 for (int I = 0; ; ++I) {
7196 int64_t Op;
7197 SMLoc Loc = getLoc();
7198 if (!parseExpr(Op))
7199 return ParseStatus::Failure;
7200
7201 if (Op != 0 && Op != 1)
7202 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
7203
7204 Val |= (Op << I);
7205
7206 if (trySkipToken(AsmToken::RBrac))
7207 break;
7208
7209 if (I + 1 == MaxSize)
7210 return Error(getLoc(), "expected a closing square bracket");
7211
7212 if (!skipToken(AsmToken::Comma, "expected a comma"))
7213 return ParseStatus::Failure;
7214 }
7215
7216 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
7217 return ParseStatus::Success;
7218}
7219
7220ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
7221 OperandVector &Operands,
7222 AMDGPUOperand::ImmTy ImmTy,
7223 bool IgnoreNegative) {
7224 int64_t Bit;
7225 SMLoc S = getLoc();
7226
7227 if (trySkipId(Name)) {
7228 Bit = 1;
7229 } else if (trySkipId("no", Name)) {
7230 if (IgnoreNegative)
7231 return ParseStatus::Success;
7232 Bit = 0;
7233 } else {
7234 return ParseStatus::NoMatch;
7235 }
7236
7237 if (Name == "r128" && !hasMIMG_R128())
7238 return Error(S, "r128 modifier is not supported on this GPU");
7239 if (Name == "a16" && !hasA16())
7240 return Error(S, "a16 modifier is not supported on this GPU");
7241
7242 if (Bit == 0 && Name == "gds") {
7243 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7244 if (Mnemo.starts_with("ds_gws"))
7245 return Error(S, "nogds is not allowed");
7246 }
7247
7248 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
7249 ImmTy = AMDGPUOperand::ImmTyR128A16;
7250
7251 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
7252 return ParseStatus::Success;
7253}
7254
7255unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7256 bool &Disabling) const {
7257 Disabling = Id.consume_front("no");
7258
7259 if (isGFX940() && !Mnemo.starts_with("s_")) {
7260 return StringSwitch<unsigned>(Id)
7261 .Case("nt", AMDGPU::CPol::NT)
7262 .Case("sc0", AMDGPU::CPol::SC0)
7263 .Case("sc1", AMDGPU::CPol::SC1)
7264 .Default(0);
7265 }
7266
7267 return StringSwitch<unsigned>(Id)
7268 .Case("dlc", AMDGPU::CPol::DLC)
7269 .Case("glc", AMDGPU::CPol::GLC)
7270 .Case("scc", AMDGPU::CPol::SCC)
7271 .Case("slc", AMDGPU::CPol::SLC)
7272 .Default(0);
7273}
7274
7275ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
7276 if (isGFX12Plus()) {
7277 SMLoc StringLoc = getLoc();
7278
7279 int64_t CPolVal = 0;
7280 ParseStatus ResTH = ParseStatus::NoMatch;
7281 ParseStatus ResScope = ParseStatus::NoMatch;
7282 ParseStatus ResNV = ParseStatus::NoMatch;
7283 ParseStatus ResScal = ParseStatus::NoMatch;
7284
7285 for (;;) {
7286 if (ResTH.isNoMatch()) {
7287 int64_t TH;
7288 ResTH = parseTH(Operands, TH);
7289 if (ResTH.isFailure())
7290 return ResTH;
7291 if (ResTH.isSuccess()) {
7292 CPolVal |= TH;
7293 continue;
7294 }
7295 }
7296
7297 if (ResScope.isNoMatch()) {
7298 int64_t Scope;
7299 ResScope = parseScope(Operands, Scope);
7300 if (ResScope.isFailure())
7301 return ResScope;
7302 if (ResScope.isSuccess()) {
7303 CPolVal |= Scope;
7304 continue;
7305 }
7306 }
7307
7308 // NV bit exists on GFX12+, but does something starting from GFX1250.
7309 // Allow parsing on all GFX12 and fail on validation for better
7310 // diagnostics.
7311 if (ResNV.isNoMatch()) {
7312 if (trySkipId("nv")) {
7313 ResNV = ParseStatus::Success;
7314 CPolVal |= CPol::NV;
7315 continue;
7316 } else if (trySkipId("no", "nv")) {
7317 ResNV = ParseStatus::Success;
7318 continue;
7319 }
7320 }
7321
7322 if (ResScal.isNoMatch()) {
7323 if (trySkipId("scale_offset")) {
7324 ResScal = ParseStatus::Success;
7325 CPolVal |= CPol::SCAL;
7326 continue;
7327 } else if (trySkipId("no", "scale_offset")) {
7328 ResScal = ParseStatus::Success;
7329 continue;
7330 }
7331 }
7332
7333 break;
7334 }
7335
7336 if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
7337 ResScal.isNoMatch())
7338 return ParseStatus::NoMatch;
7339
7340 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
7341 AMDGPUOperand::ImmTyCPol));
7342 return ParseStatus::Success;
7343 }
7344
7345 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7346 SMLoc OpLoc = getLoc();
7347 unsigned Enabled = 0, Seen = 0;
7348 for (;;) {
7349 SMLoc S = getLoc();
7350 bool Disabling;
7351 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
7352 if (!CPol)
7353 break;
7354
7355 lex();
7356
7357 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
7358 return Error(S, "dlc modifier is not supported on this GPU");
7359
7360 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
7361 return Error(S, "scc modifier is not supported on this GPU");
7362
7363 if (Seen & CPol)
7364 return Error(S, "duplicate cache policy modifier");
7365
7366 if (!Disabling)
7367 Enabled |= CPol;
7368
7369 Seen |= CPol;
7370 }
7371
7372 if (!Seen)
7373 return ParseStatus::NoMatch;
7374
7375 Operands.push_back(
7376 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
7377 return ParseStatus::Success;
7378}
7379
7380ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
7381 int64_t &Scope) {
7382 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
7384
7385 ParseStatus Res = parseStringOrIntWithPrefix(
7386 Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
7387 Scope);
7388
7389 if (Res.isSuccess())
7390 Scope = Scopes[Scope];
7391
7392 return Res;
7393}
7394
7395ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
7396 TH = AMDGPU::CPol::TH_RT; // default
7397
7398 StringRef Value;
7399 SMLoc StringLoc;
7400 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
7401 if (!Res.isSuccess())
7402 return Res;
7403
7404 if (Value == "TH_DEFAULT")
7406 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
7407 Value == "TH_LOAD_NT_WB") {
7408 return Error(StringLoc, "invalid th value");
7409 } else if (Value.consume_front("TH_ATOMIC_")) {
7411 } else if (Value.consume_front("TH_LOAD_")) {
7413 } else if (Value.consume_front("TH_STORE_")) {
7415 } else {
7416 return Error(StringLoc, "invalid th value");
7417 }
7418
7419 if (Value == "BYPASS")
7421
7422 if (TH != 0) {
7424 TH |= StringSwitch<int64_t>(Value)
7425 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7426 .Case("RT", AMDGPU::CPol::TH_RT)
7427 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7428 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
7429 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
7431 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
7432 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
7434 .Default(0xffffffff);
7435 else
7436 TH |= StringSwitch<int64_t>(Value)
7437 .Case("RT", AMDGPU::CPol::TH_RT)
7438 .Case("NT", AMDGPU::CPol::TH_NT)
7439 .Case("HT", AMDGPU::CPol::TH_HT)
7440 .Case("LU", AMDGPU::CPol::TH_LU)
7441 .Case("WB", AMDGPU::CPol::TH_WB)
7442 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
7443 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
7444 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
7445 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
7446 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
7447 .Default(0xffffffff);
7448 }
7449
7450 if (TH == 0xffffffff)
7451 return Error(StringLoc, "invalid th value");
7452
7453 return ParseStatus::Success;
7454}
7455
7456static void
7458 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7459 AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
7460 std::optional<unsigned> InsertAt = std::nullopt) {
7461 auto i = OptionalIdx.find(ImmT);
7462 if (i != OptionalIdx.end()) {
7463 unsigned Idx = i->second;
7464 const AMDGPUOperand &Op =
7465 static_cast<const AMDGPUOperand &>(*Operands[Idx]);
7466 if (InsertAt)
7467 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
7468 else
7469 Op.addImmOperands(Inst, 1);
7470 } else {
7471 if (InsertAt.has_value())
7472 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
7473 else
7475 }
7476}
7477
7478ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7479 StringRef &Value,
7480 SMLoc &StringLoc) {
7481 if (!trySkipId(Prefix, AsmToken::Colon))
7482 return ParseStatus::NoMatch;
7483
7484 StringLoc = getLoc();
7485 return parseId(Value, "expected an identifier") ? ParseStatus::Success
7487}
7488
7489ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7490 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7491 int64_t &IntVal) {
7492 if (!trySkipId(Name, AsmToken::Colon))
7493 return ParseStatus::NoMatch;
7494
7495 SMLoc StringLoc = getLoc();
7496
7497 StringRef Value;
7498 if (isToken(AsmToken::Identifier)) {
7499 Value = getTokenStr();
7500 lex();
7501
7502 for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
7503 if (Value == Ids[IntVal])
7504 break;
7505 } else if (!parseExpr(IntVal))
7506 return ParseStatus::Failure;
7507
7508 if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
7509 return Error(StringLoc, "invalid " + Twine(Name) + " value");
7510
7511 return ParseStatus::Success;
7512}
7513
7514ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7515 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7516 AMDGPUOperand::ImmTy Type) {
7517 SMLoc S = getLoc();
7518 int64_t IntVal;
7519
7520 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7521 if (Res.isSuccess())
7522 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
7523
7524 return Res;
7525}
7526
7527//===----------------------------------------------------------------------===//
7528// MTBUF format
7529//===----------------------------------------------------------------------===//
7530
7531bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
7532 int64_t MaxVal,
7533 int64_t &Fmt) {
7534 int64_t Val;
7535 SMLoc Loc = getLoc();
7536
7537 auto Res = parseIntWithPrefix(Pref, Val);
7538 if (Res.isFailure())
7539 return false;
7540 if (Res.isNoMatch())
7541 return true;
7542
7543 if (Val < 0 || Val > MaxVal) {
7544 Error(Loc, Twine("out of range ", StringRef(Pref)));
7545 return false;
7546 }
7547
7548 Fmt = Val;
7549 return true;
7550}
7551
7552ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
7553 AMDGPUOperand::ImmTy ImmTy) {
7554 const char *Pref = "index_key";
7555 int64_t ImmVal = 0;
7556 SMLoc Loc = getLoc();
7557 auto Res = parseIntWithPrefix(Pref, ImmVal);
7558 if (!Res.isSuccess())
7559 return Res;
7560
7561 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7562 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7563 (ImmVal < 0 || ImmVal > 1))
7564 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7565
7566 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7567 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7568
7569 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
7570 return ParseStatus::Success;
7571}
7572
7573ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
7574 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
7575}
7576
7577ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
7578 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
7579}
7580
7581ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
7582 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
7583}
7584
7585ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(OperandVector &Operands,
7586 StringRef Name,
7587 AMDGPUOperand::ImmTy Type) {
7588 return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixFmt,
7589 Type);
7590}
7591
7592ParseStatus AMDGPUAsmParser::parseMatrixAFMT(OperandVector &Operands) {
7593 return tryParseMatrixFMT(Operands, "matrix_a_fmt",
7594 AMDGPUOperand::ImmTyMatrixAFMT);
7595}
7596
7597ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
7598 return tryParseMatrixFMT(Operands, "matrix_b_fmt",
7599 AMDGPUOperand::ImmTyMatrixBFMT);
7600}
7601
7602ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
7603 StringRef Name,
7604 AMDGPUOperand::ImmTy Type) {
7605 return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixScale,
7606 Type);
7607}
7608
7609ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
7610 return tryParseMatrixScale(Operands, "matrix_a_scale",
7611 AMDGPUOperand::ImmTyMatrixAScale);
7612}
7613
7614ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
7615 return tryParseMatrixScale(Operands, "matrix_b_scale",
7616 AMDGPUOperand::ImmTyMatrixBScale);
7617}
7618
7619ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
7620 StringRef Name,
7621 AMDGPUOperand::ImmTy Type) {
7622 return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixScaleFmt,
7623 Type);
7624}
7625
7626ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
7627 return tryParseMatrixScaleFmt(Operands, "matrix_a_scale_fmt",
7628 AMDGPUOperand::ImmTyMatrixAScaleFmt);
7629}
7630
7631ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
7632 return tryParseMatrixScaleFmt(Operands, "matrix_b_scale_fmt",
7633 AMDGPUOperand::ImmTyMatrixBScaleFmt);
7634}
7635
7636// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
7637// values to live in a joint format operand in the MCInst encoding.
7638ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
7639 using namespace llvm::AMDGPU::MTBUFFormat;
7640
7641 int64_t Dfmt = DFMT_UNDEF;
7642 int64_t Nfmt = NFMT_UNDEF;
7643
7644 // dfmt and nfmt can appear in either order, and each is optional.
7645 for (int I = 0; I < 2; ++I) {
7646 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
7647 return ParseStatus::Failure;
7648
7649 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
7650 return ParseStatus::Failure;
7651
7652 // Skip optional comma between dfmt/nfmt
7653 // but guard against 2 commas following each other.
7654 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7655 !peekToken().is(AsmToken::Comma)) {
7656 trySkipToken(AsmToken::Comma);
7657 }
7658 }
7659
7660 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7661 return ParseStatus::NoMatch;
7662
7663 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7664 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7665
7666 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7667 return ParseStatus::Success;
7668}
7669
7670ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
7671 using namespace llvm::AMDGPU::MTBUFFormat;
7672
7673 int64_t Fmt = UFMT_UNDEF;
7674
7675 if (!tryParseFmt("format", UFMT_MAX, Fmt))
7676 return ParseStatus::Failure;
7677
7678 if (Fmt == UFMT_UNDEF)
7679 return ParseStatus::NoMatch;
7680
7681 Format = Fmt;
7682 return ParseStatus::Success;
7683}
7684
7685bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7686 int64_t &Nfmt,
7687 StringRef FormatStr,
7688 SMLoc Loc) {
7689 using namespace llvm::AMDGPU::MTBUFFormat;
7690 int64_t Format;
7691
7692 Format = getDfmt(FormatStr);
7693 if (Format != DFMT_UNDEF) {
7694 Dfmt = Format;
7695 return true;
7696 }
7697
7698 Format = getNfmt(FormatStr, getSTI());
7699 if (Format != NFMT_UNDEF) {
7700 Nfmt = Format;
7701 return true;
7702 }
7703
7704 Error(Loc, "unsupported format");
7705 return false;
7706}
7707
7708ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7709 SMLoc FormatLoc,
7710 int64_t &Format) {
7711 using namespace llvm::AMDGPU::MTBUFFormat;
7712
7713 int64_t Dfmt = DFMT_UNDEF;
7714 int64_t Nfmt = NFMT_UNDEF;
7715 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7716 return ParseStatus::Failure;
7717
7718 if (trySkipToken(AsmToken::Comma)) {
7719 StringRef Str;
7720 SMLoc Loc = getLoc();
7721 if (!parseId(Str, "expected a format string") ||
7722 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7723 return ParseStatus::Failure;
7724 if (Dfmt == DFMT_UNDEF)
7725 return Error(Loc, "duplicate numeric format");
7726 if (Nfmt == NFMT_UNDEF)
7727 return Error(Loc, "duplicate data format");
7728 }
7729
7730 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7731 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7732
7733 if (isGFX10Plus()) {
7734 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
7735 if (Ufmt == UFMT_UNDEF)
7736 return Error(FormatLoc, "unsupported format");
7737 Format = Ufmt;
7738 } else {
7739 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7740 }
7741
7742 return ParseStatus::Success;
7743}
7744
7745ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7746 SMLoc Loc,
7747 int64_t &Format) {
7748 using namespace llvm::AMDGPU::MTBUFFormat;
7749
7750 auto Id = getUnifiedFormat(FormatStr, getSTI());
7751 if (Id == UFMT_UNDEF)
7752 return ParseStatus::NoMatch;
7753
7754 if (!isGFX10Plus())
7755 return Error(Loc, "unified format is not supported on this GPU");
7756
7757 Format = Id;
7758 return ParseStatus::Success;
7759}
7760
7761ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7762 using namespace llvm::AMDGPU::MTBUFFormat;
7763 SMLoc Loc = getLoc();
7764
7765 if (!parseExpr(Format))
7766 return ParseStatus::Failure;
7767 if (!isValidFormatEncoding(Format, getSTI()))
7768 return Error(Loc, "out of range format");
7769
7770 return ParseStatus::Success;
7771}
7772
7773ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
7774 using namespace llvm::AMDGPU::MTBUFFormat;
7775
7776 if (!trySkipId("format", AsmToken::Colon))
7777 return ParseStatus::NoMatch;
7778
7779 if (trySkipToken(AsmToken::LBrac)) {
7780 StringRef FormatStr;
7781 SMLoc Loc = getLoc();
7782 if (!parseId(FormatStr, "expected a format string"))
7783 return ParseStatus::Failure;
7784
7785 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
7786 if (Res.isNoMatch())
7787 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
7788 if (!Res.isSuccess())
7789 return Res;
7790
7791 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7792 return ParseStatus::Failure;
7793
7794 return ParseStatus::Success;
7795 }
7796
7797 return parseNumericFormat(Format);
7798}
7799
7800ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
7801 using namespace llvm::AMDGPU::MTBUFFormat;
7802
7803 int64_t Format = getDefaultFormatEncoding(getSTI());
7804 ParseStatus Res;
7805 SMLoc Loc = getLoc();
7806
7807 // Parse legacy format syntax.
7808 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
7809 if (Res.isFailure())
7810 return Res;
7811
7812 bool FormatFound = Res.isSuccess();
7813
7814 Operands.push_back(
7815 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7816
7817 if (FormatFound)
7818 trySkipToken(AsmToken::Comma);
7819
7820 if (isToken(AsmToken::EndOfStatement)) {
7821 // We are expecting an soffset operand,
7822 // but let matcher handle the error.
7823 return ParseStatus::Success;
7824 }
7825
7826 // Parse soffset.
7827 Res = parseRegOrImm(Operands);
7828 if (!Res.isSuccess())
7829 return Res;
7830
7831 trySkipToken(AsmToken::Comma);
7832
7833 if (!FormatFound) {
7834 Res = parseSymbolicOrNumericFormat(Format);
7835 if (Res.isFailure())
7836 return Res;
7837 if (Res.isSuccess()) {
7838 auto Size = Operands.size();
7839 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
7840 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7841 Op.setImm(Format);
7842 }
7843 return ParseStatus::Success;
7844 }
7845
7846 if (isId("format") && peekToken().is(AsmToken::Colon))
7847 return Error(getLoc(), "duplicate format");
7848 return ParseStatus::Success;
7849}
7850
7851ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7852 ParseStatus Res =
7853 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7854 if (Res.isNoMatch()) {
7855 Res = parseIntWithPrefix("inst_offset", Operands,
7856 AMDGPUOperand::ImmTyInstOffset);
7857 }
7858 return Res;
7859}
7860
7861ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7862 ParseStatus Res =
7863 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7864 if (Res.isNoMatch())
7865 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7866 return Res;
7867}
7868
7869ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7870 ParseStatus Res =
7871 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7872 if (Res.isNoMatch()) {
7873 Res =
7874 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7875 }
7876 return Res;
7877}
7878
7879//===----------------------------------------------------------------------===//
7880// Exp
7881//===----------------------------------------------------------------------===//
7882
7883void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7884 OptionalImmIndexMap OptionalIdx;
7885
7886 unsigned OperandIdx[4];
7887 unsigned EnMask = 0;
7888 int SrcIdx = 0;
7889
7890 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7891 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7892
7893 // Add the register arguments
7894 if (Op.isReg()) {
7895 assert(SrcIdx < 4);
7896 OperandIdx[SrcIdx] = Inst.size();
7897 Op.addRegOperands(Inst, 1);
7898 ++SrcIdx;
7899 continue;
7900 }
7901
7902 if (Op.isOff()) {
7903 assert(SrcIdx < 4);
7904 OperandIdx[SrcIdx] = Inst.size();
7905 Inst.addOperand(MCOperand::createReg(MCRegister()));
7906 ++SrcIdx;
7907 continue;
7908 }
7909
7910 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7911 Op.addImmOperands(Inst, 1);
7912 continue;
7913 }
7914
7915 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7916 continue;
7917
7918 // Handle optional arguments
7919 OptionalIdx[Op.getImmTy()] = i;
7920 }
7921
7922 assert(SrcIdx == 4);
7923
7924 bool Compr = false;
7925 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7926 Compr = true;
7927 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7928 Inst.getOperand(OperandIdx[2]).setReg(MCRegister());
7929 Inst.getOperand(OperandIdx[3]).setReg(MCRegister());
7930 }
7931
7932 for (auto i = 0; i < SrcIdx; ++i) {
7933 if (Inst.getOperand(OperandIdx[i]).getReg()) {
7934 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7935 }
7936 }
7937
7938 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7939 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7940
7941 Inst.addOperand(MCOperand::createImm(EnMask));
7942}
7943
7944//===----------------------------------------------------------------------===//
7945// s_waitcnt
7946//===----------------------------------------------------------------------===//
7947
7948static bool
7950 const AMDGPU::IsaVersion ISA,
7951 int64_t &IntVal,
7952 int64_t CntVal,
7953 bool Saturate,
7954 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7955 unsigned (*decode)(const IsaVersion &Version, unsigned))
7956{
7957 bool Failed = false;
7958
7959 IntVal = encode(ISA, IntVal, CntVal);
7960 if (CntVal != decode(ISA, IntVal)) {
7961 if (Saturate) {
7962 IntVal = encode(ISA, IntVal, -1);
7963 } else {
7964 Failed = true;
7965 }
7966 }
7967 return Failed;
7968}
7969
7970bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7971
7972 SMLoc CntLoc = getLoc();
7973 StringRef CntName = getTokenStr();
7974
7975 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7976 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7977 return false;
7978
7979 int64_t CntVal;
7980 SMLoc ValLoc = getLoc();
7981 if (!parseExpr(CntVal))
7982 return false;
7983
7984 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7985
7986 bool Failed = true;
7987 bool Sat = CntName.ends_with("_sat");
7988
7989 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7990 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7991 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7992 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7993 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7994 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7995 } else {
7996 Error(CntLoc, "invalid counter name " + CntName);
7997 return false;
7998 }
7999
8000 if (Failed) {
8001 Error(ValLoc, "too large value for " + CntName);
8002 return false;
8003 }
8004
8005 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8006 return false;
8007
8008 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
8009 if (isToken(AsmToken::EndOfStatement)) {
8010 Error(getLoc(), "expected a counter name");
8011 return false;
8012 }
8013 }
8014
8015 return true;
8016}
8017
8018ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
8019 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
8020 int64_t Waitcnt = getWaitcntBitMask(ISA);
8021 SMLoc S = getLoc();
8022
8023 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8024 while (!isToken(AsmToken::EndOfStatement)) {
8025 if (!parseCnt(Waitcnt))
8026 return ParseStatus::Failure;
8027 }
8028 } else {
8029 if (!parseExpr(Waitcnt))
8030 return ParseStatus::Failure;
8031 }
8032
8033 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
8034 return ParseStatus::Success;
8035}
8036
8037bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
8038 SMLoc FieldLoc = getLoc();
8039 StringRef FieldName = getTokenStr();
8040 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
8041 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
8042 return false;
8043
8044 SMLoc ValueLoc = getLoc();
8045 StringRef ValueName = getTokenStr();
8046 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
8047 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
8048 return false;
8049
8050 unsigned Shift;
8051 if (FieldName == "instid0") {
8052 Shift = 0;
8053 } else if (FieldName == "instskip") {
8054 Shift = 4;
8055 } else if (FieldName == "instid1") {
8056 Shift = 7;
8057 } else {
8058 Error(FieldLoc, "invalid field name " + FieldName);
8059 return false;
8060 }
8061
8062 int Value;
8063 if (Shift == 4) {
8064 // Parse values for instskip.
8065 Value = StringSwitch<int>(ValueName)
8066 .Case("SAME", 0)
8067 .Case("NEXT", 1)
8068 .Case("SKIP_1", 2)
8069 .Case("SKIP_2", 3)
8070 .Case("SKIP_3", 4)
8071 .Case("SKIP_4", 5)
8072 .Default(-1);
8073 } else {
8074 // Parse values for instid0 and instid1.
8075 Value = StringSwitch<int>(ValueName)
8076 .Case("NO_DEP", 0)
8077 .Case("VALU_DEP_1", 1)
8078 .Case("VALU_DEP_2", 2)
8079 .Case("VALU_DEP_3", 3)
8080 .Case("VALU_DEP_4", 4)
8081 .Case("TRANS32_DEP_1", 5)
8082 .Case("TRANS32_DEP_2", 6)
8083 .Case("TRANS32_DEP_3", 7)
8084 .Case("FMA_ACCUM_CYCLE_1", 8)
8085 .Case("SALU_CYCLE_1", 9)
8086 .Case("SALU_CYCLE_2", 10)
8087 .Case("SALU_CYCLE_3", 11)
8088 .Default(-1);
8089 }
8090 if (Value < 0) {
8091 Error(ValueLoc, "invalid value name " + ValueName);
8092 return false;
8093 }
8094
8095 Delay |= Value << Shift;
8096 return true;
8097}
8098
8099ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
8100 int64_t Delay = 0;
8101 SMLoc S = getLoc();
8102
8103 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8104 do {
8105 if (!parseDelay(Delay))
8106 return ParseStatus::Failure;
8107 } while (trySkipToken(AsmToken::Pipe));
8108 } else {
8109 if (!parseExpr(Delay))
8110 return ParseStatus::Failure;
8111 }
8112
8113 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
8114 return ParseStatus::Success;
8115}
8116
8117bool
8118AMDGPUOperand::isSWaitCnt() const {
8119 return isImm();
8120}
8121
8122bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
8123
8124//===----------------------------------------------------------------------===//
8125// DepCtr
8126//===----------------------------------------------------------------------===//
8127
8128void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
8129 StringRef DepCtrName) {
8130 switch (ErrorId) {
8131 case OPR_ID_UNKNOWN:
8132 Error(Loc, Twine("invalid counter name ", DepCtrName));
8133 return;
8134 case OPR_ID_UNSUPPORTED:
8135 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
8136 return;
8137 case OPR_ID_DUPLICATE:
8138 Error(Loc, Twine("duplicate counter name ", DepCtrName));
8139 return;
8140 case OPR_VAL_INVALID:
8141 Error(Loc, Twine("invalid value for ", DepCtrName));
8142 return;
8143 default:
8144 assert(false);
8145 }
8146}
8147
8148bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
8149
8150 using namespace llvm::AMDGPU::DepCtr;
8151
8152 SMLoc DepCtrLoc = getLoc();
8153 StringRef DepCtrName = getTokenStr();
8154
8155 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
8156 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
8157 return false;
8158
8159 int64_t ExprVal;
8160 if (!parseExpr(ExprVal))
8161 return false;
8162
8163 unsigned PrevOprMask = UsedOprMask;
8164 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
8165
8166 if (CntVal < 0) {
8167 depCtrError(DepCtrLoc, CntVal, DepCtrName);
8168 return false;
8169 }
8170
8171 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8172 return false;
8173
8174 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
8175 if (isToken(AsmToken::EndOfStatement)) {
8176 Error(getLoc(), "expected a counter name");
8177 return false;
8178 }
8179 }
8180
8181 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
8182 DepCtr = (DepCtr & ~CntValMask) | CntVal;
8183 return true;
8184}
8185
8186ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
8187 using namespace llvm::AMDGPU::DepCtr;
8188
8189 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
8190 SMLoc Loc = getLoc();
8191
8192 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8193 unsigned UsedOprMask = 0;
8194 while (!isToken(AsmToken::EndOfStatement)) {
8195 if (!parseDepCtr(DepCtr, UsedOprMask))
8196 return ParseStatus::Failure;
8197 }
8198 } else {
8199 if (!parseExpr(DepCtr))
8200 return ParseStatus::Failure;
8201 }
8202
8203 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
8204 return ParseStatus::Success;
8205}
8206
8207bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
8208
8209//===----------------------------------------------------------------------===//
8210// hwreg
8211//===----------------------------------------------------------------------===//
8212
8213ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
8214 OperandInfoTy &Offset,
8215 OperandInfoTy &Width) {
8216 using namespace llvm::AMDGPU::Hwreg;
8217
8218 if (!trySkipId("hwreg", AsmToken::LParen))
8219 return ParseStatus::NoMatch;
8220
8221 // The register may be specified by name or using a numeric code
8222 HwReg.Loc = getLoc();
8223 if (isToken(AsmToken::Identifier) &&
8224 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8225 HwReg.IsSymbolic = true;
8226 lex(); // skip register name
8227 } else if (!parseExpr(HwReg.Val, "a register name")) {
8228 return ParseStatus::Failure;
8229 }
8230
8231 if (trySkipToken(AsmToken::RParen))
8232 return ParseStatus::Success;
8233
8234 // parse optional params
8235 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
8236 return ParseStatus::Failure;
8237
8238 Offset.Loc = getLoc();
8239 if (!parseExpr(Offset.Val))
8240 return ParseStatus::Failure;
8241
8242 if (!skipToken(AsmToken::Comma, "expected a comma"))
8243 return ParseStatus::Failure;
8244
8245 Width.Loc = getLoc();
8246 if (!parseExpr(Width.Val) ||
8247 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8248 return ParseStatus::Failure;
8249
8250 return ParseStatus::Success;
8251}
8252
8253ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
8254 using namespace llvm::AMDGPU::Hwreg;
8255
8256 int64_t ImmVal = 0;
8257 SMLoc Loc = getLoc();
8258
8259 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
8260 HwregId::Default);
8261 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
8262 HwregOffset::Default);
8263 struct : StructuredOpField {
8264 using StructuredOpField::StructuredOpField;
8265 bool validate(AMDGPUAsmParser &Parser) const override {
8266 if (!isUIntN(Width, Val - 1))
8267 return Error(Parser, "only values from 1 to 32 are legal");
8268 return true;
8269 }
8270 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
8271 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
8272
8273 if (Res.isNoMatch())
8274 Res = parseHwregFunc(HwReg, Offset, Width);
8275
8276 if (Res.isSuccess()) {
8277 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
8278 return ParseStatus::Failure;
8279 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
8280 }
8281
8282 if (Res.isNoMatch() &&
8283 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
8285
8286 if (!Res.isSuccess())
8287 return ParseStatus::Failure;
8288
8289 if (!isUInt<16>(ImmVal))
8290 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8291 Operands.push_back(
8292 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
8293 return ParseStatus::Success;
8294}
8295
8296bool AMDGPUOperand::isHwreg() const {
8297 return isImmTy(ImmTyHwreg);
8298}
8299
8300//===----------------------------------------------------------------------===//
8301// sendmsg
8302//===----------------------------------------------------------------------===//
8303
8304bool
8305AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8306 OperandInfoTy &Op,
8307 OperandInfoTy &Stream) {
8308 using namespace llvm::AMDGPU::SendMsg;
8309
8310 Msg.Loc = getLoc();
8311 if (isToken(AsmToken::Identifier) &&
8312 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8313 Msg.IsSymbolic = true;
8314 lex(); // skip message name
8315 } else if (!parseExpr(Msg.Val, "a message name")) {
8316 return false;
8317 }
8318
8319 if (trySkipToken(AsmToken::Comma)) {
8320 Op.IsDefined = true;
8321 Op.Loc = getLoc();
8322 if (isToken(AsmToken::Identifier) &&
8323 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8325 lex(); // skip operation name
8326 } else if (!parseExpr(Op.Val, "an operation name")) {
8327 return false;
8328 }
8329
8330 if (trySkipToken(AsmToken::Comma)) {
8331 Stream.IsDefined = true;
8332 Stream.Loc = getLoc();
8333 if (!parseExpr(Stream.Val))
8334 return false;
8335 }
8336 }
8337
8338 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
8339}
8340
8341bool
8342AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
8343 const OperandInfoTy &Op,
8344 const OperandInfoTy &Stream) {
8345 using namespace llvm::AMDGPU::SendMsg;
8346
8347 // Validation strictness depends on whether message is specified
8348 // in a symbolic or in a numeric form. In the latter case
8349 // only encoding possibility is checked.
8350 bool Strict = Msg.IsSymbolic;
8351
8352 if (Strict) {
8353 if (Msg.Val == OPR_ID_UNSUPPORTED) {
8354 Error(Msg.Loc, "specified message id is not supported on this GPU");
8355 return false;
8356 }
8357 } else {
8358 if (!isValidMsgId(Msg.Val, getSTI())) {
8359 Error(Msg.Loc, "invalid message id");
8360 return false;
8361 }
8362 }
8363 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
8364 if (Op.IsDefined) {
8365 Error(Op.Loc, "message does not support operations");
8366 } else {
8367 Error(Msg.Loc, "missing message operation");
8368 }
8369 return false;
8370 }
8371 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
8372 if (Op.Val == OPR_ID_UNSUPPORTED)
8373 Error(Op.Loc, "specified operation id is not supported on this GPU");
8374 else
8375 Error(Op.Loc, "invalid operation id");
8376 return false;
8377 }
8378 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
8379 Stream.IsDefined) {
8380 Error(Stream.Loc, "message operation does not support streams");
8381 return false;
8382 }
8383 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
8384 Error(Stream.Loc, "invalid message stream id");
8385 return false;
8386 }
8387 return true;
8388}
8389
8390ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
8391 using namespace llvm::AMDGPU::SendMsg;
8392
8393 int64_t ImmVal = 0;
8394 SMLoc Loc = getLoc();
8395
8396 if (trySkipId("sendmsg", AsmToken::LParen)) {
8397 OperandInfoTy Msg(OPR_ID_UNKNOWN);
8398 OperandInfoTy Op(OP_NONE_);
8399 OperandInfoTy Stream(STREAM_ID_NONE_);
8400 if (parseSendMsgBody(Msg, Op, Stream) &&
8401 validateSendMsg(Msg, Op, Stream)) {
8402 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
8403 } else {
8404 return ParseStatus::Failure;
8405 }
8406 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
8407 if (ImmVal < 0 || !isUInt<16>(ImmVal))
8408 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8409 } else {
8410 return ParseStatus::Failure;
8411 }
8412
8413 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
8414 return ParseStatus::Success;
8415}
8416
8417bool AMDGPUOperand::isSendMsg() const {
8418 return isImmTy(ImmTySendMsg);
8419}
8420
8421ParseStatus AMDGPUAsmParser::parseWaitEvent(OperandVector &Operands) {
8422 using namespace llvm::AMDGPU::WaitEvent;
8423
8424 SMLoc Loc = getLoc();
8425 int64_t ImmVal = 0;
8426
8427 StructuredOpField DontWaitExportReady("dont_wait_export_ready", "bit value",
8428 1, 0);
8429 StructuredOpField ExportReady("export_ready", "bit value", 1, 0);
8430
8431 StructuredOpField *TargetBitfield =
8432 isGFX11() ? &DontWaitExportReady : &ExportReady;
8433
8434 ParseStatus Res = parseStructuredOpFields({TargetBitfield});
8435 if (Res.isNoMatch() && parseExpr(ImmVal, "structured immediate"))
8437 else if (Res.isSuccess()) {
8438 if (!validateStructuredOpFields({TargetBitfield}))
8439 return ParseStatus::Failure;
8440 ImmVal = TargetBitfield->Val;
8441 }
8442
8443 if (!Res.isSuccess())
8444 return ParseStatus::Failure;
8445
8446 if (!isUInt<16>(ImmVal))
8447 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8448
8449 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc,
8450 AMDGPUOperand::ImmTyWaitEvent));
8451 return ParseStatus::Success;
8452}
8453
8454bool AMDGPUOperand::isWaitEvent() const { return isImmTy(ImmTyWaitEvent); }
8455
8456//===----------------------------------------------------------------------===//
8457// v_interp
8458//===----------------------------------------------------------------------===//
8459
8460ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
8461 StringRef Str;
8462 SMLoc S = getLoc();
8463
8464 if (!parseId(Str))
8465 return ParseStatus::NoMatch;
8466
8467 int Slot = StringSwitch<int>(Str)
8468 .Case("p10", 0)
8469 .Case("p20", 1)
8470 .Case("p0", 2)
8471 .Default(-1);
8472
8473 if (Slot == -1)
8474 return Error(S, "invalid interpolation slot");
8475
8476 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
8477 AMDGPUOperand::ImmTyInterpSlot));
8478 return ParseStatus::Success;
8479}
8480
8481ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
8482 StringRef Str;
8483 SMLoc S = getLoc();
8484
8485 if (!parseId(Str))
8486 return ParseStatus::NoMatch;
8487
8488 if (!Str.starts_with("attr"))
8489 return Error(S, "invalid interpolation attribute");
8490
8491 StringRef Chan = Str.take_back(2);
8492 int AttrChan = StringSwitch<int>(Chan)
8493 .Case(".x", 0)
8494 .Case(".y", 1)
8495 .Case(".z", 2)
8496 .Case(".w", 3)
8497 .Default(-1);
8498 if (AttrChan == -1)
8499 return Error(S, "invalid or missing interpolation attribute channel");
8500
8501 Str = Str.drop_back(2).drop_front(4);
8502
8503 uint8_t Attr;
8504 if (Str.getAsInteger(10, Attr))
8505 return Error(S, "invalid or missing interpolation attribute number");
8506
8507 if (Attr > 32)
8508 return Error(S, "out of bounds interpolation attribute number");
8509
8510 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
8511
8512 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
8513 AMDGPUOperand::ImmTyInterpAttr));
8514 Operands.push_back(AMDGPUOperand::CreateImm(
8515 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8516 return ParseStatus::Success;
8517}
8518
8519//===----------------------------------------------------------------------===//
8520// exp
8521//===----------------------------------------------------------------------===//
8522
8523ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
8524 using namespace llvm::AMDGPU::Exp;
8525
8526 StringRef Str;
8527 SMLoc S = getLoc();
8528
8529 if (!parseId(Str))
8530 return ParseStatus::NoMatch;
8531
8532 unsigned Id = getTgtId(Str);
8533 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
8534 return Error(S, (Id == ET_INVALID)
8535 ? "invalid exp target"
8536 : "exp target is not supported on this GPU");
8537
8538 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
8539 AMDGPUOperand::ImmTyExpTgt));
8540 return ParseStatus::Success;
8541}
8542
8543//===----------------------------------------------------------------------===//
8544// parser helpers
8545//===----------------------------------------------------------------------===//
8546
8547bool
8548AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
8549 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
8550}
8551
8552bool
8553AMDGPUAsmParser::isId(const StringRef Id) const {
8554 return isId(getToken(), Id);
8555}
8556
8557bool
8558AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
8559 return getTokenKind() == Kind;
8560}
8561
8562StringRef AMDGPUAsmParser::getId() const {
8563 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
8564}
8565
8566bool
8567AMDGPUAsmParser::trySkipId(const StringRef Id) {
8568 if (isId(Id)) {
8569 lex();
8570 return true;
8571 }
8572 return false;
8573}
8574
8575bool
8576AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
8577 if (isToken(AsmToken::Identifier)) {
8578 StringRef Tok = getTokenStr();
8579 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
8580 lex();
8581 return true;
8582 }
8583 }
8584 return false;
8585}
8586
8587bool
8588AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
8589 if (isId(Id) && peekToken().is(Kind)) {
8590 lex();
8591 lex();
8592 return true;
8593 }
8594 return false;
8595}
8596
8597bool
8598AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
8599 if (isToken(Kind)) {
8600 lex();
8601 return true;
8602 }
8603 return false;
8604}
8605
8606bool
8607AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
8608 const StringRef ErrMsg) {
8609 if (!trySkipToken(Kind)) {
8610 Error(getLoc(), ErrMsg);
8611 return false;
8612 }
8613 return true;
8614}
8615
8616bool
8617AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
8618 SMLoc S = getLoc();
8619
8620 const MCExpr *Expr;
8621 if (Parser.parseExpression(Expr))
8622 return false;
8623
8624 if (Expr->evaluateAsAbsolute(Imm))
8625 return true;
8626
8627 if (Expected.empty()) {
8628 Error(S, "expected absolute expression");
8629 } else {
8630 Error(S, Twine("expected ", Expected) +
8631 Twine(" or an absolute expression"));
8632 }
8633 return false;
8634}
8635
8636bool
8637AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
8638 SMLoc S = getLoc();
8639
8640 const MCExpr *Expr;
8641 if (Parser.parseExpression(Expr))
8642 return false;
8643
8644 int64_t IntVal;
8645 if (Expr->evaluateAsAbsolute(IntVal)) {
8646 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
8647 } else {
8648 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8649 }
8650 return true;
8651}
8652
8653bool
8654AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
8655 if (isToken(AsmToken::String)) {
8656 Val = getToken().getStringContents();
8657 lex();
8658 return true;
8659 }
8660 Error(getLoc(), ErrMsg);
8661 return false;
8662}
8663
8664bool
8665AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
8666 if (isToken(AsmToken::Identifier)) {
8667 Val = getTokenStr();
8668 lex();
8669 return true;
8670 }
8671 if (!ErrMsg.empty())
8672 Error(getLoc(), ErrMsg);
8673 return false;
8674}
8675
8676AsmToken
8677AMDGPUAsmParser::getToken() const {
8678 return Parser.getTok();
8679}
8680
8681AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
8682 return isToken(AsmToken::EndOfStatement)
8683 ? getToken()
8684 : getLexer().peekTok(ShouldSkipSpace);
8685}
8686
8687void
8688AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
8689 auto TokCount = getLexer().peekTokens(Tokens);
8690
8691 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
8692 Tokens[Idx] = AsmToken(AsmToken::Error, "");
8693}
8694
8696AMDGPUAsmParser::getTokenKind() const {
8697 return getLexer().getKind();
8698}
8699
8700SMLoc
8701AMDGPUAsmParser::getLoc() const {
8702 return getToken().getLoc();
8703}
8704
8705StringRef
8706AMDGPUAsmParser::getTokenStr() const {
8707 return getToken().getString();
8708}
8709
8710void
8711AMDGPUAsmParser::lex() {
8712 Parser.Lex();
8713}
8714
8715const AMDGPUOperand &
8716AMDGPUAsmParser::findMCOperand(const OperandVector &Operands,
8717 int MCOpIdx) const {
8718 for (const auto &Op : Operands) {
8719 const AMDGPUOperand &TargetOp = static_cast<AMDGPUOperand &>(*Op);
8720 if (TargetOp.getMCOpIdx() == MCOpIdx)
8721 return TargetOp;
8722 }
8723 llvm_unreachable("no such MC operand!");
8724}
8725
8726SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
8727 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8728}
8729
8730// Returns one of the given locations that comes later in the source.
8731SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
8732 return a.getPointer() < b.getPointer() ? b : a;
8733}
8734
8735SMLoc AMDGPUAsmParser::getOperandLoc(const OperandVector &Operands,
8736 int MCOpIdx) const {
8737 return findMCOperand(Operands, MCOpIdx).getStartLoc();
8738}
8739
8740SMLoc
8741AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
8742 const OperandVector &Operands) const {
8743 for (unsigned i = Operands.size() - 1; i > 0; --i) {
8744 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8745 if (Test(Op))
8746 return Op.getStartLoc();
8747 }
8748 return getInstLoc(Operands);
8749}
8750
8751SMLoc
8752AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8753 const OperandVector &Operands) const {
8754 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
8755 return getOperandLoc(Test, Operands);
8756}
8757
8758ParseStatus
8759AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
8760 if (!trySkipToken(AsmToken::LCurly))
8761 return ParseStatus::NoMatch;
8762
8763 bool First = true;
8764 while (!trySkipToken(AsmToken::RCurly)) {
8765 if (!First &&
8766 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
8767 return ParseStatus::Failure;
8768
8769 StringRef Id = getTokenStr();
8770 SMLoc IdLoc = getLoc();
8771 if (!skipToken(AsmToken::Identifier, "field name expected") ||
8772 !skipToken(AsmToken::Colon, "colon expected"))
8773 return ParseStatus::Failure;
8774
8775 const auto *I =
8776 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
8777 if (I == Fields.end())
8778 return Error(IdLoc, "unknown field");
8779 if ((*I)->IsDefined)
8780 return Error(IdLoc, "duplicate field");
8781
8782 // TODO: Support symbolic values.
8783 (*I)->Loc = getLoc();
8784 if (!parseExpr((*I)->Val))
8785 return ParseStatus::Failure;
8786 (*I)->IsDefined = true;
8787
8788 First = false;
8789 }
8790 return ParseStatus::Success;
8791}
8792
8793bool AMDGPUAsmParser::validateStructuredOpFields(
8795 return all_of(Fields, [this](const StructuredOpField *F) {
8796 return F->validate(*this);
8797 });
8798}
8799
8800//===----------------------------------------------------------------------===//
8801// swizzle
8802//===----------------------------------------------------------------------===//
8803
8805static unsigned
8806encodeBitmaskPerm(const unsigned AndMask,
8807 const unsigned OrMask,
8808 const unsigned XorMask) {
8809 using namespace llvm::AMDGPU::Swizzle;
8810
8811 return BITMASK_PERM_ENC |
8812 (AndMask << BITMASK_AND_SHIFT) |
8813 (OrMask << BITMASK_OR_SHIFT) |
8814 (XorMask << BITMASK_XOR_SHIFT);
8815}
8816
8817bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8818 const unsigned MaxVal,
8819 const Twine &ErrMsg, SMLoc &Loc) {
8820 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8821 return false;
8822 }
8823 Loc = getLoc();
8824 if (!parseExpr(Op)) {
8825 return false;
8826 }
8827 if (Op < MinVal || Op > MaxVal) {
8828 Error(Loc, ErrMsg);
8829 return false;
8830 }
8831
8832 return true;
8833}
8834
8835bool
8836AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8837 const unsigned MinVal,
8838 const unsigned MaxVal,
8839 const StringRef ErrMsg) {
8840 SMLoc Loc;
8841 for (unsigned i = 0; i < OpNum; ++i) {
8842 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
8843 return false;
8844 }
8845
8846 return true;
8847}
8848
8849bool
8850AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8851 using namespace llvm::AMDGPU::Swizzle;
8852
8853 int64_t Lane[LANE_NUM];
8854 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8855 "expected a 2-bit lane id")) {
8857 for (unsigned I = 0; I < LANE_NUM; ++I) {
8858 Imm |= Lane[I] << (LANE_SHIFT * I);
8859 }
8860 return true;
8861 }
8862 return false;
8863}
8864
8865bool
8866AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
8867 using namespace llvm::AMDGPU::Swizzle;
8868
8869 SMLoc Loc;
8870 int64_t GroupSize;
8871 int64_t LaneIdx;
8872
8873 if (!parseSwizzleOperand(GroupSize,
8874 2, 32,
8875 "group size must be in the interval [2,32]",
8876 Loc)) {
8877 return false;
8878 }
8879 if (!isPowerOf2_64(GroupSize)) {
8880 Error(Loc, "group size must be a power of two");
8881 return false;
8882 }
8883 if (parseSwizzleOperand(LaneIdx,
8884 0, GroupSize - 1,
8885 "lane id must be in the interval [0,group size - 1]",
8886 Loc)) {
8887 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8888 return true;
8889 }
8890 return false;
8891}
8892
8893bool
8894AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8895 using namespace llvm::AMDGPU::Swizzle;
8896
8897 SMLoc Loc;
8898 int64_t GroupSize;
8899
8900 if (!parseSwizzleOperand(GroupSize,
8901 2, 32,
8902 "group size must be in the interval [2,32]",
8903 Loc)) {
8904 return false;
8905 }
8906 if (!isPowerOf2_64(GroupSize)) {
8907 Error(Loc, "group size must be a power of two");
8908 return false;
8909 }
8910
8911 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8912 return true;
8913}
8914
8915bool
8916AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8917 using namespace llvm::AMDGPU::Swizzle;
8918
8919 SMLoc Loc;
8920 int64_t GroupSize;
8921
8922 if (!parseSwizzleOperand(GroupSize,
8923 1, 16,
8924 "group size must be in the interval [1,16]",
8925 Loc)) {
8926 return false;
8927 }
8928 if (!isPowerOf2_64(GroupSize)) {
8929 Error(Loc, "group size must be a power of two");
8930 return false;
8931 }
8932
8933 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8934 return true;
8935}
8936
8937bool
8938AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8939 using namespace llvm::AMDGPU::Swizzle;
8940
8941 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8942 return false;
8943 }
8944
8945 StringRef Ctl;
8946 SMLoc StrLoc = getLoc();
8947 if (!parseString(Ctl)) {
8948 return false;
8949 }
8950 if (Ctl.size() != BITMASK_WIDTH) {
8951 Error(StrLoc, "expected a 5-character mask");
8952 return false;
8953 }
8954
8955 unsigned AndMask = 0;
8956 unsigned OrMask = 0;
8957 unsigned XorMask = 0;
8958
8959 for (size_t i = 0; i < Ctl.size(); ++i) {
8960 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8961 switch(Ctl[i]) {
8962 default:
8963 Error(StrLoc, "invalid mask");
8964 return false;
8965 case '0':
8966 break;
8967 case '1':
8968 OrMask |= Mask;
8969 break;
8970 case 'p':
8971 AndMask |= Mask;
8972 break;
8973 case 'i':
8974 AndMask |= Mask;
8975 XorMask |= Mask;
8976 break;
8977 }
8978 }
8979
8980 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8981 return true;
8982}
8983
8984bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
8985 using namespace llvm::AMDGPU::Swizzle;
8986
8987 if (!AMDGPU::isGFX9Plus(getSTI())) {
8988 Error(getLoc(), "FFT mode swizzle not supported on this GPU");
8989 return false;
8990 }
8991
8992 int64_t Swizzle;
8993 SMLoc Loc;
8994 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
8995 "FFT swizzle must be in the interval [0," +
8996 Twine(FFT_SWIZZLE_MAX) + Twine(']'),
8997 Loc))
8998 return false;
8999
9000 Imm = FFT_MODE_ENC | Swizzle;
9001 return true;
9002}
9003
9004bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
9005 using namespace llvm::AMDGPU::Swizzle;
9006
9007 if (!AMDGPU::isGFX9Plus(getSTI())) {
9008 Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
9009 return false;
9010 }
9011
9012 SMLoc Loc;
9013 int64_t Direction;
9014
9015 if (!parseSwizzleOperand(Direction, 0, 1,
9016 "direction must be 0 (left) or 1 (right)", Loc))
9017 return false;
9018
9019 int64_t RotateSize;
9020 if (!parseSwizzleOperand(
9021 RotateSize, 0, ROTATE_MAX_SIZE,
9022 "number of threads to rotate must be in the interval [0," +
9023 Twine(ROTATE_MAX_SIZE) + Twine(']'),
9024 Loc))
9025 return false;
9026
9028 (RotateSize << ROTATE_SIZE_SHIFT);
9029 return true;
9030}
9031
9032bool
9033AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
9034
9035 SMLoc OffsetLoc = getLoc();
9036
9037 if (!parseExpr(Imm, "a swizzle macro")) {
9038 return false;
9039 }
9040 if (!isUInt<16>(Imm)) {
9041 Error(OffsetLoc, "expected a 16-bit offset");
9042 return false;
9043 }
9044 return true;
9045}
9046
9047bool
9048AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
9049 using namespace llvm::AMDGPU::Swizzle;
9050
9051 if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
9052
9053 SMLoc ModeLoc = getLoc();
9054 bool Ok = false;
9055
9056 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
9057 Ok = parseSwizzleQuadPerm(Imm);
9058 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
9059 Ok = parseSwizzleBitmaskPerm(Imm);
9060 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
9061 Ok = parseSwizzleBroadcast(Imm);
9062 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
9063 Ok = parseSwizzleSwap(Imm);
9064 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
9065 Ok = parseSwizzleReverse(Imm);
9066 } else if (trySkipId(IdSymbolic[ID_FFT])) {
9067 Ok = parseSwizzleFFT(Imm);
9068 } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
9069 Ok = parseSwizzleRotate(Imm);
9070 } else {
9071 Error(ModeLoc, "expected a swizzle mode");
9072 }
9073
9074 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
9075 }
9076
9077 return false;
9078}
9079
// Parses an "offset" swizzle operand.
// Returns ParseStatus::NoMatch when the next identifier is not "offset".
// Otherwise a colon is expected, followed either by the identifier "swizzle"
// (handled by parseSwizzleMacro) or by a plain value (handled by
// parseSwizzleOffset). An ImmTySwizzle immediate is then appended to Operands.
9080ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
9081 SMLoc S = getLoc();
9082 int64_t Imm = 0;
9083
9084 if (trySkipId("offset")) {
9085
9086 bool Ok = false;
9087 if (skipToken(AsmToken::Colon, "expected a colon")) {
9088 if (trySkipId("swizzle")) {
9089 Ok = parseSwizzleMacro(Imm);
9090 } else {
9091 Ok = parseSwizzleOffset(Imm);
9092 }
9093 }
9094
// The operand is pushed whether or not parsing succeeded; Imm is still 0
// unless one of the helpers above wrote to it.
9095 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
9096
// NOTE(review): embedded listing number 9097 is absent from this excerpt; the
// statement that consumes Ok (presumably the return for this branch) is not
// visible here.
9098 }
9099 return ParseStatus::NoMatch;
9100}
9101
9102bool
9103AMDGPUOperand::isSwizzle() const {
9104 return isImmTy(ImmTySwizzle);
9105}
9106
9107//===----------------------------------------------------------------------===//
9108// VGPR Index Mode
9109//===----------------------------------------------------------------------===//
9110
9111int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
9112
9113 using namespace llvm::AMDGPU::VGPRIndexMode;
9114
9115 if (trySkipToken(AsmToken::RParen)) {
9116 return OFF;
9117 }
9118
9119 int64_t Imm = 0;
9120
9121 while (true) {
9122 unsigned Mode = 0;
9123 SMLoc S = getLoc();
9124
9125 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
9126 if (trySkipId(IdSymbolic[ModeId])) {
9127 Mode = 1 << ModeId;
9128 break;
9129 }
9130 }
9131
9132 if (Mode == 0) {
9133 Error(S, (Imm == 0)?
9134 "expected a VGPR index mode or a closing parenthesis" :
9135 "expected a VGPR index mode");
9136 return UNDEF;
9137 }
9138
9139 if (Imm & Mode) {
9140 Error(S, "duplicate VGPR index mode");
9141 return UNDEF;
9142 }
9143 Imm |= Mode;
9144
9145 if (trySkipToken(AsmToken::RParen))
9146 break;
9147 if (!skipToken(AsmToken::Comma,
9148 "expected a comma or a closing parenthesis"))
9149 return UNDEF;
9150 }
9151
9152 return Imm;
9153}
9154
9155ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
9156
9157 using namespace llvm::AMDGPU::VGPRIndexMode;
9158
9159 int64_t Imm = 0;
9160 SMLoc S = getLoc();
9161
9162 if (trySkipId("gpr_idx", AsmToken::LParen)) {
9163 Imm = parseGPRIdxMacro();
9164 if (Imm == UNDEF)
9165 return ParseStatus::Failure;
9166 } else {
9167 if (getParser().parseAbsoluteExpression(Imm))
9168 return ParseStatus::Failure;
9169 if (Imm < 0 || !isUInt<4>(Imm))
9170 return Error(S, "invalid immediate: only 4-bit values are legal");
9171 }
9172
9173 Operands.push_back(
9174 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
9175 return ParseStatus::Success;
9176}
9177
9178bool AMDGPUOperand::isGPRIdxMode() const {
9179 return isImmTy(ImmTyGprIdxMode);
9180}
9181
9182//===----------------------------------------------------------------------===//
9183// sopp branch targets
9184//===----------------------------------------------------------------------===//
9185
9186ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
9187
9188 // Make sure we are not parsing something
9189 // that looks like a label or an expression but is not.
9190 // This will improve error messages.
9191 if (isRegister() || isModifier())
9192 return ParseStatus::NoMatch;
9193
9194 if (!parseExpr(Operands))
9195 return ParseStatus::Failure;
9196
9197 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
9198 assert(Opr.isImm() || Opr.isExpr());
9199 SMLoc Loc = Opr.getStartLoc();
9200
9201 // Currently we do not support arbitrary expressions as branch targets.
9202 // Only labels and absolute expressions are accepted.
9203 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
9204 Error(Loc, "expected an absolute expression or a label");
9205 } else if (Opr.isImm() && !Opr.isS16Imm()) {
9206 Error(Loc, "expected a 16-bit signed jump offset");
9207 }
9208
9209 return ParseStatus::Success;
9210}
9211
9212//===----------------------------------------------------------------------===//
9213// Boolean holding registers
9214//===----------------------------------------------------------------------===//
9215
9216ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
9217 return parseReg(Operands);
9218}
9219
9220//===----------------------------------------------------------------------===//
9221// mubuf
9222//===----------------------------------------------------------------------===//
9223
// Converts parsed MUBUF operands into MCInst operands.
// Parsed operands are visited from index 1. Registers and untyped (ImmTyNone)
// immediates are appended to Inst in parse order, tokens are skipped, and
// every other immediate is recorded in OptionalIdx by its immediate type so
// that the optional offset and cpol operands can be appended afterwards.
// IsAtomic enables the check for an atomic-return opcode, in which case the
// first register operand is added twice.
9224void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
9225 const OperandVector &Operands,
9226 bool IsAtomic) {
9227 OptionalImmIndexMap OptionalIdx;
9228 unsigned FirstOperandIdx = 1;
9229 bool IsAtomicReturn = false;
9230
9231 if (IsAtomic) {
9232 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
// NOTE(review): embedded listing number 9233 is absent from this excerpt; the
// right-hand operand of '&' (the TSFlags bit being tested) is not visible.
9234 }
9235
9236 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
9237 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9238
9239 // Add the register arguments
9240 if (Op.isReg()) {
9241 Op.addRegOperands(Inst, 1);
9242 // Insert a tied src for atomic return dst.
9243 // This cannot be postponed as subsequent calls to
9244 // addImmOperands rely on correct number of MC operands.
9245 if (IsAtomicReturn && i == FirstOperandIdx)
9246 Op.addRegOperands(Inst, 1);
9247 continue;
9248 }
9249
9250 // Handle the case where soffset is an immediate
9251 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9252 Op.addImmOperands(Inst, 1);
9253 continue;
9254 }
9255
9256 // Handle tokens like 'offen' which are sometimes hard-coded into the
9257 // asm string. There are no MCInst operands for these.
9258 if (Op.isToken()) {
9259 continue;
9260 }
9261 assert(Op.isImm());
9262
9263 // Handle optional arguments
9264 OptionalIdx[Op.getImmTy()] = i;
9265 }
9266
// Append the optional immediates; cpol is given an explicit default of 0.
9267 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
9268 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
9269 // Parse a dummy operand as a placeholder for the SWZ operand. This enforces
9270 // agreement between MCInstrDesc.getNumOperands and MCInst.getNumOperands.
// NOTE(review): embedded listing number 9271 is absent from this excerpt; the
// statement that adds the placeholder described above is not visible.
9272}
9273
9274//===----------------------------------------------------------------------===//
9275// smrd
9276//===----------------------------------------------------------------------===//
9277
9278bool AMDGPUOperand::isSMRDOffset8() const {
9279 return isImmLiteral() && isUInt<8>(getImm());
9280}
9281
9282bool AMDGPUOperand::isSMEMOffset() const {
9283 // Offset range is checked later by validator.
9284 return isImmLiteral();
9285}
9286
9287bool AMDGPUOperand::isSMRDLiteralOffset() const {
9288 // 32-bit literals are only supported on CI and we only want to use them
9289 // when the offset is > 8-bits.
9290 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
9291}
9292
9293//===----------------------------------------------------------------------===//
9294// vop3
9295//===----------------------------------------------------------------------===//
9296
/// Convert an output-modifier multiplier to its encoded value, in place.
///
/// Accepts 1, 2 and 4, which are rewritten to 0, 1 and 2 respectively.
/// \returns false and leaves \p Mul untouched for any other value.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
  case 2:
  case 4:
    // 1 -> 0, 2 -> 1, 4 -> 2.
    Mul >>= 1;
    return true;
  default:
    return false;
  }
}
9303}
9304
/// Convert an output-modifier divisor to its encoded value, in place.
///
/// Accepts 1 (rewritten to 0) and 2 (rewritten to 3).
/// \returns false and leaves \p Div untouched for any other value.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1:
    Div = 0;
    return true;
  case 2:
    Div = 3;
    return true;
  default:
    return false;
  }
}
9318
9319// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
9320// This is intentional and ensures compatibility with sp3.
9321// See bug 35397 for details.
9322bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9323 if (BoundCtrl == 0 || BoundCtrl == 1) {
9324 if (!isGFX11Plus())
9325 BoundCtrl = 1;
9326 return true;
9327 }
9328 return false;
9329}
9330
9331void AMDGPUAsmParser::onBeginOfFile() {
9332 if (!getParser().getStreamer().getTargetStreamer() ||
9333 getSTI().getTargetTriple().getArch() == Triple::r600)
9334 return;
9335
9336 if (!getTargetStreamer().getTargetID())
9337 getTargetStreamer().initializeTargetID(getSTI(),
9338 getSTI().getFeatureString());
9339
9340 if (isHsaAbi(getSTI()))
9341 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9342}
9343
9344/// Parse AMDGPU specific expressions.
9345///
9346/// expr ::= or(expr, ...) |
9347/// max(expr, ...)
9348///
// Besides "or" and "max", the identifier switch below also recognises
// "extrasgprs", "totalnumvgprs", "alignto", "occupancy" and "instprefsize".
// A recognised identifier is treated as an AMDGPU expression only when it is
// immediately followed by '('; everything else is delegated to the generic
// parser's parsePrimaryExpr.
// Returns true on error (after reporting it) and false on success, in which
// case Res holds the created expression.
9349bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
9350 using AGVK = AMDGPUMCExpr::VariantKind;
9351
9352 if (isToken(AsmToken::Identifier)) {
9353 StringRef TokenId = getTokenStr();
9354 AGVK VK = StringSwitch<AGVK>(TokenId)
9355 .Case("max", AGVK::AGVK_Max)
9356 .Case("or", AGVK::AGVK_Or)
9357 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
9358 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9359 .Case("alignto", AGVK::AGVK_AlignTo)
9360 .Case("occupancy", AGVK::AGVK_Occupancy)
9361 .Case("instprefsize", AGVK::AGVK_InstPrefSize)
9362 .Default(AGVK::AGVK_None);
9363
9364 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
// NOTE(review): embedded listing number 9365 is absent from this excerpt; the
// declaration of Exprs, which is used below, is not visible.
9366 uint64_t CommaCount = 0;
9367 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
9368 lex(); // Eat '('
9369 while (true) {
// A closing parenthesis ends the argument list. The list must be non-empty
// and contain exactly one fewer comma than arguments, which rejects a
// trailing comma.
9370 if (trySkipToken(AsmToken::RParen)) {
9371 if (Exprs.empty()) {
9372 Error(getToken().getLoc(),
9373 "empty " + Twine(TokenId) + " expression");
9374 return true;
9375 }
9376 if (CommaCount + 1 != Exprs.size()) {
9377 Error(getToken().getLoc(),
9378 "mismatch of commas in " + Twine(TokenId) + " expression");
9379 return true;
9380 }
9381 Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
9382 return false;
9383 }
// Otherwise parse one argument, which must be followed by ',' or ')'.
9384 const MCExpr *Expr;
9385 if (getParser().parseExpression(Expr, EndLoc))
9386 return true;
9387 Exprs.push_back(Expr);
9388 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
9389 if (LastTokenWasComma)
9390 CommaCount++;
9391 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
9392 Error(getToken().getLoc(),
9393 "unexpected token in " + Twine(TokenId) + " expression");
9394 return true;
9395 }
9396 }
9397 }
9398 }
9399 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
9400}
9401
9402ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
9403 StringRef Name = getTokenStr();
9404 if (Name == "mul") {
9405 return parseIntWithPrefix("mul", Operands,
9406 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
9407 }
9408
9409 if (Name == "div") {
9410 return parseIntWithPrefix("div", Operands,
9411 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
9412 }
9413
9414 return ParseStatus::NoMatch;
9415}
9416
9417// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
9418// the number of src operands present, then copies that bit into src0_modifiers.
9419static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
9420 int Opc = Inst.getOpcode();
9421 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9422 if (OpSelIdx == -1)
9423 return;
9424
9425 int SrcNum;
9426 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9427 AMDGPU::OpName::src2};
9428 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
9429 ++SrcNum)
9430 ;
9431 assert(SrcNum > 0);
9432
9433 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9434
9435 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
9436 if (DstIdx == -1)
9437 return;
9438
9439 const MCOperand &DstOp = Inst.getOperand(DstIdx);
9440 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
9441 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9442 if (DstOp.isReg() &&
9443 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
9444 if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
9445 ModVal |= SISrcMods::DST_OP_SEL;
9446 } else {
9447 if ((OpSel & (1 << SrcNum)) != 0)
9448 ModVal |= SISrcMods::DST_OP_SEL;
9449 }
9450 Inst.getOperand(ModIdx).setImm(ModVal);
9451}
9452
9453void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
9454 const OperandVector &Operands) {
9455 cvtVOP3P(Inst, Operands);
9456 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9457}
9458
9459void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
9460 OptionalImmIndexMap &OptionalIdx) {
9461 cvtVOP3P(Inst, Operands, OptionalIdx);
9462 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9463}
9464
// Returns whether operand OpNum of Desc is an input-modifiers operand that is
// followed by a register-class operand; the individual conditions are listed
// inline below.
// NOTE(review): Desc.operands()[OpNum] is read before any bounds check, so
// callers presumably guarantee OpNum < Desc.NumOperands - confirm.
9465static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
9466 return
9467 // 1. This operand is input modifiers
9468 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
9469 // 2. This is not last operand
9470 && Desc.NumOperands > (OpNum + 1)
9471 // 3. Next operand is register class
9472 && Desc.operands()[OpNum + 1].RegClass != -1
9473 // 4. Next register is not tied to any other operand
9474 && Desc.getOperandConstraint(OpNum + 1,
// NOTE(review): embedded listing number 9475 is absent from this excerpt; the
// remaining arguments and the comparison that finish condition 4 are not
// visible.
9476}
9477
9478void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst, unsigned OpSel) {
9479 unsigned Opc = Inst.getOpcode();
9480 constexpr AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9481 AMDGPU::OpName::src2};
9482 constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9483 AMDGPU::OpName::src1_modifiers,
9484 AMDGPU::OpName::src2_modifiers};
9485 for (int J = 0; J < 3; ++J) {
9486 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9487 if (OpIdx == -1)
9488 // Some instructions, e.g. v_interp_p2_f16 in GFX9, have src0, src2, but
9489 // no src1. So continue instead of break.
9490 continue;
9491
9492 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9493 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9494
9495 if ((OpSel & (1 << J)) != 0)
9496 ModVal |= SISrcMods::OP_SEL_0;
9497 // op_sel[3] is encoded in src0_modifiers.
9498 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)
9499 ModVal |= SISrcMods::DST_OP_SEL;
9500
9501 Inst.getOperand(ModIdx).setImm(ModVal);
9502 }
9503}
9504
// Converts the parsed operands of a VOP3 interpolation instruction into
// MCInst operands. Defs come first, then the remaining parsed operands in
// order, then the optional high/clamp/omod/op_sel immediates for whichever of
// those named operands the opcode has. When op_sel is present, its bits are
// finally folded into the source modifiers through cvtOpSelHelper.
9505void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
9506{
9507 OptionalImmIndexMap OptionalIdx;
9508 unsigned Opc = Inst.getOpcode();
9509
// Parsed operand 0 is skipped; the first getNumDefs() operands after it are
// added as registers.
9510 unsigned I = 1;
9511 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9512 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9513 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9514 }
9515
9516 for (unsigned E = Operands.size(); I != E; ++I) {
9517 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
// NOTE(review): embedded listing number 9518 is absent from this excerpt; the
// opening 'if' condition of this chain is not visible.
9519 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9520 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
9521 Op.isInterpAttrChan()) {
9522 Inst.addOperand(MCOperand::createImm(Op.getImm()));
9523 } else if (Op.isImmModifier()) {
// Optional modifiers are only recorded here and appended after the loop.
9524 OptionalIdx[Op.getImmTy()] = I;
9525 } else {
9526 llvm_unreachable("unhandled operand type");
9527 }
9528 }
9529
9530 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
9531 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9532 AMDGPUOperand::ImmTyHigh);
9533
9534 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9535 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9536 AMDGPUOperand::ImmTyClamp);
9537
9538 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9539 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9540 AMDGPUOperand::ImmTyOModSI);
9541
9542 // Some v_interp instructions use op_sel[3] for dst.
9543 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9544 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9545 AMDGPUOperand::ImmTyOpSel);
9546 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9547 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9548
9549 cvtOpSelHelper(Inst, OpSel);
9550 }
9551}
9552
// Converts the parsed operands of a VINTERP instruction into MCInst operands.
// Defs come first, then the remaining parsed operands, then the optional
// clamp, op_sel (only if the opcode has it) and wait_exp immediates, in that
// order. When op_sel is present, its bits are finally folded into the source
// modifiers through cvtOpSelHelper.
9553void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
9554{
9555 OptionalImmIndexMap OptionalIdx;
9556 unsigned Opc = Inst.getOpcode();
9557
9558 unsigned I = 1;
9559 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9560 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9561 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9562 }
9563
9564 for (unsigned E = Operands.size(); I != E; ++I) {
9565 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
// NOTE(review): embedded listing number 9566 is absent from this excerpt; the
// opening 'if' condition of this chain is not visible.
9567 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9568 } else if (Op.isImmModifier()) {
9569 OptionalIdx[Op.getImmTy()] = I;
9570 } else {
9571 llvm_unreachable("unhandled operand type");
9572 }
9573 }
9574
9575 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
9576
9577 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9578 if (OpSelIdx != -1)
9579 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9580
9581 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
9582
// Without an op_sel operand there is nothing to fold into the modifiers.
9583 if (OpSelIdx == -1)
9584 return;
9585
9586 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9587 cvtOpSelHelper(Inst, OpSel);
9588}
9589
// Converts the parsed operands of a scaled MFMA instruction into MCInst
// operands. Defs come first, then the remaining parsed operands. Parsed cbsz
// and blgp values, if any, are written into their MCInst operand slots, and
// the op_sel / op_sel_hi masks are translated into src0_modifiers and
// src1_modifiers.
9590void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9591 const OperandVector &Operands) {
9592 OptionalImmIndexMap OptionalIdx;
9593 unsigned Opc = Inst.getOpcode();
9594 unsigned I = 1;
9595 int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
9596
9597 const MCInstrDesc &Desc = MII.get(Opc);
9598
9599 for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
9600 static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);
9601
9602 for (unsigned E = Operands.size(); I != E; ++I) {
9603 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
9604 int NumOperands = Inst.getNumOperands();
9605 // The order of operands in MCInst and parsed operands are different.
9606 // Adding dummy cbsz and blgp operands at corresponding MCInst operand
9607 // indices for parsing scale values correctly.
9608 if (NumOperands == CbszOpIdx) {
// NOTE(review): embedded listing numbers 9609-9610 are absent from this
// excerpt; the statements that add the dummy operands are not visible.
9611 }
9612 if (isRegOrImmWithInputMods(Desc, NumOperands)) {
9613 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9614 } else if (Op.isImmModifier()) {
9615 OptionalIdx[Op.getImmTy()] = I;
9616 } else {
9617 Op.addRegOrImmOperands(Inst, 1);
9618 }
9619 }
9620
9621 // Insert CBSZ and BLGP operands for F8F6F4 variants
9622 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9623 if (CbszIdx != OptionalIdx.end()) {
9624 int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
9625 Inst.getOperand(CbszOpIdx).setImm(CbszVal);
9626 }
9627
9628 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
9629 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9630 if (BlgpIdx != OptionalIdx.end()) {
9631 int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
9632 Inst.getOperand(BlgpOpIdx).setImm(BlgpVal);
9633 }
9634
9635 // Add dummy src_modifiers
// NOTE(review): embedded listing numbers 9636-9637 are absent from this
// excerpt; the statements that add the dummy src_modifiers are not visible.
9638
9639 // Handle op_sel fields
9640
// Both masks default to 0 when the corresponding modifier was not parsed.
9641 unsigned OpSel = 0;
9642 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9643 if (OpselIdx != OptionalIdx.end()) {
9644 OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
9645 .getImm();
9646 }
9647
9648 unsigned OpSelHi = 0;
9649 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9650 if (OpselHiIdx != OptionalIdx.end()) {
9651 OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
9652 .getImm();
9653 }
9654 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9655 AMDGPU::OpName::src1_modifiers};
9656
// Bit J of OpSel / OpSelHi becomes OP_SEL_0 / OP_SEL_1 for source J. The
// modifier operand is overwritten with the new value, not OR-ed into it.
9657 for (unsigned J = 0; J < 2; ++J) {
9658 unsigned ModVal = 0;
9659 if (OpSel & (1 << J))
9660 ModVal |= SISrcMods::OP_SEL_0;
9661 if (OpSelHi & (1 << J))
9662 ModVal |= SISrcMods::OP_SEL_1;
9663
9664 const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9665 Inst.getOperand(ModIdx).setImm(ModVal);
9666 }
9667}
9668
// Converts parsed VOP3 operands into MCInst operands. Defs come first, then
// the remaining parsed operands in order. Immediate modifiers are recorded in
// the caller-provided OptionalIdx map and appended afterwards as scale_sel,
// clamp, byte_sel and omod, each only if the opcode has that named operand.
// MAC-style opcodes additionally get a zero src2_modifiers and a copy of the
// destination inserted as src2.
9669void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
9670 OptionalImmIndexMap &OptionalIdx) {
9671 unsigned Opc = Inst.getOpcode();
9672
9673 unsigned I = 1;
9674 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9675 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9676 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9677 }
9678
9679 for (unsigned E = Operands.size(); I != E; ++I) {
9680 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
// NOTE(review): embedded listing number 9681 is absent from this excerpt; the
// opening 'if' condition of this chain is not visible.
9682 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9683 } else if (Op.isImmModifier()) {
9684 OptionalIdx[Op.getImmTy()] = I;
9685 } else {
9686 Op.addRegOrImmOperands(Inst, 1);
9687 }
9688 }
9689
9690 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::scale_sel))
9691 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9692 AMDGPUOperand::ImmTyScaleSel);
9693
9694 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9695 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9696 AMDGPUOperand::ImmTyClamp);
9697
// For byte_sel opcodes that also have vdst_in, a copy of operand 0 is added
// before the byte_sel immediate.
9698 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
9699 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
9700 Inst.addOperand(Inst.getOperand(0));
9701 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9702 AMDGPUOperand::ImmTyByteSel);
9703 }
9704
9705 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9706 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9707 AMDGPUOperand::ImmTyOModSI);
9708
9709 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
9710 // it has src2 register operand that is tied to dst operand
9711 // we don't allow modifiers for this operand in assembler so src2_modifiers
9712 // should be 0.
9713 if (isMAC(Opc)) {
9714 auto *it = Inst.begin();
9715 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
9716 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
9717 ++it;
9718 // Copy the operand to ensure it's not invalidated when Inst grows.
9719 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
9720 }
9721}
9722
9723void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
9724 OptionalImmIndexMap OptionalIdx;
9725 cvtVOP3(Inst, Operands, OptionalIdx);
9726}
9727
// Second conversion stage for VOP3P-style instructions; it expects the
// register/immediate operands to have been added to Inst already.
// It appends, for whichever named operands the opcode has, vdst_in, bitop3,
// op_sel, op_sel_hi, the matrix_* fields, neg_lo and neg_hi. It then folds
// the op_sel / op_sel_hi / neg_lo / neg_hi bit masks into the per-source
// srcN_modifiers operands.
// Every addOperand / addOptionalImmOperand call appends to Inst, so the
// statement order below is significant.
9728void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
9729 OptionalImmIndexMap &OptIdx) {
9730 const int Opc = Inst.getOpcode();
9731 const MCInstrDesc &Desc = MII.get(Opc);
9732
9733 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
9734
// These opcodes get a zero src2_modifiers placeholder followed by a copy of
// operand 0.
9735 if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
9736 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
9737 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9738 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9739 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx11 ||
9740 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx11 ||
9741 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9742 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12 ||
9743 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx13 ||
9744 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx13) {
9745 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
9746 Inst.addOperand(Inst.getOperand(0));
9747 }
9748
9749 // Append vdst_in only if a previous converter (cvtVOP3DPP for DPP variants,
9750 // cvtVOP3 for byte_sel variants) hasn't already placed it. Use the position
9751 // of the named operand to detect that, the same way cvtVOP3DPP does
9752 // internally.
9753 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
9754 if (VdstInIdx != -1 && VdstInIdx == static_cast<int>(Inst.getNumOperands()))
9755 Inst.addOperand(Inst.getOperand(0));
9756
9757 int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
9758 if (BitOp3Idx != -1) {
9759 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9760 }
9761
9762 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
9763 // instruction, and then figure out where to actually put the modifiers
9764
9765 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9766 if (OpSelIdx != -1) {
9767 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
9768 }
9769
// op_sel_hi defaults to all ones (-1) for packed instructions and to 0
// otherwise.
9770 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
9771 if (OpSelHiIdx != -1) {
9772 int DefaultVal = IsPacked ? -1 : 0;
9773 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
9774 DefaultVal);
9775 }
9776
// The matrix_* optional immediates below are all given an explicit default
// of 0.
9777 int MatrixAFMTIdx =
9778 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
9779 if (MatrixAFMTIdx != -1) {
9780 addOptionalImmOperand(Inst, Operands, OptIdx,
9781 AMDGPUOperand::ImmTyMatrixAFMT, 0);
9782 }
9783
9784 int MatrixBFMTIdx =
9785 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
9786 if (MatrixBFMTIdx != -1) {
9787 addOptionalImmOperand(Inst, Operands, OptIdx,
9788 AMDGPUOperand::ImmTyMatrixBFMT, 0);
9789 }
9790
9791 int MatrixAScaleIdx =
9792 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale);
9793 if (MatrixAScaleIdx != -1) {
9794 addOptionalImmOperand(Inst, Operands, OptIdx,
9795 AMDGPUOperand::ImmTyMatrixAScale, 0);
9796 }
9797
9798 int MatrixBScaleIdx =
9799 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale);
9800 if (MatrixBScaleIdx != -1) {
9801 addOptionalImmOperand(Inst, Operands, OptIdx,
9802 AMDGPUOperand::ImmTyMatrixBScale, 0);
9803 }
9804
9805 int MatrixAScaleFmtIdx =
9806 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
9807 if (MatrixAScaleFmtIdx != -1) {
9808 addOptionalImmOperand(Inst, Operands, OptIdx,
9809 AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
9810 }
9811
9812 int MatrixBScaleFmtIdx =
9813 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
9814 if (MatrixBScaleFmtIdx != -1) {
9815 addOptionalImmOperand(Inst, Operands, OptIdx,
9816 AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
9817 }
9818
9819 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
9820 addOptionalImmOperand(Inst, Operands, OptIdx,
9821 AMDGPUOperand::ImmTyMatrixAReuse, 0);
9822
9823 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
9824 addOptionalImmOperand(Inst, Operands, OptIdx,
9825 AMDGPUOperand::ImmTyMatrixBReuse, 0);
9826
9827 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
9828 if (NegLoIdx != -1)
9829 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
9830
9831 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
9832 if (NegHiIdx != -1)
9833 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
9834
9835 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9836 AMDGPU::OpName::src2};
9837 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9838 AMDGPU::OpName::src1_modifiers,
9839 AMDGPU::OpName::src2_modifiers};
9840
// Read the masks back from the operands just appended; a mask stays 0 when
// the opcode lacks the corresponding operand.
9841 unsigned OpSel = 0;
9842 unsigned OpSelHi = 0;
9843 unsigned NegLo = 0;
9844 unsigned NegHi = 0;
9845
9846 if (OpSelIdx != -1)
9847 OpSel = Inst.getOperand(OpSelIdx).getImm();
9848
9849 if (OpSelHiIdx != -1)
9850 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
9851
9852 if (NegLoIdx != -1)
9853 NegLo = Inst.getOperand(NegLoIdx).getImm();
9854
9855 if (NegHiIdx != -1)
9856 NegHi = Inst.getOperand(NegHiIdx).getImm();
9857
// Unlike cvtOpSelHelper, this loop stops at the first missing source
// operand. A source without a modifiers operand is skipped.
9858 for (int J = 0; J < 3; ++J) {
9859 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9860 if (OpIdx == -1)
9861 break;
9862
9863 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9864
9865 if (ModIdx == -1)
9866 continue;
9867
9868 uint32_t ModVal = 0;
9869
// For a register in the VGPR_16 class, OP_SEL_0 is taken from the register
// itself (isHi16Reg); otherwise it is taken from bit J of op_sel.
9870 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
9871 if (SrcOp.isReg() && getMRI()
9872 ->getRegClass(AMDGPU::VGPR_16RegClassID)
9873 .contains(SrcOp.getReg())) {
9874 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
9875 if (VGPRSuffixIsHi)
9876 ModVal |= SISrcMods::OP_SEL_0;
9877 } else {
9878 if ((OpSel & (1 << J)) != 0)
9879 ModVal |= SISrcMods::OP_SEL_0;
9880 }
9881
9882 if ((OpSelHi & (1 << J)) != 0)
9883 ModVal |= SISrcMods::OP_SEL_1;
9884
9885 if ((NegLo & (1 << J)) != 0)
9886 ModVal |= SISrcMods::NEG;
9887
9888 if ((NegHi & (1 << J)) != 0)
9889 ModVal |= SISrcMods::NEG_HI;
9890
// OR the new bits into the modifier value that is already present.
9891 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
9892 }
9893}
9894
9895void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
9896 OptionalImmIndexMap OptIdx;
9897 cvtVOP3(Inst, Operands, OptIdx);
9898 cvtVOP3P(Inst, Operands, OptIdx);
9899}
9900
9902 unsigned i, unsigned Opc,
9903 AMDGPU::OpName OpName) {
// NOTE(review): the first line of this signature (embedded listing number
// 9901) is absent from this excerpt.
// If the opcode has the named operand OpName, parsed operand i is added as a
// modifiers+source pair (two MCInst operands); otherwise it is added as a
// plain register (one MCInst operand).
9904 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
9905 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9906 else
9907 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
9908}
9909
9910void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
9911 unsigned Opc = Inst.getOpcode();
9912
9913 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9914 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
9915 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
9916 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
9917 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
9918
9919 OptionalImmIndexMap OptIdx;
9920 for (unsigned i = 5; i < Operands.size(); ++i) {
9921 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9922 OptIdx[Op.getImmTy()] = i;
9923 }
9924
9925 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
9926 addOptionalImmOperand(Inst, Operands, OptIdx,
9927 AMDGPUOperand::ImmTyIndexKey8bit);
9928
9929 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
9930 addOptionalImmOperand(Inst, Operands, OptIdx,
9931 AMDGPUOperand::ImmTyIndexKey16bit);
9932
9933 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
9934 addOptionalImmOperand(Inst, Operands, OptIdx,
9935 AMDGPUOperand::ImmTyIndexKey32bit);
9936
9937 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9938 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
9939
9940 cvtVOP3P(Inst, Operands, OptIdx);
9941}
9942
9943//===----------------------------------------------------------------------===//
9944// VOPD
9945//===----------------------------------------------------------------------===//
9946
9947ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
9948 if (!hasVOPD(getSTI()))
9949 return ParseStatus::NoMatch;
9950
9951 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
9952 SMLoc S = getLoc();
9953 lex();
9954 lex();
9955 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
9956 SMLoc OpYLoc = getLoc();
9957 StringRef OpYName;
9958 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
9959 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
9960 return ParseStatus::Success;
9961 }
9962 return Error(OpYLoc, "expected a VOPDY instruction after ::");
9963 }
9964 return ParseStatus::NoMatch;
9965}
9966
9967// Create VOPD MCInst operands using parsed assembler operands.
// The parsed operands are reordered into the MCInst layout described below,
// using the indices reported by the VOPD instruction info. If the opcode has
// a bitop3 operand, it is appended last as an optional immediate.
9968void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
9969 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9970
// addOp appends the parsed operand at ParsedOprIdx to Inst in whichever form
// matches it.
9971 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
9972 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
// NOTE(review): embedded listing number 9973 is absent from this excerpt; the
// 'if' condition guarding the next two statements is not visible.
9974 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9975 return;
9976 }
9977 if (Op.isReg()) {
9978 Op.addRegOperands(Inst, 1);
9979 return;
9980 }
9981 if (Op.isImm()) {
9982 Op.addImmOperands(Inst, 1);
9983 return;
9984 }
9985 llvm_unreachable("Unhandled operand type in cvtVOPD");
9986 };
9987
9988 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
9989
9990 // MCInst operands are ordered as follows:
9991 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
9992
// First the destinations of all components.
9993 for (auto CompIdx : VOPD::COMPONENTS) {
9994 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
9995 }
9996
// Then each component's sources. A component for which hasSrc2Acc() holds
// gets its destination added again after its sources.
9997 for (auto CompIdx : VOPD::COMPONENTS) {
9998 const auto &CInfo = InstInfo[CompIdx];
9999 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
10000 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
10001 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
10002 if (CInfo.hasSrc2Acc())
10003 addOp(CInfo.getIndexOfDstInParsedOperands());
10004 }
10005
10006 int BitOp3Idx =
10007 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
10008 if (BitOp3Idx != -1) {
// Only the last parsed operand is considered as a candidate for bitop3.
10009 OptionalImmIndexMap OptIdx;
10010 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
10011 if (Op.isImm())
10012 OptIdx[Op.getImmTy()] = Operands.size() - 1;
10013
10014 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
10015 }
10016}
10017
10018//===----------------------------------------------------------------------===//
10019// dpp
10020//===----------------------------------------------------------------------===//
10021
10022bool AMDGPUOperand::isDPP8() const {
10023 return isImmTy(ImmTyDPP8);
10024}
10025
10026bool AMDGPUOperand::isDPPCtrl() const {
10027 using namespace AMDGPU::DPP;
10028
10029 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
10030 if (result) {
10031 int64_t Imm = getImm();
10032 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
10033 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
10034 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
10035 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
10036 (Imm == DppCtrl::WAVE_SHL1) ||
10037 (Imm == DppCtrl::WAVE_ROL1) ||
10038 (Imm == DppCtrl::WAVE_SHR1) ||
10039 (Imm == DppCtrl::WAVE_ROR1) ||
10040 (Imm == DppCtrl::ROW_MIRROR) ||
10041 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
10042 (Imm == DppCtrl::BCAST15) ||
10043 (Imm == DppCtrl::BCAST31) ||
10044 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
10045 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
10046 }
10047 return false;
10048}
10049
10050//===----------------------------------------------------------------------===//
10051// mAI
10052//===----------------------------------------------------------------------===//
10053
10054bool AMDGPUOperand::isBLGP() const {
10055 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
10056}
10057
10058bool AMDGPUOperand::isS16Imm() const {
10059 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
10060}
10061
10062bool AMDGPUOperand::isU16Imm() const {
10063 return isImmLiteral() && isUInt<16>(getImm());
10064}
10065
10066//===----------------------------------------------------------------------===//
10067// dim
10068//===----------------------------------------------------------------------===//
10069
10070bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
10071 // We want to allow "dim:1D" etc.,
10072 // but the initial 1 is tokenized as an integer.
10073 std::string Token;
10074 if (isToken(AsmToken::Integer)) {
10075 SMLoc Loc = getToken().getEndLoc();
10076 Token = std::string(getTokenStr());
10077 lex();
10078 if (getLoc() != Loc)
10079 return false;
10080 }
10081
10082 StringRef Suffix;
10083 if (!parseId(Suffix))
10084 return false;
10085 Token += Suffix;
10086
10087 StringRef DimId = Token;
10088 DimId.consume_front("SQ_RSRC_IMG_");
10089
10090 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
10091 if (!DimInfo)
10092 return false;
10093
10094 Encoding = DimInfo->Encoding;
10095 return true;
10096}
10097
10098ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
10099 if (!isGFX10Plus())
10100 return ParseStatus::NoMatch;
10101
10102 SMLoc S = getLoc();
10103
10104 if (!trySkipId("dim", AsmToken::Colon))
10105 return ParseStatus::NoMatch;
10106
10107 unsigned Encoding;
10108 SMLoc Loc = getLoc();
10109 if (!parseDimId(Encoding))
10110 return Error(Loc, "invalid dim value");
10111
10112 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
10113 AMDGPUOperand::ImmTyDim));
10114 return ParseStatus::Success;
10115}
10116
10117//===----------------------------------------------------------------------===//
10118// dpp
10119//===----------------------------------------------------------------------===//
10120
10121ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
10122 SMLoc S = getLoc();
10123
10124 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
10125 return ParseStatus::NoMatch;
10126
10127 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
10128
10129 int64_t Sels[8];
10130
10131 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
10132 return ParseStatus::Failure;
10133
10134 for (size_t i = 0; i < 8; ++i) {
10135 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
10136 return ParseStatus::Failure;
10137
10138 SMLoc Loc = getLoc();
10139 if (getParser().parseAbsoluteExpression(Sels[i]))
10140 return ParseStatus::Failure;
10141 if (0 > Sels[i] || 7 < Sels[i])
10142 return Error(Loc, "expected a 3-bit value");
10143 }
10144
10145 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
10146 return ParseStatus::Failure;
10147
10148 unsigned DPP8 = 0;
10149 for (size_t i = 0; i < 8; ++i)
10150 DPP8 |= (Sels[i] << (i * 3));
10151
10152 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
10153 return ParseStatus::Success;
10154}
10155
10156bool
10157AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
10158 const OperandVector &Operands) {
10159 if (Ctrl == "row_newbcast")
10160 return isGFX90A();
10161
10162 if (Ctrl == "row_share" ||
10163 Ctrl == "row_xmask")
10164 return isGFX10Plus();
10165
10166 if (Ctrl == "wave_shl" ||
10167 Ctrl == "wave_shr" ||
10168 Ctrl == "wave_rol" ||
10169 Ctrl == "wave_ror" ||
10170 Ctrl == "row_bcast")
10171 return isVI() || isGFX9();
10172
10173 return Ctrl == "row_mirror" ||
10174 Ctrl == "row_half_mirror" ||
10175 Ctrl == "quad_perm" ||
10176 Ctrl == "row_shl" ||
10177 Ctrl == "row_shr" ||
10178 Ctrl == "row_ror";
10179}
10180
10181int64_t
10182AMDGPUAsmParser::parseDPPCtrlPerm() {
10183 // quad_perm:[%d,%d,%d,%d]
10184
10185 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
10186 return -1;
10187
10188 int64_t Val = 0;
10189 for (int i = 0; i < 4; ++i) {
10190 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
10191 return -1;
10192
10193 int64_t Temp;
10194 SMLoc Loc = getLoc();
10195 if (getParser().parseAbsoluteExpression(Temp))
10196 return -1;
10197 if (Temp < 0 || Temp > 3) {
10198 Error(Loc, "expected a 2-bit value");
10199 return -1;
10200 }
10201
10202 Val += (Temp << i * 2);
10203 }
10204
10205 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
10206 return -1;
10207
10208 return Val;
10209}
10210
10211int64_t
10212AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
10213 using namespace AMDGPU::DPP;
10214
10215 // sel:%d
10216
10217 int64_t Val;
10218 SMLoc Loc = getLoc();
10219
10220 if (getParser().parseAbsoluteExpression(Val))
10221 return -1;
10222
10223 struct DppCtrlCheck {
10224 int64_t Ctrl;
10225 int Lo;
10226 int Hi;
10227 };
10228
10229 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
10230 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
10231 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
10232 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
10233 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
10234 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
10235 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
10236 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
10237 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
10238 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
10239 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
10240 .Default({-1, 0, 0});
10241
10242 bool Valid;
10243 if (Check.Ctrl == -1) {
10244 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
10245 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
10246 } else {
10247 Valid = Check.Lo <= Val && Val <= Check.Hi;
10248 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
10249 }
10250
10251 if (!Valid) {
10252 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
10253 return -1;
10254 }
10255
10256 return Val;
10257}
10258
10259ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
10260 using namespace AMDGPU::DPP;
10261
10262 if (!isToken(AsmToken::Identifier) ||
10263 !isSupportedDPPCtrl(getTokenStr(), Operands))
10264 return ParseStatus::NoMatch;
10265
10266 SMLoc S = getLoc();
10267 int64_t Val = -1;
10268 StringRef Ctrl;
10269
10270 parseId(Ctrl);
10271
10272 if (Ctrl == "row_mirror") {
10273 Val = DppCtrl::ROW_MIRROR;
10274 } else if (Ctrl == "row_half_mirror") {
10275 Val = DppCtrl::ROW_HALF_MIRROR;
10276 } else {
10277 if (skipToken(AsmToken::Colon, "expected a colon")) {
10278 if (Ctrl == "quad_perm") {
10279 Val = parseDPPCtrlPerm();
10280 } else {
10281 Val = parseDPPCtrlSel(Ctrl);
10282 }
10283 }
10284 }
10285
10286 if (Val == -1)
10287 return ParseStatus::Failure;
10288
10289 Operands.push_back(
10290 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
10291 return ParseStatus::Success;
10292}
10293
10294void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
10295 bool IsDPP8) {
10296 OptionalImmIndexMap OptionalIdx;
10297 unsigned Opc = Inst.getOpcode();
10298 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10299
10300 // MAC instructions are special because they have 'old'
10301 // operand which is not tied to dst (but assumed to be).
10302 // They also have dummy unused src2_modifiers.
10303 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
10304 int Src2ModIdx =
10305 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
10306 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
10307 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
10308
10309 unsigned I = 1;
10310 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10311 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10312 }
10313
10314 int Fi = 0;
10315 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
10316 bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
10317 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx13 ||
10318 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
10319 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx13 ||
10320 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
10321 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx13 ||
10322 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
10323 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx13;
10324
10325 for (unsigned E = Operands.size(); I != E; ++I) {
10326
10327 if (IsMAC) {
10328 int NumOperands = Inst.getNumOperands();
10329 if (OldIdx == NumOperands) {
10330 // Handle old operand
10331 constexpr int DST_IDX = 0;
10332 Inst.addOperand(Inst.getOperand(DST_IDX));
10333 } else if (Src2ModIdx == NumOperands) {
10334 // Add unused dummy src2_modifiers
10336 }
10337 }
10338
10339 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
10340 Inst.addOperand(Inst.getOperand(0));
10341 }
10342
10343 if (IsVOP3CvtSrDpp) {
10344 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
10346 Inst.addOperand(MCOperand::createReg(MCRegister()));
10347 }
10348 }
10349
10350 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10352 if (TiedTo != -1) {
10353 assert((unsigned)TiedTo < Inst.getNumOperands());
10354 // handle tied old or src2 for MAC instructions
10355 Inst.addOperand(Inst.getOperand(TiedTo));
10356 }
10357 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10358 // Add the register arguments
10359 if (IsDPP8 && Op.isDppFI()) {
10360 Fi = Op.getImm();
10361 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10362 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10363 } else if (Op.isReg()) {
10364 Op.addRegOperands(Inst, 1);
10365 } else if (Op.isImm() &&
10366 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
10367 Op.addImmOperands(Inst, 1);
10368 } else if (Op.isImm()) {
10369 OptionalIdx[Op.getImmTy()] = I;
10370 } else {
10371 llvm_unreachable("unhandled operand type");
10372 }
10373 }
10374
10375 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
10376 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10377 AMDGPUOperand::ImmTyClamp);
10378
10379 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
10380 if (VdstInIdx == static_cast<int>(Inst.getNumOperands()))
10381 Inst.addOperand(Inst.getOperand(0));
10382 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10383 AMDGPUOperand::ImmTyByteSel);
10384 }
10385
10386 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10387 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
10388
10389 if (Desc.TSFlags & SIInstrFlags::VOP3P)
10390 cvtVOP3P(Inst, Operands, OptionalIdx);
10391 else if (Desc.TSFlags & SIInstrFlags::VOP3)
10392 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
10393 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
10394 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
10395 }
10396
10397 if (IsDPP8) {
10398 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
10399 using namespace llvm::AMDGPU::DPP;
10400 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10401 } else {
10402 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
10403 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10404 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10405 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10406
10407 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
10408 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10409 AMDGPUOperand::ImmTyDppFI);
10410 }
10411}
10412
10413void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
10414 OptionalImmIndexMap OptionalIdx;
10415
10416 unsigned I = 1;
10417 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10418 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10419 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10420 }
10421
10422 int Fi = 0;
10423 for (unsigned E = Operands.size(); I != E; ++I) {
10424 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10426 if (TiedTo != -1) {
10427 assert((unsigned)TiedTo < Inst.getNumOperands());
10428 // handle tied old or src2 for MAC instructions
10429 Inst.addOperand(Inst.getOperand(TiedTo));
10430 }
10431 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10432 // Add the register arguments
10433 if (Op.isReg() && validateVccOperand(Op.getReg())) {
10434 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
10435 // Skip it.
10436 continue;
10437 }
10438
10439 if (IsDPP8) {
10440 if (Op.isDPP8()) {
10441 Op.addImmOperands(Inst, 1);
10442 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10443 Op.addRegWithFPInputModsOperands(Inst, 2);
10444 } else if (Op.isDppFI()) {
10445 Fi = Op.getImm();
10446 } else if (Op.isReg()) {
10447 Op.addRegOperands(Inst, 1);
10448 } else {
10449 llvm_unreachable("Invalid operand type");
10450 }
10451 } else {
10453 Op.addRegWithFPInputModsOperands(Inst, 2);
10454 } else if (Op.isReg()) {
10455 Op.addRegOperands(Inst, 1);
10456 } else if (Op.isDPPCtrl()) {
10457 Op.addImmOperands(Inst, 1);
10458 } else if (Op.isImm()) {
10459 // Handle optional arguments
10460 OptionalIdx[Op.getImmTy()] = I;
10461 } else {
10462 llvm_unreachable("Invalid operand type");
10463 }
10464 }
10465 }
10466
10467 if (IsDPP8) {
10468 using namespace llvm::AMDGPU::DPP;
10469 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10470 } else {
10471 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10472 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10473 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10474 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
10475 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10476 AMDGPUOperand::ImmTyDppFI);
10477 }
10478 }
10479}
10480
10481//===----------------------------------------------------------------------===//
10482// sdwa
10483//===----------------------------------------------------------------------===//
10484
10485ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
10486 StringRef Prefix,
10487 AMDGPUOperand::ImmTy Type) {
10488 return parseStringOrIntWithPrefix(
10489 Operands, Prefix,
10490 {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
10491 Type);
10492}
10493
10494ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
10495 return parseStringOrIntWithPrefix(
10496 Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
10497 AMDGPUOperand::ImmTySDWADstUnused);
10498}
10499
10500void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
10501 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
10502}
10503
10504void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
10505 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
10506}
10507
10508void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
10509 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
10510}
10511
10512void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
10513 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
10514}
10515
10516void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
10517 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
10518}
10519
10520void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
10521 uint64_t BasicInstType,
10522 bool SkipDstVcc,
10523 bool SkipSrcVcc) {
10524 using namespace llvm::AMDGPU::SDWA;
10525
10526 OptionalImmIndexMap OptionalIdx;
10527 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
10528 bool SkippedVcc = false;
10529
10530 unsigned I = 1;
10531 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10532 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10533 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10534 }
10535
10536 for (unsigned E = Operands.size(); I != E; ++I) {
10537 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10538 if (SkipVcc && !SkippedVcc && Op.isReg() &&
10539 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
10540 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
10541 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
10542 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
10543 // Skip VCC only if we didn't skip it on previous iteration.
10544 // Note that src0 and src1 occupy 2 slots each because of modifiers.
10545 if (BasicInstType == SIInstrFlags::VOP2 &&
10546 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
10547 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
10548 SkippedVcc = true;
10549 continue;
10550 }
10551 if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
10552 SkippedVcc = true;
10553 continue;
10554 }
10555 }
10557 Op.addRegOrImmWithInputModsOperands(Inst, 2);
10558 } else if (Op.isImm()) {
10559 // Handle optional arguments
10560 OptionalIdx[Op.getImmTy()] = I;
10561 } else {
10562 llvm_unreachable("Invalid operand type");
10563 }
10564 SkippedVcc = false;
10565 }
10566
10567 const unsigned Opc = Inst.getOpcode();
10568 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10569 Opc != AMDGPU::V_NOP_sdwa_vi) {
10570 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
10571 switch (BasicInstType) {
10572 case SIInstrFlags::VOP1:
10573 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
10574 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10575 AMDGPUOperand::ImmTyClamp, 0);
10576
10577 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10578 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10579 AMDGPUOperand::ImmTyOModSI, 0);
10580
10581 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
10582 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10583 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10584
10585 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
10586 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10587 AMDGPUOperand::ImmTySDWADstUnused,
10588 DstUnused::UNUSED_PRESERVE);
10589
10590 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10591 break;
10592
10593 case SIInstrFlags::VOP2:
10594 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10595 AMDGPUOperand::ImmTyClamp, 0);
10596
10597 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
10598 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
10599
10600 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10601 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
10602 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10603 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10604 break;
10605
10606 case SIInstrFlags::VOPC:
10607 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
10608 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10609 AMDGPUOperand::ImmTyClamp, 0);
10610 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10611 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10612 break;
10613
10614 default:
10615 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
10616 }
10617 }
10618
10619 // special case v_mac_{f16, f32}:
10620 // it has src2 register operand that is tied to dst operand
10621 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10622 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10623 auto *it = Inst.begin();
10624 std::advance(
10625 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
10626 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
10627 }
10628}
10629
10630/// Force static initialization.
10631extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
10636
10637#define GET_MATCHER_IMPLEMENTATION
10638#define GET_MNEMONIC_SPELL_CHECKER
10639#define GET_MNEMONIC_CHECKER
10640#include "AMDGPUGenAsmMatcher.inc"
10641
10642ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
10643 unsigned MCK) {
10644 switch (MCK) {
10645 case MCK_addr64:
10646 return parseTokenOp("addr64", Operands);
10647 case MCK_done:
10648 return parseNamedBit("done", Operands, AMDGPUOperand::ImmTyDone, true);
10649 case MCK_idxen:
10650 return parseTokenOp("idxen", Operands);
10651 case MCK_lds:
10652 return parseNamedBit("lds", Operands, AMDGPUOperand::ImmTyLDS,
10653 /*IgnoreNegative=*/true);
10654 case MCK_offen:
10655 return parseTokenOp("offen", Operands);
10656 case MCK_off:
10657 return parseTokenOp("off", Operands);
10658 case MCK_row_95_en:
10659 return parseNamedBit("row_en", Operands, AMDGPUOperand::ImmTyRowEn, true);
10660 case MCK_gds:
10661 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
10662 case MCK_tfe:
10663 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
10664 }
10665 return tryCustomParseOperand(Operands, MCK);
10666}
10667
10668// This function should be defined after auto-generated include so that we have
10669// MatchClassKind enum defined
10670unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
10671 unsigned Kind) {
10672 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
10673 // But MatchInstructionImpl() expects to meet token and fails to validate
10674 // operand. This method checks if we are given immediate operand but expect to
10675 // get corresponding token.
10676 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
10677 switch (Kind) {
10678 case MCK_addr64:
10679 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10680 case MCK_gds:
10681 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10682 case MCK_lds:
10683 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10684 case MCK_idxen:
10685 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10686 case MCK_offen:
10687 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10688 case MCK_tfe:
10689 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10690 case MCK_done:
10691 return Operand.isDone() ? Match_Success : Match_InvalidOperand;
10692 case MCK_row_95_en:
10693 return Operand.isRowEn() ? Match_Success : Match_InvalidOperand;
10694 case MCK_SSrc_b32:
10695 // When operands have expression values, they will return true for isToken,
10696 // because it is not possible to distinguish between a token and an
10697 // expression at parse time. MatchInstructionImpl() will always try to
10698 // match an operand as a token, when isToken returns true, and when the
10699 // name of the expression is not a valid token, the match will fail,
10700 // so we need to handle it here.
10701 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10702 case MCK_SSrc_f32:
10703 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10704 case MCK_SOPPBrTarget:
10705 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10706 case MCK_VReg32OrOff:
10707 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10708 case MCK_InterpSlot:
10709 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10710 case MCK_InterpAttr:
10711 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10712 case MCK_InterpAttrChan:
10713 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10714 case MCK_SReg_64:
10715 case MCK_SReg_64_XEXEC:
10716 // Null is defined as a 32-bit register but
10717 // it should also be enabled with 64-bit operands or larger.
10718 // The following code enables it for SReg_64 and larger operands
10719 // used as source and destination. Remaining source
10720 // operands are handled in isInlinableImm.
10721 case MCK_SReg_96:
10722 case MCK_SReg_128:
10723 case MCK_SReg_256:
10724 case MCK_SReg_512:
10725 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10726 default:
10727 return Match_InvalidOperand;
10728 }
10729}
10730
10731//===----------------------------------------------------------------------===//
10732// endpgm
10733//===----------------------------------------------------------------------===//
10734
10735ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
10736 SMLoc S = getLoc();
10737 int64_t Imm = 0;
10738
10739 if (!parseExpr(Imm)) {
10740 // The operand is optional, if not present default to 0
10741 Imm = 0;
10742 }
10743
10744 if (!isUInt<16>(Imm))
10745 return Error(S, "expected a 16-bit value");
10746
10747 Operands.push_back(
10748 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
10749 return ParseStatus::Success;
10750}
10751
10752bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
10753
10754//===----------------------------------------------------------------------===//
10755// Split Barrier
10756//===----------------------------------------------------------------------===//
10757
10758bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
#define Success
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
SmallVector< int16_t, MAX_SRC_OPERANDS_NUM > OperandIndices
static bool checkWriteLane(const MCInst &Inst)
static bool getRegNum(StringRef Str, unsigned &Num)
static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, unsigned i, unsigned Opc, AMDGPU::OpName OpName)
static constexpr RegInfo RegularRegisters[]
static const RegInfo * getRegularRegInfo(StringRef Str)
static ArrayRef< unsigned > getAllVariants()
static OperandIndices getSrcOperandIndices(unsigned Opcode, bool AddMandatoryLiterals=false)
static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name, const MCRegisterInfo *MRI)
static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
static const fltSemantics * getFltSemantics(unsigned Size)
static bool isRegularReg(RegisterKind Kind)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser()
Force static initialization.
static bool ConvertOmodMul(int64_t &Mul)
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi)
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT)
constexpr uint64_t MIMGFlags
static bool AMDGPUCheckMnemonic(StringRef Mnemonic, const FeatureBitset &AvailableFeatures, unsigned VariantID)
static void applyMnemonicAliases(StringRef &Mnemonic, const FeatureBitset &Features, unsigned VariantID)
constexpr unsigned MAX_SRC_OPERANDS_NUM
#define EXPR_RESOLVE_OR_ERROR(RESOLVED)
static bool ConvertOmodDiv(int64_t &Div)
static bool IsRevOpcode(const unsigned Opcode)
static bool encodeCnt(const AMDGPU::IsaVersion ISA, int64_t &IntVal, int64_t CntVal, bool Saturate, unsigned(*encode)(const IsaVersion &Version, unsigned, unsigned), unsigned(*decode)(const IsaVersion &Version, unsigned))
static MCRegister getSpecialRegForName(StringRef RegName)
static void addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands, AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx, AMDGPUOperand::ImmTy ImmT, int64_t Default=0, std::optional< unsigned > InsertAt=std::nullopt)
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI)
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum)
static const fltSemantics * getOpFltSemantics(uint8_t OperandType)
static bool isInvalidVOPDY(const OperandVector &Operands, uint64_t InvalidOprIdx)
static std::string AMDGPUMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID=0)
static LLVM_READNONE unsigned encodeBitmaskPerm(const unsigned AndMask, const unsigned OrMask, const unsigned XorMask)
static bool isSafeTruncation(int64_t Val, unsigned Size)
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Provides AMDGPU specific target descriptions.
AMDGPU metadata definitions and in-memory representations.
Enums shared between the AMDGPU backend (LLVM) and the ELF linker (LLD) for the .amdgpu....
AMDHSA kernel descriptor definitions.
static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value, raw_ostream &Err)
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file declares a class to represent arbitrary precision floating point values and provide a varie...
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_READNONE
Definition Compiler.h:315
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
@ Default
#define Check(C,...)
static llvm::Expected< InlineInfo > decode(GsymDataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr)
Decode an InlineInfo in Data at the specified offset.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
Loop::LoopBounds::Direction Direction
Definition LoopInfo.cpp:253
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(PassOpts->AAPipeline)
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:483
This file implements the SmallBitVector class.
StringSet - A set-like wrapper for the StringMap.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
BinaryOperator * Mul
static const char * getRegisterName(MCRegister Reg)
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & BFloat()
Definition APFloat.h:295
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5899
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:130
size_t size() const
Get the array size.
Definition ArrayRef.h:141
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition MCAsmMacro.h:103
bool is(TokenKind K) const
Definition MCAsmMacro.h:75
Register getReg() const
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
void printExpr(raw_ostream &, const MCExpr &) const
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:353
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:428
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getNumOperands() const
Definition MCInst.h:212
SMLoc getLoc() const
Definition MCInst.h:208
void setLoc(SMLoc loc)
Definition MCInst.h:207
unsigned getOpcode() const
Definition MCInst.h:202
iterator insert(iterator I, const MCOperand &Op)
Definition MCInst.h:232
void addOperand(const MCOperand Op)
Definition MCInst.h:215
iterator begin()
Definition MCInst.h:227
size_t size() const
Definition MCInst.h:226
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
int16_t getOpRegClassID(const MCOperandInfo &OpInfo, unsigned HwModeId) const
Return the ID of the register class to use for OpInfo, for the active HwMode HwModeId.
Definition MCInstrInfo.h:80
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
void setImm(int64_t Val)
Definition MCInst.h:89
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
bool isImm() const
Definition MCInst.h:66
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
const MCExpr * getExpr() const
Definition MCInst.h:118
bool isExpr() const
Definition MCInst.h:69
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
bool regsOverlap(MCRegister RegA, MCRegister RegB) const
Returns true if the two registers are equal or alias each other.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Idx" for physical register Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
constexpr bool isValid() const
Definition MCRegister.h:84
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
Generic base class for all target subtargets.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
void setRedefinable(bool Value)
Mark this symbol as redefinable.
Definition MCSymbol.h:210
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
MCTargetAsmParser - Generic interface to target specific assembly parsers.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
constexpr unsigned id() const
Definition Register.h:100
Represents a location in source code.
Definition SMLoc.h:22
static SMLoc getFromPointer(const char *Ptr)
Definition SMLoc.h:35
constexpr const char * getPointer() const
Definition SMLoc.h:33
constexpr bool isValid() const
Definition SMLoc.h:28
SMLoc Start
Definition SMLoc.h:49
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
Definition StringRef.h:685
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:629
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:138
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition StringRef.h:270
bool consume_front(char Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:655
bool contains(StringRef key) const
Check if the set contains the given key.
Definition StringSet.h:60
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition StringSet.h:39
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
static constexpr CustomOperand Operands[]
unsigned getAddressableNumSGPRs(const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo &STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo &STI)
unsigned getLocalMemorySize(const MCSubtargetInfo &STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
constexpr const char *const ModMatrixFmt[]
constexpr const char *const ModMatrixScaleFmt[]
constexpr const char *const ModMatrixScale[]
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isInlineValue(MCRegister Reg)
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Returns true if Reg is a scalar register.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
FuncInfoFlags
Per-function flags packed into INFO_FLAGS entries.
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY bool isLitExpr(const MCExpr *Expr)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX13(const MCSubtargetInfo &STI)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool hasMAIInsts(const MCSubtargetInfo &STI)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool isGFX13Plus(const MCSubtargetInfo &STI)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
LLVM_READONLY int64_t getLitValue(const MCExpr *Expr)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:234
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:204
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:211
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:225
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:222
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:227
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:213
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:208
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:203
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:210
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:209
@ OPERAND_REG_IMM_V2FP16_SPLAT
Definition SIDefines.h:212
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:221
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:219
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:214
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:207
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:228
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:239
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:240
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:215
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:206
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:224
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:220
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:226
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:216
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:241
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:223
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:205
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:231
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
bool isGFX1250Plus(const MCSubtargetInfo &STI)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1433
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ OPERAND_IMMEDIATE
Definition MCInstrDesc.h:61
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
@ Valid
The data is already valid.
Context & getContext() const
Definition BasicBlock.h:99
bool isNull(StringRef S)
Definition YAMLTraits.h:571
This is an optimization pass for GlobalISel generic memory operations.
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
Definition Error.h:1129
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
StringMapEntry< Value * > ValueName
Definition Value.h:56
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
Definition Alignment.h:206
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
static bool isMem(const MachineInstr &MI, unsigned Op)
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
void PrintError(const Twine &Msg)
Definition Error.cpp:104
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
FunctionAddr VTableAddr uintptr_t uintptr_t DataSize
Definition InstrProf.h:299
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:362
Op::Description Desc
Target & getTheR600Target()
The target for R600 GPUs.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:156
SmallVectorImpl< std::unique_ptr< MCParsedAsmOperand > > OperandVector
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:334
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
Definition DWP.h:31
@ Default
The result value is uniform if and only if all operands are uniform.
Definition Uniformity.h:20
#define N
RegisterKind Kind
StringLiteral Name
void initDefault(const MCSubtargetInfo &STI, MCContext &Ctx, bool InitMCExpr=true)
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
SmallVector< std::pair< MCSymbol *, std::string >, 4 > IndirectCalls
SmallVector< std::pair< MCSymbol *, MCSymbol * >, 8 > Calls
SmallVector< FuncInfo, 8 > Funcs
SmallVector< std::pair< MCSymbol *, std::string >, 4 > TypeIds
SmallVector< std::pair< MCSymbol *, MCSymbol * >, 4 > Uses
Instruction set architecture version.
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...