// LLVM 23.0.0git
// AMDGPUAsmParser.cpp
// (Listing taken from the generated documentation page for this file.)
1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
16#include "SIDefines.h"
17#include "SIInstrInfo.h"
22#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/StringSet.h"
25#include "llvm/ADT/Twine.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/MC/MCInstrDesc.h"
38#include "llvm/MC/MCSymbol.h"
47#include <optional>
48
49using namespace llvm;
50using namespace llvm::AMDGPU;
51using namespace llvm::amdhsa;
52
53namespace {
54
55class AMDGPUAsmParser;
56
/// Register categories distinguished by this file.
enum RegisterKind {
  IS_UNKNOWN,
  IS_VGPR,
  IS_SGPR,
  IS_AGPR,
  IS_TTMP,
  IS_SPECIAL
};
58
59//===----------------------------------------------------------------------===//
60// Operand
61//===----------------------------------------------------------------------===//
62
63class AMDGPUOperand : public MCParsedAsmOperand {
/// Discriminator for the operand payload: which of the token, immediate,
/// register or expression forms this operand holds.
enum KindTy { Token, Immediate, Register, Expression };

/// The kind of this operand.
KindTy Kind;
70
71 SMLoc StartLoc, EndLoc;
72 const AMDGPUAsmParser *AsmParser;
73
74public:
75 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
76 : Kind(Kind_), AsmParser(AsmParser_) {}
77
78 using Ptr = std::unique_ptr<AMDGPUOperand>;
79
80 struct Modifiers {
81 bool Abs = false;
82 bool Neg = false;
83 bool Sext = false;
84 LitModifier Lit = LitModifier::None;
85
86 bool hasFPModifiers() const { return Abs || Neg; }
87 bool hasIntModifiers() const { return Sext; }
88 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
89 bool isForcedLit() const { return Lit == LitModifier::Lit; }
90 bool isForcedLit64() const { return Lit == LitModifier::Lit64; }
91
92 int64_t getFPModifiersOperand() const {
93 int64_t Operand = 0;
94 Operand |= Abs ? SISrcMods::ABS : 0u;
95 Operand |= Neg ? SISrcMods::NEG : 0u;
96 return Operand;
97 }
98
99 int64_t getIntModifiersOperand() const {
100 int64_t Operand = 0;
101 Operand |= Sext ? SISrcMods::SEXT : 0u;
102 return Operand;
103 }
104
105 int64_t getModifiersOperand() const {
106 assert(!(hasFPModifiers() && hasIntModifiers())
107 && "fp and int modifiers should not be used simultaneously");
108 if (hasFPModifiers())
109 return getFPModifiersOperand();
110 if (hasIntModifiers())
111 return getIntModifiersOperand();
112 return 0;
113 }
114
115 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
116 };
117
/// Tag stored with every immediate operand. ImmTyNone marks a plain
/// immediate (see isImmLiteral); every other value names a specific kind of
/// immediate. Enumerator order is significant because values are implicit.
enum ImmTy {
  ImmTyNone,
  ImmTyGDS,
  ImmTyLDS,
  ImmTyOffen,
  ImmTyIdxen,
  ImmTyAddr64,
  ImmTyOffset,
  ImmTyInstOffset,
  ImmTyOffset0,
  ImmTyOffset1,
  ImmTySMEMOffsetMod,
  ImmTyCPol,
  ImmTyTFE,
  ImmTyIsAsync,
  ImmTyD16,
  ImmTyClamp,
  ImmTyOModSI,
  ImmTySDWADstSel,
  ImmTySDWASrc0Sel,
  ImmTySDWASrc1Sel,
  ImmTySDWADstUnused,
  ImmTyDMask,
  ImmTyDim,
  ImmTyUNorm,
  ImmTyDA,
  ImmTyR128A16,
  ImmTyA16,
  ImmTyLWE,
  ImmTyExpTgt,
  ImmTyExpCompr,
  ImmTyExpVM,
  ImmTyDone,
  ImmTyRowEn,
  ImmTyFORMAT,
  ImmTyHwreg,
  ImmTyOff,
  ImmTySendMsg,
  ImmTyWaitEvent,
  ImmTyInterpSlot,
  ImmTyInterpAttr,
  ImmTyInterpAttrChan,
  ImmTyOpSel,
  ImmTyOpSelHi,
  ImmTyNegLo,
  ImmTyNegHi,
  ImmTyIndexKey8bit,
  ImmTyIndexKey16bit,
  ImmTyIndexKey32bit,
  ImmTyDPP8,
  ImmTyDppCtrl,
  ImmTyDppRowMask,
  ImmTyDppBankMask,
  ImmTyDppBoundCtrl,
  ImmTyDppFI,
  ImmTySwizzle,
  ImmTyGprIdxMode,
  ImmTyHigh,
  ImmTyBLGP,
  ImmTyCBSZ,
  ImmTyABID,
  ImmTyEndpgm,
  ImmTyWaitVDST,
  ImmTyWaitEXP,
  ImmTyWaitVAVDst,
  ImmTyWaitVMVSrc,
  ImmTyBitOp3,
  ImmTyMatrixAFMT,
  ImmTyMatrixBFMT,
  ImmTyMatrixAScale,
  ImmTyMatrixBScale,
  ImmTyMatrixAScaleFmt,
  ImmTyMatrixBScaleFmt,
  ImmTyMatrixAReuse,
  ImmTyMatrixBReuse,
  ImmTyScaleSel,
  ImmTyByteSel,
};
196
197private:
/// Payload of a Token operand: a pointer/length pair (see getToken, which
/// wraps it in a StringRef).
struct TokOp {
  const char *Data;
  unsigned Length;
};
202
203 struct ImmOp {
204 int64_t Val;
205 ImmTy Type;
206 bool IsFPImm;
207 Modifiers Mods;
208 };
209
210 struct RegOp {
211 MCRegister RegNo;
212 Modifiers Mods;
213 };
214
215 union {
216 TokOp Tok;
217 ImmOp Imm;
218 RegOp Reg;
219 const MCExpr *Expr;
220 };
221
222 // The index of the associated MCInst operand.
223 mutable int MCOpIdx = -1;
224
225public:
226 bool isToken() const override { return Kind == Token; }
227
228 bool isSymbolRefExpr() const {
229 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230 }
231
232 bool isImm() const override {
233 return Kind == Immediate;
234 }
235
236 bool isInlinableImm(MVT type) const;
237 bool isLiteralImm(MVT type) const;
238
239 bool isRegKind() const {
240 return Kind == Register;
241 }
242
243 bool isReg() const override {
244 return isRegKind() && !hasModifiers();
245 }
246
247 bool isRegOrInline(unsigned RCID, MVT type) const {
248 return isRegClass(RCID) || isInlinableImm(type);
249 }
250
251 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
252 return isRegOrInline(RCID, type) || isLiteralImm(type);
253 }
254
255 bool isRegOrImmWithInt16InputMods() const {
256 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
257 }
258
259 template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
261 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
262 }
263
264 bool isRegOrImmWithInt32InputMods() const {
265 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
266 }
267
268 bool isRegOrInlineImmWithInt16InputMods() const {
269 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
270 }
271
272 template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
273 return isRegOrInline(
274 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
275 }
276
277 bool isRegOrInlineImmWithInt32InputMods() const {
278 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
279 }
280
281 bool isRegOrImmWithInt64InputMods() const {
282 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
283 }
284
285 bool isRegOrImmWithFP16InputMods() const {
286 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
287 }
288
289 template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
291 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
292 }
293
294 bool isRegOrImmWithFP32InputMods() const {
295 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
296 }
297
298 bool isRegOrImmWithFP64InputMods() const {
299 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
300 }
301
302 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
303 return isRegOrInline(
304 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
305 }
306
307 bool isRegOrInlineImmWithFP32InputMods() const {
308 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
309 }
310
311 bool isRegOrInlineImmWithFP64InputMods() const {
312 return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
313 }
314
315 bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }
316
317 bool isVRegWithFP32InputMods() const {
318 return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
319 }
320
321 bool isVRegWithFP64InputMods() const {
322 return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
323 }
324
325 bool isPackedFP16InputMods() const {
326 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
327 }
328
329 bool isPackedVGPRFP32InputMods() const {
330 return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
331 }
332
333 bool isVReg() const {
334 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
335 isRegClass(AMDGPU::VReg_64RegClassID) ||
336 isRegClass(AMDGPU::VReg_96RegClassID) ||
337 isRegClass(AMDGPU::VReg_128RegClassID) ||
338 isRegClass(AMDGPU::VReg_160RegClassID) ||
339 isRegClass(AMDGPU::VReg_192RegClassID) ||
340 isRegClass(AMDGPU::VReg_256RegClassID) ||
341 isRegClass(AMDGPU::VReg_512RegClassID) ||
342 isRegClass(AMDGPU::VReg_1024RegClassID);
343 }
344
345 bool isVReg32() const {
346 return isRegClass(AMDGPU::VGPR_32RegClassID);
347 }
348
349 bool isVReg32OrOff() const {
350 return isOff() || isVReg32();
351 }
352
353 bool isNull() const {
354 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
355 }
356
357 bool isAV_LdSt_32_Align2_RegOp() const {
358 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
359 isRegClass(AMDGPU::AGPR_32RegClassID);
360 }
361
362 bool isVRegWithInputMods() const;
363 template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
364 template <bool IsFake16> bool isT16VRegWithInputMods() const;
365
366 bool isSDWAOperand(MVT type) const;
367 bool isSDWAFP16Operand() const;
368 bool isSDWAFP32Operand() const;
369 bool isSDWAInt16Operand() const;
370 bool isSDWAInt32Operand() const;
371
372 bool isImmTy(ImmTy ImmT) const {
373 return isImm() && Imm.Type == ImmT;
374 }
375
376 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
377
378 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
379
380 bool isImmModifier() const {
381 return isImm() && Imm.Type != ImmTyNone;
382 }
383
384 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
385 bool isDim() const { return isImmTy(ImmTyDim); }
386 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
387 bool isOff() const { return isImmTy(ImmTyOff); }
388 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
389 bool isOffen() const { return isImmTy(ImmTyOffen); }
390 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
391 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
392 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
393 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
394 bool isGDS() const { return isImmTy(ImmTyGDS); }
395 bool isLDS() const { return isImmTy(ImmTyLDS); }
396 bool isCPol() const { return isImmTy(ImmTyCPol); }
397 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
398 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
399 bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
400 bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
401 bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
402 bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
403 bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
404 bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
405 bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
406 bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
407 bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
408 bool isTFE() const { return isImmTy(ImmTyTFE); }
409 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
410 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
411 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
412 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
413 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
414 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
415 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
416 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
417 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
418 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
419 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
420 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
421 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
422 bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
423 bool isDone() const { return isImmTy(ImmTyDone); }
424 bool isRowEn() const { return isImmTy(ImmTyRowEn); }
425
426 bool isRegOrImm() const {
427 return isReg() || isImm();
428 }
429
430 bool isRegClass(unsigned RCID) const;
431
432 bool isInlineValue() const;
433
434 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
435 return isRegOrInline(RCID, type) && !hasModifiers();
436 }
437
438 bool isSCSrcB16() const {
439 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
440 }
441
442 bool isSCSrcV2B16() const {
443 return isSCSrcB16();
444 }
445
446 bool isSCSrc_b32() const {
447 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
448 }
449
450 bool isSCSrc_b64() const {
451 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
452 }
453
454 bool isBoolReg() const;
455
456 bool isSCSrcF16() const {
457 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
458 }
459
460 bool isSCSrcV2F16() const {
461 return isSCSrcF16();
462 }
463
464 bool isSCSrcF32() const {
465 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
466 }
467
468 bool isSCSrcF64() const {
469 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
470 }
471
472 bool isSSrc_b32() const {
473 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
474 }
475
476 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
477
478 bool isSSrcV2B16() const {
479 llvm_unreachable("cannot happen");
480 return isSSrc_b16();
481 }
482
483 bool isSSrc_b64() const {
484 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
485 // See isVSrc64().
486 return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
487 (((const MCTargetAsmParser *)AsmParser)
488 ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
489 isExpr());
490 }
491
492 bool isSSrc_f32() const {
493 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
494 }
495
496 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
497
498 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
499
500 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
501
502 bool isSSrcV2F16() const {
503 llvm_unreachable("cannot happen");
504 return isSSrc_f16();
505 }
506
507 bool isSSrcV2FP32() const {
508 llvm_unreachable("cannot happen");
509 return isSSrc_f32();
510 }
511
512 bool isSCSrcV2FP32() const {
513 llvm_unreachable("cannot happen");
514 return isSCSrcF32();
515 }
516
517 bool isSSrcV2INT32() const {
518 llvm_unreachable("cannot happen");
519 return isSSrc_b32();
520 }
521
522 bool isSCSrcV2INT32() const {
523 llvm_unreachable("cannot happen");
524 return isSCSrc_b32();
525 }
526
527 bool isSSrcOrLds_b32() const {
528 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
529 isLiteralImm(MVT::i32) || isExpr();
530 }
531
532 bool isVCSrc_b32() const {
533 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
534 }
535
536 bool isVCSrc_b32_Lo256() const {
537 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
538 }
539
540 bool isVCSrc_b64_Lo256() const {
541 return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
542 }
543
544 bool isVCSrc_b64() const {
545 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
546 }
547
548 bool isVCSrcT_b16() const {
549 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
550 }
551
552 bool isVCSrcTB16_Lo128() const {
553 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
554 }
555
556 bool isVCSrcFake16B16_Lo128() const {
557 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
558 }
559
560 bool isVCSrc_b16() const {
561 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
562 }
563
564 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
565
566 bool isVCSrc_f32() const {
567 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
568 }
569
570 bool isVCSrc_f64() const {
571 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
572 }
573
574 bool isVCSrcTBF16() const {
575 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
576 }
577
578 bool isVCSrcT_f16() const {
579 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
580 }
581
582 bool isVCSrcT_bf16() const {
583 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
584 }
585
586 bool isVCSrcTBF16_Lo128() const {
587 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
588 }
589
590 bool isVCSrcTF16_Lo128() const {
591 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
592 }
593
594 bool isVCSrcFake16BF16_Lo128() const {
595 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
596 }
597
598 bool isVCSrcFake16F16_Lo128() const {
599 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
600 }
601
602 bool isVCSrc_bf16() const {
603 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
604 }
605
606 bool isVCSrc_f16() const {
607 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
608 }
609
610 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
611
612 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
613
614 bool isVSrc_b32() const {
615 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
616 }
617
618 bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }
619
620 bool isVSrc_v2b64() const {
621 return isRegOrInlineNoMods(AMDGPU::VS_128RegClassID, MVT::i64) ||
622 isLiteralImm(MVT::i64);
623 }
624
625 bool isVSrc_v2f64() const {
626 return isRegOrInlineNoMods(AMDGPU::VS_128RegClassID, MVT::f64) ||
627 isLiteralImm(MVT::f64);
628 }
629
630 bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
631
632 bool isVSrcT_b16_Lo128() const {
633 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
634 }
635
636 bool isVSrcFake16_b16_Lo128() const {
637 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
638 }
639
640 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
641
642 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
643
644 bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
645
646 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
647
648 bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
649
650 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
651
652 bool isVSrc_f32() const {
653 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
654 }
655
656 bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }
657
658 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
659
660 bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
661
662 bool isVSrcT_bf16_Lo128() const {
663 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
664 }
665
666 bool isVSrcT_f16_Lo128() const {
667 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
668 }
669
670 bool isVSrcFake16_bf16_Lo128() const {
671 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
672 }
673
674 bool isVSrcFake16_f16_Lo128() const {
675 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
676 }
677
678 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
679
680 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
681
682 bool isVSrc_v2bf16() const {
683 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
684 }
685
686 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
687
688 bool isVSrc_v2f16_splat() const { return isVSrc_v2f16(); }
689
690 bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
691
692 bool isVISrcB32() const {
693 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
694 }
695
696 bool isVISrcB16() const {
697 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
698 }
699
700 bool isVISrcV2B16() const {
701 return isVISrcB16();
702 }
703
704 bool isVISrcF32() const {
705 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
706 }
707
708 bool isVISrcF16() const {
709 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
710 }
711
712 bool isVISrcV2F16() const {
713 return isVISrcF16() || isVISrcB32();
714 }
715
716 bool isVISrc_64_bf16() const {
717 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
718 }
719
720 bool isVISrc_64_f16() const {
721 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
722 }
723
724 bool isVISrc_64_b32() const {
725 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
726 }
727
728 bool isVISrc_64B64() const {
729 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
730 }
731
732 bool isVISrc_64_f64() const {
733 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
734 }
735
736 bool isVISrc_64V2FP32() const {
737 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
738 }
739
740 bool isVISrc_64V2INT32() const {
741 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
742 }
743
744 bool isVISrc_256_b32() const {
745 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
746 }
747
748 bool isVISrc_256_f32() const {
749 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
750 }
751
752 bool isVISrc_256B64() const {
753 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
754 }
755
756 bool isVISrc_256_f64() const {
757 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
758 }
759
760 bool isVISrc_512_f64() const {
761 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
762 }
763
764 bool isVISrc_128B16() const {
765 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
766 }
767
768 bool isVISrc_128V2B16() const {
769 return isVISrc_128B16();
770 }
771
772 bool isVISrc_128_b32() const {
773 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
774 }
775
776 bool isVISrc_128_f32() const {
777 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
778 }
779
780 bool isVISrc_256V2FP32() const {
781 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
782 }
783
784 bool isVISrc_256V2INT32() const {
785 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
786 }
787
788 bool isVISrc_512_b32() const {
789 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
790 }
791
792 bool isVISrc_512B16() const {
793 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
794 }
795
796 bool isVISrc_512V2B16() const {
797 return isVISrc_512B16();
798 }
799
800 bool isVISrc_512_f32() const {
801 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
802 }
803
804 bool isVISrc_512F16() const {
805 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
806 }
807
808 bool isVISrc_512V2F16() const {
809 return isVISrc_512F16() || isVISrc_512_b32();
810 }
811
812 bool isVISrc_1024_b32() const {
813 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
814 }
815
816 bool isVISrc_1024B16() const {
817 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
818 }
819
820 bool isVISrc_1024V2B16() const {
821 return isVISrc_1024B16();
822 }
823
824 bool isVISrc_1024_f32() const {
825 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
826 }
827
828 bool isVISrc_1024F16() const {
829 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
830 }
831
832 bool isVISrc_1024V2F16() const {
833 return isVISrc_1024F16() || isVISrc_1024_b32();
834 }
835
836 bool isAISrcB32() const {
837 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
838 }
839
840 bool isAISrcB16() const {
841 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
842 }
843
844 bool isAISrcV2B16() const {
845 return isAISrcB16();
846 }
847
848 bool isAISrcF32() const {
849 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
850 }
851
852 bool isAISrcF16() const {
853 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
854 }
855
856 bool isAISrcV2F16() const {
857 return isAISrcF16() || isAISrcB32();
858 }
859
860 bool isAISrc_64B64() const {
861 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
862 }
863
864 bool isAISrc_64_f64() const {
865 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
866 }
867
868 bool isAISrc_128_b32() const {
869 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
870 }
871
872 bool isAISrc_128B16() const {
873 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
874 }
875
876 bool isAISrc_128V2B16() const {
877 return isAISrc_128B16();
878 }
879
880 bool isAISrc_128_f32() const {
881 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
882 }
883
884 bool isAISrc_128F16() const {
885 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
886 }
887
888 bool isAISrc_128V2F16() const {
889 return isAISrc_128F16() || isAISrc_128_b32();
890 }
891
892 bool isVISrc_128_bf16() const {
893 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
894 }
895
896 bool isVISrc_128_f16() const {
897 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
898 }
899
900 bool isVISrc_128V2F16() const {
901 return isVISrc_128_f16() || isVISrc_128_b32();
902 }
903
904 bool isAISrc_256B64() const {
905 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
906 }
907
908 bool isAISrc_256_f64() const {
909 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
910 }
911
912 bool isAISrc_512_b32() const {
913 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
914 }
915
916 bool isAISrc_512B16() const {
917 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
918 }
919
920 bool isAISrc_512V2B16() const {
921 return isAISrc_512B16();
922 }
923
924 bool isAISrc_512_f32() const {
925 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
926 }
927
928 bool isAISrc_512F16() const {
929 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
930 }
931
932 bool isAISrc_512V2F16() const {
933 return isAISrc_512F16() || isAISrc_512_b32();
934 }
935
936 bool isAISrc_1024_b32() const {
937 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
938 }
939
940 bool isAISrc_1024B16() const {
941 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
942 }
943
944 bool isAISrc_1024V2B16() const {
945 return isAISrc_1024B16();
946 }
947
948 bool isAISrc_1024_f32() const {
949 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
950 }
951
952 bool isAISrc_1024F16() const {
953 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
954 }
955
956 bool isAISrc_1024V2F16() const {
957 return isAISrc_1024F16() || isAISrc_1024_b32();
958 }
959
960 bool isKImmFP32() const {
961 return isLiteralImm(MVT::f32);
962 }
963
964 bool isKImmFP16() const {
965 return isLiteralImm(MVT::f16);
966 }
967
968 bool isKImmFP64() const { return isLiteralImm(MVT::f64); }
969
970 bool isMem() const override {
971 return false;
972 }
973
974 bool isExpr() const {
975 return Kind == Expression;
976 }
977
978 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
979
980 bool isSWaitCnt() const;
981 bool isDepCtr() const;
982 bool isSDelayALU() const;
983 bool isHwreg() const;
984 bool isSendMsg() const;
985 bool isWaitEvent() const;
986 bool isSplitBarrier() const;
987 bool isSwizzle() const;
988 bool isSMRDOffset8() const;
989 bool isSMEMOffset() const;
990 bool isSMRDLiteralOffset() const;
991 bool isDPP8() const;
992 bool isDPPCtrl() const;
993 bool isBLGP() const;
994 bool isGPRIdxMode() const;
995 bool isS16Imm() const;
996 bool isU16Imm() const;
997 bool isEndpgm() const;
998
999 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
1000 return [this, P]() { return P(*this); };
1001 }
1002
1003 StringRef getToken() const {
1004 assert(isToken());
1005 return StringRef(Tok.Data, Tok.Length);
1006 }
1007
1008 int64_t getImm() const {
1009 assert(isImm());
1010 return Imm.Val;
1011 }
1012
1013 void setImm(int64_t Val) {
1014 assert(isImm());
1015 Imm.Val = Val;
1016 }
1017
1018 ImmTy getImmTy() const {
1019 assert(isImm());
1020 return Imm.Type;
1021 }
1022
1023 MCRegister getReg() const override {
1024 assert(isRegKind());
1025 return Reg.RegNo;
1026 }
1027
1028 SMLoc getStartLoc() const override {
1029 return StartLoc;
1030 }
1031
1032 SMLoc getEndLoc() const override {
1033 return EndLoc;
1034 }
1035
1036 SMRange getLocRange() const {
1037 return SMRange(StartLoc, EndLoc);
1038 }
1039
1040 int getMCOpIdx() const { return MCOpIdx; }
1041
1042 Modifiers getModifiers() const {
1043 assert(isRegKind() || isImmTy(ImmTyNone));
1044 return isRegKind() ? Reg.Mods : Imm.Mods;
1045 }
1046
1047 void setModifiers(Modifiers Mods) {
1048 assert(isRegKind() || isImmTy(ImmTyNone));
1049 if (isRegKind())
1050 Reg.Mods = Mods;
1051 else
1052 Imm.Mods = Mods;
1053 }
1054
1055 bool hasModifiers() const {
1056 return getModifiers().hasModifiers();
1057 }
1058
1059 bool hasFPModifiers() const {
1060 return getModifiers().hasFPModifiers();
1061 }
1062
1063 bool hasIntModifiers() const {
1064 return getModifiers().hasIntModifiers();
1065 }
1066
1067 bool isForcedLit() const {
1068 return isImmLiteral() && getModifiers().isForcedLit();
1069 }
1070
1071 bool isForcedLit64() const {
1072 return isImmLiteral() && getModifiers().isForcedLit64();
1073 }
1074
1075 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1076
1077 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1078
1079 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1080
1081 void addRegOperands(MCInst &Inst, unsigned N) const;
1082
1083 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1084 if (isRegKind())
1085 addRegOperands(Inst, N);
1086 else
1087 addImmOperands(Inst, N);
1088 }
1089
1090 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1091 Modifiers Mods = getModifiers();
1092 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1093 if (isRegKind()) {
1094 addRegOperands(Inst, N);
1095 } else {
1096 addImmOperands(Inst, N, false);
1097 }
1098 }
1099
1100 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1101 assert(!hasIntModifiers());
1102 addRegOrImmWithInputModsOperands(Inst, N);
1103 }
1104
1105 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1106 assert(!hasFPModifiers());
1107 addRegOrImmWithInputModsOperands(Inst, N);
1108 }
1109
1110 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1111 Modifiers Mods = getModifiers();
1112 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1113 assert(isRegKind());
1114 addRegOperands(Inst, N);
1115 }
1116
1117 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1118 assert(!hasIntModifiers());
1119 addRegWithInputModsOperands(Inst, N);
1120 }
1121
1122 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1123 assert(!hasFPModifiers());
1124 addRegWithInputModsOperands(Inst, N);
1125 }
1126
1127 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1128 // clang-format off
1129 switch (Type) {
1130 case ImmTyNone: OS << "None"; break;
1131 case ImmTyGDS: OS << "GDS"; break;
1132 case ImmTyLDS: OS << "LDS"; break;
1133 case ImmTyOffen: OS << "Offen"; break;
1134 case ImmTyIdxen: OS << "Idxen"; break;
1135 case ImmTyAddr64: OS << "Addr64"; break;
1136 case ImmTyOffset: OS << "Offset"; break;
1137 case ImmTyInstOffset: OS << "InstOffset"; break;
1138 case ImmTyOffset0: OS << "Offset0"; break;
1139 case ImmTyOffset1: OS << "Offset1"; break;
1140 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1141 case ImmTyCPol: OS << "CPol"; break;
1142 case ImmTyIndexKey8bit: OS << "index_key"; break;
1143 case ImmTyIndexKey16bit: OS << "index_key"; break;
1144 case ImmTyIndexKey32bit: OS << "index_key"; break;
1145 case ImmTyTFE: OS << "TFE"; break;
1146 case ImmTyIsAsync: OS << "IsAsync"; break;
1147 case ImmTyD16: OS << "D16"; break;
1148 case ImmTyFORMAT: OS << "FORMAT"; break;
1149 case ImmTyClamp: OS << "Clamp"; break;
1150 case ImmTyOModSI: OS << "OModSI"; break;
1151 case ImmTyDPP8: OS << "DPP8"; break;
1152 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1153 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1154 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1155 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1156 case ImmTyDppFI: OS << "DppFI"; break;
1157 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1158 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1159 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1160 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1161 case ImmTyDMask: OS << "DMask"; break;
1162 case ImmTyDim: OS << "Dim"; break;
1163 case ImmTyUNorm: OS << "UNorm"; break;
1164 case ImmTyDA: OS << "DA"; break;
1165 case ImmTyR128A16: OS << "R128A16"; break;
1166 case ImmTyA16: OS << "A16"; break;
1167 case ImmTyLWE: OS << "LWE"; break;
1168 case ImmTyOff: OS << "Off"; break;
1169 case ImmTyExpTgt: OS << "ExpTgt"; break;
1170 case ImmTyExpCompr: OS << "ExpCompr"; break;
1171 case ImmTyExpVM: OS << "ExpVM"; break;
1172 case ImmTyDone: OS << "Done"; break;
1173 case ImmTyRowEn: OS << "RowEn"; break;
1174 case ImmTyHwreg: OS << "Hwreg"; break;
1175 case ImmTySendMsg: OS << "SendMsg"; break;
1176 case ImmTyWaitEvent: OS << "WaitEvent"; break;
1177 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1178 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1179 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1180 case ImmTyOpSel: OS << "OpSel"; break;
1181 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1182 case ImmTyNegLo: OS << "NegLo"; break;
1183 case ImmTyNegHi: OS << "NegHi"; break;
1184 case ImmTySwizzle: OS << "Swizzle"; break;
1185 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1186 case ImmTyHigh: OS << "High"; break;
1187 case ImmTyBLGP: OS << "BLGP"; break;
1188 case ImmTyCBSZ: OS << "CBSZ"; break;
1189 case ImmTyABID: OS << "ABID"; break;
1190 case ImmTyEndpgm: OS << "Endpgm"; break;
1191 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1192 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1193 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1194 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1195 case ImmTyBitOp3: OS << "BitOp3"; break;
1196 case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
1197 case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
1198 case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
1199 case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
1200 case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
1201 case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
1202 case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
1203 case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
1204 case ImmTyScaleSel: OS << "ScaleSel" ; break;
1205 case ImmTyByteSel: OS << "ByteSel" ; break;
1206 }
1207 // clang-format on
1208 }
1209
1210 void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
1211 switch (Kind) {
1212 case Register:
1213 OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
1214 << " mods: " << Reg.Mods << '>';
1215 break;
1216 case Immediate:
1217 OS << '<' << getImm();
1218 if (getImmTy() != ImmTyNone) {
1219 OS << " type: "; printImmTy(OS, getImmTy());
1220 }
1221 OS << " mods: " << Imm.Mods << '>';
1222 break;
1223 case Token:
1224 OS << '\'' << getToken() << '\'';
1225 break;
1226 case Expression:
1227 OS << "<expr ";
1228 MAI.printExpr(OS, *Expr);
1229 OS << '>';
1230 break;
1231 }
1232 }
1233
1234 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1235 int64_t Val, SMLoc Loc,
1236 ImmTy Type = ImmTyNone,
1237 bool IsFPImm = false) {
1238 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1239 Op->Imm.Val = Val;
1240 Op->Imm.IsFPImm = IsFPImm;
1241 Op->Imm.Type = Type;
1242 Op->Imm.Mods = Modifiers();
1243 Op->StartLoc = Loc;
1244 Op->EndLoc = Loc;
1245 return Op;
1246 }
1247
1248 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1249 StringRef Str, SMLoc Loc,
1250 bool HasExplicitEncodingSize = true) {
1251 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1252 Res->Tok.Data = Str.data();
1253 Res->Tok.Length = Str.size();
1254 Res->StartLoc = Loc;
1255 Res->EndLoc = Loc;
1256 return Res;
1257 }
1258
1259 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1260 MCRegister Reg, SMLoc S, SMLoc E) {
1261 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1262 Op->Reg.RegNo = Reg;
1263 Op->Reg.Mods = Modifiers();
1264 Op->StartLoc = S;
1265 Op->EndLoc = E;
1266 return Op;
1267 }
1268
1269 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1270 const class MCExpr *Expr, SMLoc S) {
1271 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1272 Op->Expr = Expr;
1273 Op->StartLoc = S;
1274 Op->EndLoc = S;
1275 return Op;
1276 }
1277};
1278
1279raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1280 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1281 return OS;
1282}
1283
1284//===----------------------------------------------------------------------===//
1285// AsmParser
1286//===----------------------------------------------------------------------===//
1287
1288// TODO: define GET_SUBTARGET_FEATURE_NAME
1289#define GET_REGISTER_MATCHER
1290#include "AMDGPUGenAsmMatcher.inc"
1291#undef GET_REGISTER_MATCHER
1292#undef GET_SUBTARGET_FEATURE_NAME
1293
1294// Holds info related to the current kernel, e.g. count of SGPRs used.
1295// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1296// .amdgpu_hsa_kernel or at EOF.
1297class KernelScopeInfo {
1298 int SgprIndexUnusedMin = -1;
1299 int VgprIndexUnusedMin = -1;
1300 int AgprIndexUnusedMin = -1;
1301 MCContext *Ctx = nullptr;
1302 MCSubtargetInfo const *MSTI = nullptr;
1303
1304 void usesSgprAt(int i) {
1305 if (i >= SgprIndexUnusedMin) {
1306 SgprIndexUnusedMin = ++i;
1307 if (Ctx) {
1308 MCSymbol* const Sym =
1309 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1310 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1311 }
1312 }
1313 }
1314
1315 void usesVgprAt(int i) {
1316 if (i >= VgprIndexUnusedMin) {
1317 VgprIndexUnusedMin = ++i;
1318 if (Ctx) {
1319 MCSymbol* const Sym =
1320 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1321 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1322 VgprIndexUnusedMin);
1323 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1324 }
1325 }
1326 }
1327
1328 void usesAgprAt(int i) {
1329 // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
1330 if (!hasMAIInsts(*MSTI))
1331 return;
1332
1333 if (i >= AgprIndexUnusedMin) {
1334 AgprIndexUnusedMin = ++i;
1335 if (Ctx) {
1336 MCSymbol* const Sym =
1337 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1338 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1339
1340 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1341 MCSymbol* const vSym =
1342 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1343 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1344 VgprIndexUnusedMin);
1345 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1346 }
1347 }
1348 }
1349
1350public:
1351 KernelScopeInfo() = default;
1352
1353 void initialize(MCContext &Context) {
1354 Ctx = &Context;
1355 MSTI = Ctx->getSubtargetInfo();
1356
1357 usesSgprAt(SgprIndexUnusedMin = -1);
1358 usesVgprAt(VgprIndexUnusedMin = -1);
1359 if (hasMAIInsts(*MSTI)) {
1360 usesAgprAt(AgprIndexUnusedMin = -1);
1361 }
1362 }
1363
1364 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1365 unsigned RegWidth) {
1366 switch (RegKind) {
1367 case IS_SGPR:
1368 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1369 break;
1370 case IS_AGPR:
1371 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1372 break;
1373 case IS_VGPR:
1374 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1375 break;
1376 default:
1377 break;
1378 }
1379 }
1380};
1381
1382class AMDGPUAsmParser : public MCTargetAsmParser {
1383 MCAsmParser &Parser;
1384
1385 unsigned ForcedEncodingSize = 0;
1386 bool ForcedDPP = false;
1387 bool ForcedSDWA = false;
1388 KernelScopeInfo KernelScope;
1389 const unsigned HwMode;
1390
1391 /// @name Auto-generated Match Functions
1392 /// {
1393
1394#define GET_ASSEMBLER_HEADER
1395#include "AMDGPUGenAsmMatcher.inc"
1396
1397 /// }
1398
1399 /// Get size of register operand
1400 unsigned getRegOperandSize(const MCInstrDesc &Desc, unsigned OpNo) const {
1401 assert(OpNo < Desc.NumOperands);
1402 int16_t RCID = MII.getOpRegClassID(Desc.operands()[OpNo], HwMode);
1403 return getRegBitWidth(RCID) / 8;
1404 }
1405
1406 std::optional<AMDGPU::InfoSectionData> InfoData;
1407
1408private:
1409 void createConstantSymbol(StringRef Id, int64_t Val);
1410
1411 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1412 bool OutOfRangeError(SMRange Range);
1413 /// Calculate VGPR/SGPR blocks required for given target, reserved
1414 /// registers, and user-specified NextFreeXGPR values.
1415 ///
1416 /// \param Features [in] Target features, used for bug corrections.
1417 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1418 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1419 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1420 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1421 /// descriptor field, if valid.
1422 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1423 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1424 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1425 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1426 /// \param VGPRBlocks [out] Result VGPR block count.
1427 /// \param SGPRBlocks [out] Result SGPR block count.
1428 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1429 const MCExpr *FlatScrUsed, bool XNACKUsed,
1430 std::optional<bool> EnableWavefrontSize32,
1431 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1432 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1433 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
1434 bool ParseDirectiveAMDGCNTarget();
1435 bool ParseDirectiveAMDHSACodeObjectVersion();
1436 bool ParseDirectiveAMDHSAKernel();
1437 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1438 bool ParseDirectiveAMDKernelCodeT();
1439 // TODO: Possibly make subtargetHasRegister const.
1440 bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
1441 bool ParseDirectiveAMDGPUHsaKernel();
1442
1443 bool ParseDirectiveISAVersion();
1444 bool ParseDirectiveHSAMetadata();
1445 bool ParseDirectivePALMetadataBegin();
1446 bool ParseDirectivePALMetadata();
1447 bool ParseDirectiveAMDGPULDS();
1448 bool ParseDirectiveAMDGPUInfo();
1449
1450 /// Common code to parse out a block of text (typically YAML) between start and
1451 /// end directives.
1452 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1453 const char *AssemblerDirectiveEnd,
1454 std::string &CollectString);
1455
1456 bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
1457 RegisterKind RegKind, MCRegister Reg1,
1458 RegisterKind RegKind1, SMLoc Loc);
1459 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1460 unsigned &RegNum, unsigned &RegWidth,
1461 bool RestoreOnFailure = false);
1462 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1463 unsigned &RegNum, unsigned &RegWidth,
1464 SmallVectorImpl<AsmToken> &Tokens);
1465 MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1466 unsigned &RegWidth,
1467 SmallVectorImpl<AsmToken> &Tokens);
1468 MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1469 unsigned &RegWidth,
1470 SmallVectorImpl<AsmToken> &Tokens);
1471 MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1472 unsigned &RegWidth,
1473 SmallVectorImpl<AsmToken> &Tokens);
1474 bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
1475 MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
1476 unsigned SubReg, unsigned RegWidth, SMLoc Loc);
1477
1478 bool isRegister();
1479 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1480 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1481 void initializeGprCountSymbol(RegisterKind RegKind);
1482 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1483 unsigned RegWidth);
1484 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1485 bool IsAtomic);
1486
1487public:
1488 enum OperandMode {
1489 OperandMode_Default,
1490 OperandMode_NSA,
1491 };
1492
1493 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1494
1495 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1496 const MCInstrInfo &MII)
1497 : MCTargetAsmParser(STI, MII), Parser(_Parser),
1498 HwMode(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {
1500
1501 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1502
1503 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1504 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1505 createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1506 createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1507 createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1508 } else {
1509 createConstantSymbol(".option.machine_version_major", ISA.Major);
1510 createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1511 createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1512 }
1513 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1514 initializeGprCountSymbol(IS_VGPR);
1515 initializeGprCountSymbol(IS_SGPR);
1516 } else
1517 KernelScope.initialize(getContext());
1518
1519 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1520 createConstantSymbol(Symbol, Code);
1521
1522 createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1523 createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1524 createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1525 }
1526
1527 bool hasMIMG_R128() const {
1528 return AMDGPU::hasMIMG_R128(getSTI());
1529 }
1530
1531 bool hasPackedD16() const {
1532 return AMDGPU::hasPackedD16(getSTI());
1533 }
1534
1535 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1536
1537 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1538
1539 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1540
1541 bool isSI() const {
1542 return AMDGPU::isSI(getSTI());
1543 }
1544
1545 bool isCI() const {
1546 return AMDGPU::isCI(getSTI());
1547 }
1548
1549 bool isVI() const {
1550 return AMDGPU::isVI(getSTI());
1551 }
1552
1553 bool isGFX9() const {
1554 return AMDGPU::isGFX9(getSTI());
1555 }
1556
1557 // TODO: isGFX90A is also true for GFX940. We need to clean it.
1558 bool isGFX90A() const {
1559 return AMDGPU::isGFX90A(getSTI());
1560 }
1561
1562 bool isGFX940() const {
1563 return AMDGPU::isGFX940(getSTI());
1564 }
1565
1566 bool isGFX9Plus() const {
1567 return AMDGPU::isGFX9Plus(getSTI());
1568 }
1569
1570 bool isGFX10() const {
1571 return AMDGPU::isGFX10(getSTI());
1572 }
1573
1574 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1575
1576 bool isGFX11() const {
1577 return AMDGPU::isGFX11(getSTI());
1578 }
1579
1580 bool isGFX11Plus() const {
1581 return AMDGPU::isGFX11Plus(getSTI());
1582 }
1583
1584 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1585
1586 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1587
1588 bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }
1589
1590 bool isGFX1250Plus() const { return AMDGPU::isGFX1250Plus(getSTI()); }
1591
1592 bool isGFX13() const { return AMDGPU::isGFX13(getSTI()); }
1593
1594 bool isGFX13Plus() const { return AMDGPU::isGFX13Plus(getSTI()); }
1595
1596 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1597
1598 bool isGFX10_BEncoding() const {
1599 return AMDGPU::isGFX10_BEncoding(getSTI());
1600 }
1601
1602 bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }
1603
1604 bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }
1605
1606 bool hasInv2PiInlineImm() const {
1607 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1608 }
1609
1610 bool has64BitLiterals() const {
1611 return getFeatureBits()[AMDGPU::Feature64BitLiterals];
1612 }
1613
1614 bool hasFlatOffsets() const {
1615 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1616 }
1617
1618 bool hasTrue16Insts() const {
1619 return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
1620 }
1621
1622 bool hasArchitectedFlatScratch() const {
1623 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1624 }
1625
1626 bool hasSGPR102_SGPR103() const {
1627 return !isVI() && !isGFX9();
1628 }
1629
1630 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1631
1632 bool hasIntClamp() const {
1633 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1634 }
1635
1636 bool hasPartialNSAEncoding() const {
1637 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1638 }
1639
1640 bool hasGloballyAddressableScratch() const {
1641 return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
1642 }
1643
1644 unsigned getNSAMaxSize(bool HasSampler = false) const {
1645 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1646 }
1647
1648 unsigned getMaxNumUserSGPRs() const {
1649 return AMDGPU::getMaxNumUserSGPRs(getSTI());
1650 }
1651
1652 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1653
1654 AMDGPUTargetStreamer &getTargetStreamer() {
1655 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1656 return static_cast<AMDGPUTargetStreamer &>(TS);
1657 }
1658
1659 MCContext &getContext() const {
1660 // We need this const_cast because for some reason getContext() is not const
1661 // in MCAsmParser.
1662 return const_cast<AMDGPUAsmParser *>(this)->MCTargetAsmParser::getContext();
1663 }
1664
1665 const MCRegisterInfo *getMRI() const {
1666 return getContext().getRegisterInfo();
1667 }
1668
1669 const MCInstrInfo *getMII() const {
1670 return &MII;
1671 }
1672
1673 // FIXME: This should not be used. Instead, should use queries derived from
1674 // getAvailableFeatures().
1675 const FeatureBitset &getFeatureBits() const {
1676 return getSTI().getFeatureBits();
1677 }
1678
1679 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1680 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1681 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1682
1683 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1684 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1685 bool isForcedDPP() const { return ForcedDPP; }
1686 bool isForcedSDWA() const { return ForcedSDWA; }
1687 ArrayRef<unsigned> getMatchedVariants() const;
1688 StringRef getMatchedVariantName() const;
1689
1690 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1691 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1692 bool RestoreOnFailure);
1693 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1694 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1695 SMLoc &EndLoc) override;
1696 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1697 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1698 unsigned Kind) override;
1699 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1700 OperandVector &Operands, MCStreamer &Out,
1701 uint64_t &ErrorInfo,
1702 bool MatchingInlineAsm) override;
1703 bool ParseDirective(AsmToken DirectiveID) override;
1704 void onEndOfFile() override;
1705 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1706 OperandMode Mode = OperandMode_Default);
1707 StringRef parseMnemonicSuffix(StringRef Name);
1708 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1709 SMLoc NameLoc, OperandVector &Operands) override;
1710 //bool ProcessInstruction(MCInst &Inst);
1711
1712 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1713
1714 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1715
1716 ParseStatus
1717 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1718 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1719 std::function<bool(int64_t &)> ConvertResult = nullptr);
1720
1721 ParseStatus parseOperandArrayWithPrefix(
1722 const char *Prefix, OperandVector &Operands,
1723 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1724 bool (*ConvertResult)(int64_t &) = nullptr);
1725
1726 ParseStatus
1727 parseNamedBit(StringRef Name, OperandVector &Operands,
1728 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1729 bool IgnoreNegative = false);
1730 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1731 ParseStatus parseCPol(OperandVector &Operands);
1732 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1733 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1734 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1735 SMLoc &StringLoc);
1736 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1737 StringRef Name,
1738 ArrayRef<const char *> Ids,
1739 int64_t &IntVal);
1740 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1741 StringRef Name,
1742 ArrayRef<const char *> Ids,
1743 AMDGPUOperand::ImmTy Type);
1744
1745 bool isModifier();
1746 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1747 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1748 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1749 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1750 bool parseSP3NegModifier();
1751 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1752 LitModifier Lit = LitModifier::None);
1753 ParseStatus parseReg(OperandVector &Operands);
1754 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1755 LitModifier Lit = LitModifier::None);
1756 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1757 bool AllowImm = true);
1758 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1759 bool AllowImm = true);
1760 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1761 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1762 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1763 ParseStatus tryParseIndexKey(OperandVector &Operands,
1764 AMDGPUOperand::ImmTy ImmTy);
1765 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1766 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1767 ParseStatus parseIndexKey32bit(OperandVector &Operands);
1768 ParseStatus tryParseMatrixFMT(OperandVector &Operands, StringRef Name,
1769 AMDGPUOperand::ImmTy Type);
1770 ParseStatus parseMatrixAFMT(OperandVector &Operands);
1771 ParseStatus parseMatrixBFMT(OperandVector &Operands);
1772 ParseStatus tryParseMatrixScale(OperandVector &Operands, StringRef Name,
1773 AMDGPUOperand::ImmTy Type);
1774 ParseStatus parseMatrixAScale(OperandVector &Operands);
1775 ParseStatus parseMatrixBScale(OperandVector &Operands);
1776 ParseStatus tryParseMatrixScaleFmt(OperandVector &Operands, StringRef Name,
1777 AMDGPUOperand::ImmTy Type);
1778 ParseStatus parseMatrixAScaleFmt(OperandVector &Operands);
1779 ParseStatus parseMatrixBScaleFmt(OperandVector &Operands);
1780
1781 ParseStatus parseDfmtNfmt(int64_t &Format);
1782 ParseStatus parseUfmt(int64_t &Format);
1783 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1784 int64_t &Format);
1785 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1786 int64_t &Format);
1787 ParseStatus parseFORMAT(OperandVector &Operands);
1788 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1789 ParseStatus parseNumericFormat(int64_t &Format);
1790 ParseStatus parseFlatOffset(OperandVector &Operands);
1791 ParseStatus parseR128A16(OperandVector &Operands);
1792 ParseStatus parseBLGP(OperandVector &Operands);
1793 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1794 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1795
1796 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1797
1798 bool parseCnt(int64_t &IntVal);
1799 ParseStatus parseSWaitCnt(OperandVector &Operands);
1800
1801 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1802 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1803 ParseStatus parseDepCtr(OperandVector &Operands);
1804
1805 bool parseDelay(int64_t &Delay);
1806 ParseStatus parseSDelayALU(OperandVector &Operands);
1807
1808 ParseStatus parseHwreg(OperandVector &Operands);
1809
1810private:
1811 struct OperandInfoTy {
1812 SMLoc Loc;
1813 int64_t Val;
1814 bool IsSymbolic = false;
1815 bool IsDefined = false;
1816
1817 constexpr OperandInfoTy(int64_t Val) : Val(Val) {}
1818 };
1819
1820 struct StructuredOpField : OperandInfoTy {
1821 StringLiteral Id;
1822 StringLiteral Desc;
1823 unsigned Width;
1824 bool IsDefined = false;
1825
1826 constexpr StructuredOpField(StringLiteral Id, StringLiteral Desc,
1827 unsigned Width, int64_t Default)
1828 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1829 virtual ~StructuredOpField() = default;
1830
1831 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1832 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1833 return false;
1834 }
1835
1836 virtual bool validate(AMDGPUAsmParser &Parser) const {
1837 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1838 return Error(Parser, "not supported on this GPU");
1839 if (!isUIntN(Width, Val))
1840 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1841 return true;
1842 }
1843 };
1844
1845 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1846 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1847
1848 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1849 bool validateSendMsg(const OperandInfoTy &Msg,
1850 const OperandInfoTy &Op,
1851 const OperandInfoTy &Stream);
1852
1853 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1854 OperandInfoTy &Width);
1855
1856 const AMDGPUOperand &findMCOperand(const OperandVector &Operands,
1857 int MCOpIdx) const;
1858
1859 static SMLoc getLaterLoc(SMLoc a, SMLoc b);
1860
1861 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1862 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1863 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1864
1865 SMLoc getOperandLoc(const OperandVector &Operands, int MCOpIdx) const;
1866 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1867 const OperandVector &Operands) const;
1868 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type,
1869 const OperandVector &Operands) const;
1870 SMLoc getInstLoc(const OperandVector &Operands) const;
1871
1872 bool validateInstruction(const MCInst &Inst, SMLoc IDLoc,
1873 const OperandVector &Operands);
1874 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1875 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1876 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1877 bool validateSOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1878 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1879 std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
1880 bool AsVOPD3);
1881 bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
1882 bool tryVOPD(const MCInst &Inst);
1883 bool tryVOPD3(const MCInst &Inst);
1884 bool tryAnotherVOPDEncoding(const MCInst &Inst);
1885
1886 bool validateIntClampSupported(const MCInst &Inst);
1887 bool validateMIMGAtomicDMask(const MCInst &Inst);
1888 bool validateMIMGGatherDMask(const MCInst &Inst);
1889 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1890 bool validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc);
1891 bool validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc);
1892 bool validateMIMGD16(const MCInst &Inst);
1893 bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1894 bool validateTensorR128(const MCInst &Inst);
1895 bool validateMIMGMSAA(const MCInst &Inst);
1896 bool validateOpSel(const MCInst &Inst);
1897 bool validateTrue16OpSel(const MCInst &Inst);
1898 bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
1899 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1900 bool validateVccOperand(MCRegister Reg) const;
1901 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1902 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1903 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1904 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1905 bool validateAGPRLdSt(const MCInst &Inst) const;
1906 bool validateVGPRAlign(const MCInst &Inst) const;
1907 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1908 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1909 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1910 bool validateDivScale(const MCInst &Inst);
1911 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1912 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1913 SMLoc IDLoc);
1914 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1915 const unsigned CPol);
1916 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1917 bool validateLdsDirect(const MCInst &Inst, const OperandVector &Operands);
1918 bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
1919 unsigned getConstantBusLimit(unsigned Opcode) const;
1920 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1921 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1922 MCRegister findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1923
1924 bool isSupportedMnemo(StringRef Mnemo,
1925 const FeatureBitset &FBS);
1926 bool isSupportedMnemo(StringRef Mnemo,
1927 const FeatureBitset &FBS,
1928 ArrayRef<unsigned> Variants);
1929 bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);
1930
1931 bool isId(const StringRef Id) const;
1932 bool isId(const AsmToken &Token, const StringRef Id) const;
1933 bool isToken(const AsmToken::TokenKind Kind) const;
1934 StringRef getId() const;
1935 bool trySkipId(const StringRef Id);
1936 bool trySkipId(const StringRef Pref, const StringRef Id);
1937 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1938 bool trySkipToken(const AsmToken::TokenKind Kind);
1939 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1940 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1941 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1942
1943 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1944 AsmToken::TokenKind getTokenKind() const;
1945 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1946 bool parseExpr(OperandVector &Operands);
1947 StringRef getTokenStr() const;
1948 AsmToken peekToken(bool ShouldSkipSpace = true);
1949 AsmToken getToken() const;
1950 SMLoc getLoc() const;
1951 void lex();
1952
1953public:
1954 void onBeginOfFile() override;
1955 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1956
1957 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1958
1959 ParseStatus parseExpTgt(OperandVector &Operands);
1960 ParseStatus parseSendMsg(OperandVector &Operands);
1961 ParseStatus parseWaitEvent(OperandVector &Operands);
1962 ParseStatus parseInterpSlot(OperandVector &Operands);
1963 ParseStatus parseInterpAttr(OperandVector &Operands);
1964 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1965 ParseStatus parseBoolReg(OperandVector &Operands);
1966
1967 bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1968 const unsigned MaxVal, const Twine &ErrMsg,
1969 SMLoc &Loc);
1970 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1971 const unsigned MinVal,
1972 const unsigned MaxVal,
1973 const StringRef ErrMsg);
1974 ParseStatus parseSwizzle(OperandVector &Operands);
1975 bool parseSwizzleOffset(int64_t &Imm);
1976 bool parseSwizzleMacro(int64_t &Imm);
1977 bool parseSwizzleQuadPerm(int64_t &Imm);
1978 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1979 bool parseSwizzleBroadcast(int64_t &Imm);
1980 bool parseSwizzleSwap(int64_t &Imm);
1981 bool parseSwizzleReverse(int64_t &Imm);
1982 bool parseSwizzleFFT(int64_t &Imm);
1983 bool parseSwizzleRotate(int64_t &Imm);
1984
1985 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1986 int64_t parseGPRIdxMacro();
1987
1988 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1989 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1990
1991 ParseStatus parseOModSI(OperandVector &Operands);
1992
1993 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1994 OptionalImmIndexMap &OptionalIdx);
1995 void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
1996 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1997 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1998 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1999 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
2000
2001 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
2002 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
2003 OptionalImmIndexMap &OptionalIdx);
2004 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
2005 OptionalImmIndexMap &OptionalIdx);
2006
2007 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
2008 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
2009 void cvtOpSelHelper(MCInst &Inst, unsigned OpSel);
2010
2011 bool parseDimId(unsigned &Encoding);
2012 ParseStatus parseDim(OperandVector &Operands);
2013 bool convertDppBoundCtrl(int64_t &BoundCtrl);
2014 ParseStatus parseDPP8(OperandVector &Operands);
2015 ParseStatus parseDPPCtrl(OperandVector &Operands);
2016 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
2017 int64_t parseDPPCtrlSel(StringRef Ctrl);
2018 int64_t parseDPPCtrlPerm();
2019 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
2020 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
2021 cvtDPP(Inst, Operands, true);
2022 }
2023 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
2024 bool IsDPP8 = false);
2025 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
2026 cvtVOP3DPP(Inst, Operands, true);
2027 }
2028
2029 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
2030 AMDGPUOperand::ImmTy Type);
2031 ParseStatus parseSDWADstUnused(OperandVector &Operands);
2032 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
2033 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
2034 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
2035 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
2036 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
2037
2038 enum class SDWAInstType : unsigned { VOP1 = 0, VOP2 = 1, VOPC = 2 };
2039
2040 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
2041 SDWAInstType BasicInstType, bool SkipDstVcc = false,
2042 bool SkipSrcVcc = false);
2043
2044 ParseStatus parseEndpgm(OperandVector &Operands);
2045
2046 ParseStatus parseVOPD(OperandVector &Operands);
2047};
2048
2049} // end anonymous namespace
2050
2051// May be called with integer type with equivalent bitwidth.
2052static const fltSemantics *getFltSemantics(unsigned Size) {
2053 switch (Size) {
2054 case 4:
2055 return &APFloat::IEEEsingle();
2056 case 8:
2057 return &APFloat::IEEEdouble();
2058 case 2:
2059 return &APFloat::IEEEhalf();
2060 default:
2061 llvm_unreachable("unsupported fp type");
2062 }
2063}
2064
2066 return getFltSemantics(VT.getScalarSizeInBits() / 8);
2067}
2068
2070 switch (OperandType) {
2071 // When floating-point immediate is used as operand of type i16, the 32-bit
2072 // representation of the constant truncated to the 16 LSBs should be used.
2087 return &APFloat::IEEEsingle();
2096 return &APFloat::IEEEdouble();
2104 return &APFloat::IEEEhalf();
2109 return &APFloat::BFloat();
2110 default:
2111 llvm_unreachable("unsupported fp type");
2112 }
2113}
2114
2115//===----------------------------------------------------------------------===//
2116// Operand
2117//===----------------------------------------------------------------------===//
2118
2119static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
2120 bool Lost;
2121
2122 // Convert literal to single precision
2125 &Lost);
2126 // We allow precision lost but not overflow or underflow
2127 if (Status != APFloat::opOK &&
2128 Lost &&
2129 ((Status & APFloat::opOverflow) != 0 ||
2130 (Status & APFloat::opUnderflow) != 0)) {
2131 return false;
2132 }
2133
2134 return true;
2135}
2136
2137static bool isSafeTruncation(int64_t Val, unsigned Size) {
2138 return isUIntN(Size, Val) || isIntN(Size, Val);
2139}
2140
2141static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2142 if (VT.getScalarType() == MVT::i16)
2143 return isInlinableLiteral32(Val, HasInv2Pi);
2144
2145 if (VT.getScalarType() == MVT::f16)
2146 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2147
2148 assert(VT.getScalarType() == MVT::bf16);
2149
2150 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2151}
2152
2153bool AMDGPUOperand::isInlinableImm(MVT type) const {
2154
2155 // This is a hack to enable named inline values like
2156 // shared_base with both 32-bit and 64-bit operands.
2157 // Note that these values are defined as
2158 // 32-bit operands only.
2159 if (isInlineValue()) {
2160 return true;
2161 }
2162
2163 if (!isImmTy(ImmTyNone)) {
2164 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2165 return false;
2166 }
2167
2168 if (getModifiers().Lit != LitModifier::None)
2169 return false;
2170
2171 // TODO: We should avoid using host float here. It would be better to
2172 // check the float bit values which is what a few other places do.
2173 // We've had bot failures before due to weird NaN support on mips hosts.
2174
2175 APInt Literal(64, Imm.Val);
2176
2177 if (Imm.IsFPImm) { // We got fp literal token
2178 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2180 AsmParser->hasInv2PiInlineImm());
2181 }
2182
2183 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2184 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2185 return false;
2186
2187 if (type.getScalarSizeInBits() == 16) {
2188 bool Lost = false;
2189 switch (type.getScalarType().SimpleTy) {
2190 default:
2191 llvm_unreachable("unknown 16-bit type");
2192 case MVT::bf16:
2193 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2194 &Lost);
2195 break;
2196 case MVT::f16:
2197 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2198 &Lost);
2199 break;
2200 case MVT::i16:
2201 FPLiteral.convert(APFloatBase::IEEEsingle(),
2202 APFloat::rmNearestTiesToEven, &Lost);
2203 break;
2204 }
2205 // We need to use 32-bit representation here because when a floating-point
2206 // inline constant is used as an i16 operand, its 32-bit representation
2207 // will be used. We will need the 32-bit value to check if
2208 // it is FP inline constant.
2209 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2210 return isInlineableLiteralOp16(ImmVal, type,
2211 AsmParser->hasInv2PiInlineImm());
2212 }
2213
2214 // Check if single precision literal is inlinable
2216 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2217 AsmParser->hasInv2PiInlineImm());
2218 }
2219
2220 // We got int literal token.
2221 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2223 AsmParser->hasInv2PiInlineImm());
2224 }
2225
2226 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2227 return false;
2228 }
2229
2230 if (type.getScalarSizeInBits() == 16) {
2232 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2233 type, AsmParser->hasInv2PiInlineImm());
2234 }
2235
2237 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2238 AsmParser->hasInv2PiInlineImm());
2239}
2240
2241bool AMDGPUOperand::isLiteralImm(MVT type) const {
2242 // Check that this immediate can be added as literal
2243 if (!isImmTy(ImmTyNone)) {
2244 return false;
2245 }
2246
2247 bool Allow64Bit =
2248 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2249
2250 if (!Imm.IsFPImm) {
2251 // We got int literal token.
2252
2253 if (type == MVT::f64 && hasFPModifiers()) {
2254 // Cannot apply fp modifiers to int literals preserving the same semantics
2255 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
2256 // disable these cases.
2257 return false;
2258 }
2259
2260 unsigned Size = type.getSizeInBits();
2261 if (Size == 64) {
2262 if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Imm.Val, false))
2263 return true;
2264 Size = 32;
2265 }
2266
2267 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2268 // types.
2269 return isSafeTruncation(Imm.Val, Size);
2270 }
2271
2272 // We got fp literal token
2273 if (type == MVT::f64) { // Expected 64-bit fp operand
2274 // We would set low 64-bits of literal to zeroes but we accept this literals
2275 return true;
2276 }
2277
2278 if (type == MVT::i64) { // Expected 64-bit int operand
2279 // We don't allow fp literals in 64-bit integer instructions. It is
2280 // unclear how we should encode them.
2281 return false;
2282 }
2283
2284 // We allow fp literals with f16x2 operands assuming that the specified
2285 // literal goes into the lower half and the upper half is zero. We also
2286 // require that the literal may be losslessly converted to f16.
2287 //
2288 // For i16x2 operands, we assume that the specified literal is encoded as a
2289 // single-precision float. This is pretty odd, but it matches SP3 and what
2290 // happens in hardware.
2291 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2292 : (type == MVT::v2i16) ? MVT::f32
2293 : (type == MVT::v2f32) ? MVT::f32
2294 : type;
2295
2296 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2297 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2298}
2299
2300bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2301 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2302}
2303
2304bool AMDGPUOperand::isVRegWithInputMods() const {
2305 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2306 // GFX90A allows DPP on 64-bit operands.
2307 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2308 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2309}
2310
2311template <bool IsFake16>
2312bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2313 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2314 : AMDGPU::VGPR_16_Lo128RegClassID);
2315}
2316
2317template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2318 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2319 : AMDGPU::VGPR_16RegClassID);
2320}
2321
2322bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2323 if (AsmParser->isVI())
2324 return isVReg32();
2325 if (AsmParser->isGFX9Plus())
2326 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2327 return false;
2328}
2329
2330bool AMDGPUOperand::isSDWAFP16Operand() const {
2331 return isSDWAOperand(MVT::f16);
2332}
2333
2334bool AMDGPUOperand::isSDWAFP32Operand() const {
2335 return isSDWAOperand(MVT::f32);
2336}
2337
2338bool AMDGPUOperand::isSDWAInt16Operand() const {
2339 return isSDWAOperand(MVT::i16);
2340}
2341
2342bool AMDGPUOperand::isSDWAInt32Operand() const {
2343 return isSDWAOperand(MVT::i32);
2344}
2345
2346bool AMDGPUOperand::isBoolReg() const {
2347 return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
2348 (AsmParser->isWave32() && isSCSrc_b32()));
2349}
2350
2351uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2352{
2353 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2354 assert(Size == 2 || Size == 4 || Size == 8);
2355
2356 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2357
2358 if (Imm.Mods.Abs) {
2359 Val &= ~FpSignMask;
2360 }
2361 if (Imm.Mods.Neg) {
2362 Val ^= FpSignMask;
2363 }
2364
2365 return Val;
2366}
2367
2368void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2369 MCOpIdx = Inst.getNumOperands();
2370
2371 if (isExpr()) {
2373 return;
2374 }
2375
2376 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2377 Inst.getNumOperands())) {
2378 addLiteralImmOperand(Inst, Imm.Val,
2379 ApplyModifiers &
2380 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2381 } else {
2382 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2384 }
2385}
2386
2387void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2388 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2389 auto OpNum = Inst.getNumOperands();
2390 // Check that this operand accepts literals
2391 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2392
2393 if (ApplyModifiers) {
2394 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2395 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2396 Val = applyInputFPModifiers(Val, Size);
2397 }
2398
2399 APInt Literal(64, Val);
2400 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2401
2402 bool CanUse64BitLiterals =
2403 AsmParser->has64BitLiterals() &&
2404 !(InstDesc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));
2405 LitModifier Lit = getModifiers().Lit;
2406 MCContext &Ctx = AsmParser->getContext();
2407
2408 if (Imm.IsFPImm) { // We got fp literal token
2409 switch (OpTy) {
2417 if (Lit == LitModifier::None &&
2419 AsmParser->hasInv2PiInlineImm())) {
2420 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2421 return;
2422 }
2423
2424 // Non-inlineable
2425 if (AMDGPU::isSISrcFPOperand(InstDesc,
2426 OpNum)) { // Expected 64-bit fp operand
2427 bool HasMandatoryLiteral =
2428 AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm);
2429 // For fp operands we check if low 32 bits are zeros
2430 if (Literal.getLoBits(32) != 0 &&
2431 (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
2432 !HasMandatoryLiteral) {
2433 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
2434 Inst.getLoc(),
2435 "Can't encode literal as exact 64-bit floating-point operand. "
2436 "Low 32-bits will be set to zero");
2437 Val &= 0xffffffff00000000u;
2438 }
2439
2440 if ((OpTy == AMDGPU::OPERAND_REG_IMM_FP64 ||
2443 if (CanUse64BitLiterals && Lit == LitModifier::None &&
2444 (isInt<32>(Val) || isUInt<32>(Val))) {
2445 // The floating-point operand will be verbalized as an
2446 // integer one. If that integer happens to fit 32 bits, on
2447 // re-assembling it will be interpreted as the high half of
2448 // the actual value, so we have to wrap it into lit64().
2449 Lit = LitModifier::Lit64;
2450 } else if (Lit == LitModifier::Lit) {
2451 // For FP64 operands lit() specifies the high half of the value.
2452 Val = Hi_32(Val);
2453 }
2454 }
2455 break;
2456 }
2457
2458 // We don't allow fp literals in 64-bit integer instructions. It is
2459 // unclear how we should encode them. This case should be checked earlier
2460 // in predicate methods (isLiteralImm())
2461 llvm_unreachable("fp literal in 64-bit integer instruction.");
2462
2464 if (CanUse64BitLiterals && Lit == LitModifier::None &&
2465 (isInt<32>(Val) || isUInt<32>(Val)))
2466 Lit = LitModifier::Lit64;
2467 break;
2468
2473 if (Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
2474 Literal == 0x3fc45f306725feed) {
2475 // This is the 1/(2*pi) which is going to be truncated to bf16 with the
2476 // loss of precision. The constant represents idiomatic fp32 value of
2477 // 1/(2*pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16
2478 // bits. Prevent rounding below.
2479 Inst.addOperand(MCOperand::createImm(0x3e22));
2480 return;
2481 }
2482 [[fallthrough]];
2483
2505 bool lost;
2506 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2507 // Convert literal to single precision
2508 FPLiteral.convert(*getOpFltSemantics(OpTy),
2509 APFloat::rmNearestTiesToEven, &lost);
2510 // We allow precision lost but not overflow or underflow. This should be
2511 // checked earlier in isLiteralImm()
2512
2513 Val = FPLiteral.bitcastToAPInt().getZExtValue();
2514 break;
2515 }
2516 default:
2517 llvm_unreachable("invalid operand size");
2518 }
2519
2520 if (Lit != LitModifier::None) {
2521 Inst.addOperand(
2523 } else {
2525 }
2526 return;
2527 }
2528
2529 // We got int literal token.
2530 // Only sign extend inline immediates.
2531 switch (OpTy) {
2546 break;
2547
2551 if (Lit == LitModifier::None &&
2552 AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2554 return;
2555 }
2556
2557 // When the 32 MSBs are not zero (effectively means it can't be safely
2558 // truncated to uint32_t), if the target doesn't support 64-bit literals, or
2559 // the lit modifier is explicitly used, we need to truncate it to the 32
2560 // LSBs.
2561 if (!AsmParser->has64BitLiterals() || Lit == LitModifier::Lit)
2562 Val = Lo_32(Val);
2563 break;
2564
2569 if (Lit == LitModifier::None &&
2570 AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2572 return;
2573 }
2574
2575 // If the target doesn't support 64-bit literals, we need to use the
2576 // constant as the high 32 MSBs of a double-precision floating point value.
2577 if (!AsmParser->has64BitLiterals()) {
2578 Val = static_cast<uint64_t>(Val) << 32;
2579 } else {
2580 // Now the target does support 64-bit literals, there are two cases
2581 // where we still want to use src_literal encoding:
2582 // 1) explicitly forced by using lit modifier;
2583 // 2) the value is a valid 32-bit representation (signed or unsigned),
2584 // meanwhile not forced by lit64 modifier.
2585 if (Lit == LitModifier::Lit ||
2586 (Lit != LitModifier::Lit64 && (isInt<32>(Val) || isUInt<32>(Val))))
2587 Val = static_cast<uint64_t>(Val) << 32;
2588 }
2589
2590 // For FP64 operands lit() specifies the high half of the value.
2591 if (Lit == LitModifier::Lit)
2592 Val = Hi_32(Val);
2593 break;
2594
2606 break;
2607
2609 if ((isInt<32>(Val) || isUInt<32>(Val)) && Lit != LitModifier::Lit64)
2610 Val <<= 32;
2611 break;
2612
2613 default:
2614 llvm_unreachable("invalid operand type");
2615 }
2616
2617 if (Lit != LitModifier::None) {
2618 Inst.addOperand(
2620 } else {
2622 }
2623}
2624
2625void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2626 MCOpIdx = Inst.getNumOperands();
2627 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2628}
2629
2630bool AMDGPUOperand::isInlineValue() const {
2631 return isRegKind() && ::isInlineValue(getReg());
2632}
2633
2634//===----------------------------------------------------------------------===//
2635// AsmParser
2636//===----------------------------------------------------------------------===//
2637
2638void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2639 // TODO: make those pre-defined variables read-only.
2640 // Currently there is none suitable machinery in the core llvm-mc for this.
2641 // MCSymbol::isRedefinable is intended for another purpose, and
2642 // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
2643 MCContext &Ctx = getContext();
2644 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2646}
2647
2648static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2649 if (Is == IS_VGPR) {
2650 switch (RegWidth) {
2651 default: return -1;
2652 case 32:
2653 return AMDGPU::VGPR_32RegClassID;
2654 case 64:
2655 return AMDGPU::VReg_64RegClassID;
2656 case 96:
2657 return AMDGPU::VReg_96RegClassID;
2658 case 128:
2659 return AMDGPU::VReg_128RegClassID;
2660 case 160:
2661 return AMDGPU::VReg_160RegClassID;
2662 case 192:
2663 return AMDGPU::VReg_192RegClassID;
2664 case 224:
2665 return AMDGPU::VReg_224RegClassID;
2666 case 256:
2667 return AMDGPU::VReg_256RegClassID;
2668 case 288:
2669 return AMDGPU::VReg_288RegClassID;
2670 case 320:
2671 return AMDGPU::VReg_320RegClassID;
2672 case 352:
2673 return AMDGPU::VReg_352RegClassID;
2674 case 384:
2675 return AMDGPU::VReg_384RegClassID;
2676 case 512:
2677 return AMDGPU::VReg_512RegClassID;
2678 case 1024:
2679 return AMDGPU::VReg_1024RegClassID;
2680 }
2681 } else if (Is == IS_TTMP) {
2682 switch (RegWidth) {
2683 default: return -1;
2684 case 32:
2685 return AMDGPU::TTMP_32RegClassID;
2686 case 64:
2687 return AMDGPU::TTMP_64RegClassID;
2688 case 128:
2689 return AMDGPU::TTMP_128RegClassID;
2690 case 256:
2691 return AMDGPU::TTMP_256RegClassID;
2692 case 512:
2693 return AMDGPU::TTMP_512RegClassID;
2694 }
2695 } else if (Is == IS_SGPR) {
2696 switch (RegWidth) {
2697 default: return -1;
2698 case 32:
2699 return AMDGPU::SGPR_32RegClassID;
2700 case 64:
2701 return AMDGPU::SGPR_64RegClassID;
2702 case 96:
2703 return AMDGPU::SGPR_96RegClassID;
2704 case 128:
2705 return AMDGPU::SGPR_128RegClassID;
2706 case 160:
2707 return AMDGPU::SGPR_160RegClassID;
2708 case 192:
2709 return AMDGPU::SGPR_192RegClassID;
2710 case 224:
2711 return AMDGPU::SGPR_224RegClassID;
2712 case 256:
2713 return AMDGPU::SGPR_256RegClassID;
2714 case 288:
2715 return AMDGPU::SGPR_288RegClassID;
2716 case 320:
2717 return AMDGPU::SGPR_320RegClassID;
2718 case 352:
2719 return AMDGPU::SGPR_352RegClassID;
2720 case 384:
2721 return AMDGPU::SGPR_384RegClassID;
2722 case 512:
2723 return AMDGPU::SGPR_512RegClassID;
2724 }
2725 } else if (Is == IS_AGPR) {
2726 switch (RegWidth) {
2727 default: return -1;
2728 case 32:
2729 return AMDGPU::AGPR_32RegClassID;
2730 case 64:
2731 return AMDGPU::AReg_64RegClassID;
2732 case 96:
2733 return AMDGPU::AReg_96RegClassID;
2734 case 128:
2735 return AMDGPU::AReg_128RegClassID;
2736 case 160:
2737 return AMDGPU::AReg_160RegClassID;
2738 case 192:
2739 return AMDGPU::AReg_192RegClassID;
2740 case 224:
2741 return AMDGPU::AReg_224RegClassID;
2742 case 256:
2743 return AMDGPU::AReg_256RegClassID;
2744 case 288:
2745 return AMDGPU::AReg_288RegClassID;
2746 case 320:
2747 return AMDGPU::AReg_320RegClassID;
2748 case 352:
2749 return AMDGPU::AReg_352RegClassID;
2750 case 384:
2751 return AMDGPU::AReg_384RegClassID;
2752 case 512:
2753 return AMDGPU::AReg_512RegClassID;
2754 case 1024:
2755 return AMDGPU::AReg_1024RegClassID;
2756 }
2757 }
2758 return -1;
2759}
2760
2763 .Case("exec", AMDGPU::EXEC)
2764 .Case("vcc", AMDGPU::VCC)
2765 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2766 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2767 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2768 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2769 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2770 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2771 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2772 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2773 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2774 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2775 .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
2776 .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
2777 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2778 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2779 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2780 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2781 .Case("m0", AMDGPU::M0)
2782 .Case("vccz", AMDGPU::SRC_VCCZ)
2783 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2784 .Case("execz", AMDGPU::SRC_EXECZ)
2785 .Case("src_execz", AMDGPU::SRC_EXECZ)
2786 .Case("scc", AMDGPU::SRC_SCC)
2787 .Case("src_scc", AMDGPU::SRC_SCC)
2788 .Case("tba", AMDGPU::TBA)
2789 .Case("tma", AMDGPU::TMA)
2790 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2791 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2792 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2793 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2794 .Case("vcc_lo", AMDGPU::VCC_LO)
2795 .Case("vcc_hi", AMDGPU::VCC_HI)
2796 .Case("exec_lo", AMDGPU::EXEC_LO)
2797 .Case("exec_hi", AMDGPU::EXEC_HI)
2798 .Case("tma_lo", AMDGPU::TMA_LO)
2799 .Case("tma_hi", AMDGPU::TMA_HI)
2800 .Case("tba_lo", AMDGPU::TBA_LO)
2801 .Case("tba_hi", AMDGPU::TBA_HI)
2802 .Case("pc", AMDGPU::PC_REG)
2803 .Case("null", AMDGPU::SGPR_NULL)
2804 .Default(AMDGPU::NoRegister);
2805}
2806
2807bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2808 SMLoc &EndLoc, bool RestoreOnFailure) {
2809 auto R = parseRegister();
2810 if (!R) return true;
2811 assert(R->isReg());
2812 RegNo = R->getReg();
2813 StartLoc = R->getStartLoc();
2814 EndLoc = R->getEndLoc();
2815 return false;
2816}
2817
2818bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2819 SMLoc &EndLoc) {
2820 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2821}
2822
2823ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2824 SMLoc &EndLoc) {
2825 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2826 bool PendingErrors = getParser().hasPendingError();
2827 getParser().clearPendingErrors();
2828 if (PendingErrors)
2829 return ParseStatus::Failure;
2830 if (Result)
2831 return ParseStatus::NoMatch;
2832 return ParseStatus::Success;
2833}
2834
2835bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2836 RegisterKind RegKind,
2837 MCRegister Reg1,
2838 RegisterKind RegKind1, SMLoc Loc) {
2839 // Allow VCC_LO/HI at the end of SGPR lists.
2840 if (RegKind == IS_SGPR) {
2841 unsigned RegIdx = (Reg - AMDGPU::SGPR0) + RegWidth / 32;
2842 if ((RegIdx == 106 && Reg1 == AMDGPU::VCC_LO) ||
2843 (RegIdx == 107 && Reg1 == AMDGPU::VCC_HI)) {
2844 RegWidth += 32;
2845 return true;
2846 }
2847 }
2848
2849 if (RegKind != RegKind1) {
2850 Error(Loc, "registers in a list must be of the same kind");
2851 return MCRegister();
2852 }
2853
2854 switch (RegKind) {
2855 case IS_SPECIAL:
2856 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2857 Reg = AMDGPU::EXEC;
2858 RegWidth = 64;
2859 return true;
2860 }
2861 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2862 Reg = AMDGPU::FLAT_SCR;
2863 RegWidth = 64;
2864 return true;
2865 }
2866 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2867 Reg = AMDGPU::XNACK_MASK;
2868 RegWidth = 64;
2869 return true;
2870 }
2871 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2872 Reg = AMDGPU::VCC;
2873 RegWidth = 64;
2874 return true;
2875 }
2876 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2877 Reg = AMDGPU::TBA;
2878 RegWidth = 64;
2879 return true;
2880 }
2881 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2882 Reg = AMDGPU::TMA;
2883 RegWidth = 64;
2884 return true;
2885 }
2886 Error(Loc, "register does not fit in the list");
2887 return false;
2888 case IS_VGPR:
2889 case IS_SGPR:
2890 case IS_AGPR:
2891 case IS_TTMP:
2892 if (Reg1 != Reg + RegWidth / 32) {
2893 Error(Loc, "registers in a list must have consecutive indices");
2894 return false;
2895 }
2896 RegWidth += 32;
2897 return true;
2898 default:
2899 llvm_unreachable("unexpected register kind");
2900 }
2901}
2902
2903struct RegInfo {
2905 RegisterKind Kind;
2906};
2907
// Name prefixes of the regular (indexed) register files, each paired with the
// register kind it denotes. Order matters: the prefix lookup over this table
// uses starts_with and returns the first match, so "acc" must stay ahead of
// "a".
2908static constexpr RegInfo RegularRegisters[] = {
2909 {{"v"}, IS_VGPR},
2910 {{"s"}, IS_SGPR},
2911 {{"ttmp"}, IS_TTMP},
2912 {{"acc"}, IS_AGPR},
2913 {{"a"}, IS_AGPR},
2914};
2915
2916static bool isRegularReg(RegisterKind Kind) {
2917 return Kind == IS_VGPR ||
2918 Kind == IS_SGPR ||
2919 Kind == IS_TTMP ||
2920 Kind == IS_AGPR;
2921}
2922
2924 for (const RegInfo &Reg : RegularRegisters)
2925 if (Str.starts_with(Reg.Name))
2926 return &Reg;
2927 return nullptr;
2928}
2929
2930static bool getRegNum(StringRef Str, unsigned& Num) {
2931 return !Str.getAsInteger(10, Num);
2932}
2933
2934bool
2935AMDGPUAsmParser::isRegister(const AsmToken &Token,
2936 const AsmToken &NextToken) const {
2937
2938 // A list of consecutive registers: [s0,s1,s2,s3]
2939 if (Token.is(AsmToken::LBrac))
2940 return true;
2941
2942 if (!Token.is(AsmToken::Identifier))
2943 return false;
2944
2945 // A single register like s0 or a range of registers like s[0:1]
2946
2947 StringRef Str = Token.getString();
2948 const RegInfo *Reg = getRegularRegInfo(Str);
2949 if (Reg) {
2950 StringRef RegName = Reg->Name;
2951 StringRef RegSuffix = Str.substr(RegName.size());
2952 if (!RegSuffix.empty()) {
2953 RegSuffix.consume_back(".l");
2954 RegSuffix.consume_back(".h");
2955 unsigned Num;
2956 // A single register with an index: rXX
2957 if (getRegNum(RegSuffix, Num))
2958 return true;
2959 } else {
2960 // A range of registers: r[XX:YY].
2961 if (NextToken.is(AsmToken::LBrac))
2962 return true;
2963 }
2964 }
2965
2966 return getSpecialRegForName(Str).isValid();
2967}
2968
2969bool
2970AMDGPUAsmParser::isRegister()
2971{
2972 return isRegister(getToken(), peekToken());
2973}
2974
2975MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2976 unsigned SubReg, unsigned RegWidth,
2977 SMLoc Loc) {
2978 assert(isRegularReg(RegKind));
2979
2980 unsigned AlignSize = 1;
2981 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2982 // SGPR and TTMP registers must be aligned.
2983 // Max required alignment is 4 dwords.
2984 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2985 }
2986
2987 if (RegNum % AlignSize != 0) {
2988 Error(Loc, "invalid register alignment");
2989 return MCRegister();
2990 }
2991
2992 unsigned RegIdx = RegNum / AlignSize;
2993 int RCID = getRegClass(RegKind, RegWidth);
2994 if (RCID == -1) {
2995 Error(Loc, "invalid or unsupported register size");
2996 return MCRegister();
2997 }
2998
2999 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3000 const MCRegisterClass &RC = TRI->getRegClass(RCID);
3001 if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
3002 Error(Loc, "register index is out of range");
3003 return AMDGPU::NoRegister;
3004 }
3005
3006 if (RegKind == IS_VGPR && !isGFX1250Plus() && RegIdx + RegWidth / 32 > 256) {
3007 Error(Loc, "register index is out of range");
3008 return MCRegister();
3009 }
3010
3011 MCRegister Reg = RC.getRegister(RegIdx);
3012
3013 if (SubReg) {
3014 Reg = TRI->getSubReg(Reg, SubReg);
3015
3016 // Currently all regular registers have their .l and .h subregisters, so
3017 // we should never need to generate an error here.
3018 assert(Reg && "Invalid subregister!");
3019 }
3020
3021 return Reg;
3022}
3023
3024bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
3025 unsigned &SubReg) {
3026 int64_t RegLo, RegHi;
3027 if (!skipToken(AsmToken::LBrac, "missing register index"))
3028 return false;
3029
3030 SMLoc FirstIdxLoc = getLoc();
3031 SMLoc SecondIdxLoc;
3032
3033 if (!parseExpr(RegLo))
3034 return false;
3035
3036 if (trySkipToken(AsmToken::Colon)) {
3037 SecondIdxLoc = getLoc();
3038 if (!parseExpr(RegHi))
3039 return false;
3040 } else {
3041 RegHi = RegLo;
3042 }
3043
3044 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
3045 return false;
3046
3047 if (!isUInt<32>(RegLo)) {
3048 Error(FirstIdxLoc, "invalid register index");
3049 return false;
3050 }
3051
3052 if (!isUInt<32>(RegHi)) {
3053 Error(SecondIdxLoc, "invalid register index");
3054 return false;
3055 }
3056
3057 if (RegLo > RegHi) {
3058 Error(FirstIdxLoc, "first register index should not exceed second index");
3059 return false;
3060 }
3061
3062 if (RegHi == RegLo) {
3063 StringRef RegSuffix = getTokenStr();
3064 if (RegSuffix == ".l") {
3065 SubReg = AMDGPU::lo16;
3066 lex();
3067 } else if (RegSuffix == ".h") {
3068 SubReg = AMDGPU::hi16;
3069 lex();
3070 }
3071 }
3072
3073 Num = static_cast<unsigned>(RegLo);
3074 RegWidth = 32 * ((RegHi - RegLo) + 1);
3075
3076 return true;
3077}
3078
3079MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
3080 unsigned &RegNum,
3081 unsigned &RegWidth,
3082 SmallVectorImpl<AsmToken> &Tokens) {
3083 assert(isToken(AsmToken::Identifier));
3084 MCRegister Reg = getSpecialRegForName(getTokenStr());
3085 if (Reg) {
3086 RegNum = 0;
3087 RegWidth = 32;
3088 RegKind = IS_SPECIAL;
3089 Tokens.push_back(getToken());
3090 lex(); // skip register name
3091 }
3092 return Reg;
3093}
3094
3095MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3096 unsigned &RegNum,
3097 unsigned &RegWidth,
3098 SmallVectorImpl<AsmToken> &Tokens) {
3099 assert(isToken(AsmToken::Identifier));
3100 StringRef RegName = getTokenStr();
3101 auto Loc = getLoc();
3102
3103 const RegInfo *RI = getRegularRegInfo(RegName);
3104 if (!RI) {
3105 Error(Loc, "invalid register name");
3106 return MCRegister();
3107 }
3108
3109 Tokens.push_back(getToken());
3110 lex(); // skip register name
3111
3112 RegKind = RI->Kind;
3113 StringRef RegSuffix = RegName.substr(RI->Name.size());
3114 unsigned SubReg = NoSubRegister;
3115 bool IsRange = false;
3116 if (!RegSuffix.empty()) {
3117 if (RegSuffix.consume_back(".l"))
3118 SubReg = AMDGPU::lo16;
3119 else if (RegSuffix.consume_back(".h"))
3120 SubReg = AMDGPU::hi16;
3121
3122 // Single 32-bit register: vXX.
3123 if (!getRegNum(RegSuffix, RegNum)) {
3124 Error(Loc, "invalid register index");
3125 return MCRegister();
3126 }
3127 RegWidth = 32;
3128 } else {
3129 // Range of registers: v[XX:YY]. ":YY" is optional.
3130 IsRange = true;
3131 if (!ParseRegRange(RegNum, RegWidth, SubReg))
3132 return MCRegister();
3133 }
3134
3135 // Do not allow vcc_lo/hi be referred as s106/107.
3136 MCRegister Reg = getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
3137 const MCRegisterInfo &TRI = *getContext().getRegisterInfo();
3138 if (RegKind == IS_SGPR && IsRange
3139 ? (TRI.isSubRegister(Reg, VCC_LO) || TRI.isSubRegister(Reg, VCC_HI))
3140 : (Reg == VCC_LO || Reg == VCC_HI)) {
3141 Error(Loc, "register index is out of range");
3142 return MCRegister();
3143 }
3144
3145 return Reg;
3146}
3147
3148MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3149 unsigned &RegNum, unsigned &RegWidth,
3150 SmallVectorImpl<AsmToken> &Tokens) {
3151 MCRegister Reg;
3152 auto ListLoc = getLoc();
3153
3154 if (!skipToken(AsmToken::LBrac,
3155 "expected a register or a list of registers")) {
3156 return MCRegister();
3157 }
3158
3159 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
3160
3161 auto Loc = getLoc();
3162 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
3163 return MCRegister();
3164 if (RegWidth != 32) {
3165 Error(Loc, "expected a single 32-bit register");
3166 return MCRegister();
3167 }
3168
3169 for (; trySkipToken(AsmToken::Comma); ) {
3170 RegisterKind NextRegKind;
3171 MCRegister NextReg;
3172 unsigned NextRegNum, NextRegWidth;
3173 Loc = getLoc();
3174
3175 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3176 NextRegNum, NextRegWidth,
3177 Tokens)) {
3178 return MCRegister();
3179 }
3180 if (NextRegWidth != 32) {
3181 Error(Loc, "expected a single 32-bit register");
3182 return MCRegister();
3183 }
3184 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, NextRegKind,
3185 Loc))
3186 return MCRegister();
3187 }
3188
3189 if (!skipToken(AsmToken::RBrac,
3190 "expected a comma or a closing square bracket")) {
3191 return MCRegister();
3192 }
3193
3194 if (isRegularReg(RegKind))
3195 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3196
3197 return Reg;
3198}
3199
3200bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3201 MCRegister &Reg, unsigned &RegNum,
3202 unsigned &RegWidth,
3203 SmallVectorImpl<AsmToken> &Tokens) {
3204 auto Loc = getLoc();
3205 Reg = MCRegister();
3206
3207 if (isToken(AsmToken::Identifier)) {
3208 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3209 if (!Reg)
3210 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3211 } else {
3212 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3213 }
3214
3215 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3216 if (!Reg) {
3217 assert(Parser.hasPendingError());
3218 return false;
3219 }
3220
3221 if (!subtargetHasRegister(*TRI, Reg)) {
3222 if (Reg == AMDGPU::SGPR_NULL) {
3223 Error(Loc, "'null' operand is not supported on this GPU");
3224 } else {
3226 " register not available on this GPU");
3227 }
3228 return false;
3229 }
3230
3231 return true;
3232}
3233
3234bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3235 MCRegister &Reg, unsigned &RegNum,
3236 unsigned &RegWidth,
3237 bool RestoreOnFailure /*=false*/) {
3238 Reg = MCRegister();
3239
3241 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3242 if (RestoreOnFailure) {
3243 while (!Tokens.empty()) {
3244 getLexer().UnLex(Tokens.pop_back_val());
3245 }
3246 }
3247 return true;
3248 }
3249 return false;
3250}
3251
3252std::optional<StringRef>
3253AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3254 switch (RegKind) {
3255 case IS_VGPR:
3256 return StringRef(".amdgcn.next_free_vgpr");
3257 case IS_SGPR:
3258 return StringRef(".amdgcn.next_free_sgpr");
3259 default:
3260 return std::nullopt;
3261 }
3262}
3263
3264void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3265 auto SymbolName = getGprCountSymbolName(RegKind);
3266 assert(SymbolName && "initializing invalid register kind");
3267 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3269 Sym->setRedefinable(true);
3270}
3271
3272bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3273 unsigned DwordRegIndex,
3274 unsigned RegWidth) {
3275 // Symbols are only defined for GCN targets
3276 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3277 return true;
3278
3279 auto SymbolName = getGprCountSymbolName(RegKind);
3280 if (!SymbolName)
3281 return true;
3282 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3283
3284 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3285 int64_t OldCount;
3286
3287 if (!Sym->isVariable())
3288 return !Error(getLoc(),
3289 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3290 if (!Sym->getVariableValue()->evaluateAsAbsolute(OldCount))
3291 return !Error(
3292 getLoc(),
3293 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3294
3295 if (OldCount <= NewMax)
3297
3298 return true;
3299}
3300
3301std::unique_ptr<AMDGPUOperand>
3302AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3303 const auto &Tok = getToken();
3304 SMLoc StartLoc = Tok.getLoc();
3305 SMLoc EndLoc = Tok.getEndLoc();
3306 RegisterKind RegKind;
3307 MCRegister Reg;
3308 unsigned RegNum, RegWidth;
3309
3310 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3311 return nullptr;
3312 }
3313 if (isHsaAbi(getSTI())) {
3314 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3315 return nullptr;
3316 } else
3317 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3318 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3319}
3320
3321ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3322 bool HasSP3AbsModifier, LitModifier Lit) {
3323 // TODO: add syntactic sugar for 1/(2*PI)
3324
3325 if (isRegister() || isModifier())
3326 return ParseStatus::NoMatch;
3327
3328 if (Lit == LitModifier::None) {
3329 if (trySkipId("lit"))
3330 Lit = LitModifier::Lit;
3331 else if (trySkipId("lit64"))
3332 Lit = LitModifier::Lit64;
3333
3334 if (Lit != LitModifier::None) {
3335 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3336 return ParseStatus::Failure;
3337 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, Lit);
3338 if (S.isSuccess() &&
3339 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3340 return ParseStatus::Failure;
3341 return S;
3342 }
3343 }
3344
3345 const auto& Tok = getToken();
3346 const auto& NextTok = peekToken();
3347 bool IsReal = Tok.is(AsmToken::Real);
3348 SMLoc S = getLoc();
3349 bool Negate = false;
3350
3351 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3352 lex();
3353 IsReal = true;
3354 Negate = true;
3355 }
3356
3357 AMDGPUOperand::Modifiers Mods;
3358 Mods.Lit = Lit;
3359
3360 if (IsReal) {
3361 // Floating-point expressions are not supported.
3362 // Can only allow floating-point literals with an
3363 // optional sign.
3364
3365 StringRef Num = getTokenStr();
3366 lex();
3367
3368 APFloat RealVal(APFloat::IEEEdouble());
3369 auto roundMode = APFloat::rmNearestTiesToEven;
3370 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3371 return ParseStatus::Failure;
3372 if (Negate)
3373 RealVal.changeSign();
3374
3375 Operands.push_back(
3376 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3377 AMDGPUOperand::ImmTyNone, true));
3378 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3379 Op.setModifiers(Mods);
3380
3381 return ParseStatus::Success;
3382
3383 } else {
3384 int64_t IntVal;
3385 const MCExpr *Expr;
3386 SMLoc S = getLoc();
3387
3388 if (HasSP3AbsModifier) {
3389 // This is a workaround for handling expressions
3390 // as arguments of SP3 'abs' modifier, for example:
3391 // |1.0|
3392 // |-1|
3393 // |1+x|
3394 // This syntax is not compatible with syntax of standard
3395 // MC expressions (due to the trailing '|').
3396 SMLoc EndLoc;
3397 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3398 return ParseStatus::Failure;
3399 } else {
3400 if (Parser.parseExpression(Expr))
3401 return ParseStatus::Failure;
3402 }
3403
3404 if (Expr->evaluateAsAbsolute(IntVal)) {
3405 if (Lit == LitModifier::Lit && !isInt<32>(IntVal) && !isUInt<32>(IntVal))
3406 return Error(S, "literal value out of range");
3407 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3408 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3409 Op.setModifiers(Mods);
3410 } else {
3411 if (Lit != LitModifier::None)
3412 return ParseStatus::NoMatch;
3413 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3414 }
3415
3416 return ParseStatus::Success;
3417 }
3418
3419 return ParseStatus::NoMatch;
3420}
3421
3422ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3423 if (!isRegister())
3424 return ParseStatus::NoMatch;
3425
3426 if (auto R = parseRegister()) {
3427 assert(R->isReg());
3428 Operands.push_back(std::move(R));
3429 return ParseStatus::Success;
3430 }
3431 return ParseStatus::Failure;
3432}
3433
3434ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3435 bool HasSP3AbsMod, LitModifier Lit) {
3436 ParseStatus Res = parseReg(Operands);
3437 if (!Res.isNoMatch())
3438 return Res;
3439 if (isModifier())
3440 return ParseStatus::NoMatch;
3441 return parseImm(Operands, HasSP3AbsMod, Lit);
3442}
3443
3444bool
3445AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3446 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3447 const auto &str = Token.getString();
3448 return str == "abs" || str == "neg" || str == "sext";
3449 }
3450 return false;
3451}
3452
3453bool
3454AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3455 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3456}
3457
3458bool
3459AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3460 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3461}
3462
3463bool
3464AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3465 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3466}
3467
3468// Check if this is an operand modifier or an opcode modifier
3469// which may look like an expression but it is not. We should
3470// avoid parsing these modifiers as expressions. Currently
3471// recognized sequences are:
3472// |...|
3473// abs(...)
3474// neg(...)
3475// sext(...)
3476// -reg
3477// -|...|
3478// -abs(...)
3479// name:...
3480//
3481bool
3482AMDGPUAsmParser::isModifier() {
3483
3484 AsmToken Tok = getToken();
3485 AsmToken NextToken[2];
3486 peekTokens(NextToken);
3487
3488 return isOperandModifier(Tok, NextToken[0]) ||
3489 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3490 isOpcodeModifierWithVal(Tok, NextToken[0]);
3491}
3492
3493// Check if the current token is an SP3 'neg' modifier.
3494// Currently this modifier is allowed in the following context:
3495//
3496// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3497// 2. Before an 'abs' modifier: -abs(...)
3498// 3. Before an SP3 'abs' modifier: -|...|
3499//
3500// In all other cases "-" is handled as a part
3501// of an expression that follows the sign.
3502//
3503// Note: When "-" is followed by an integer literal,
3504// this is interpreted as integer negation rather
3505// than a floating-point NEG modifier applied to N.
3506// Beside being contr-intuitive, such use of floating-point
3507// NEG modifier would have resulted in different meaning
3508// of integer literals used with VOP1/2/C and VOP3,
3509// for example:
3510// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3511// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3512// Negative fp literals with preceding "-" are
3513// handled likewise for uniformity
3514//
3515bool
3516AMDGPUAsmParser::parseSP3NegModifier() {
3517
3518 AsmToken NextToken[2];
3519 peekTokens(NextToken);
3520
3521 if (isToken(AsmToken::Minus) &&
3522 (isRegister(NextToken[0], NextToken[1]) ||
3523 NextToken[0].is(AsmToken::Pipe) ||
3524 isId(NextToken[0], "abs"))) {
3525 lex();
3526 return true;
3527 }
3528
3529 return false;
3530}
3531
3532ParseStatus
3533AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3534 bool AllowImm) {
3535 bool Neg, SP3Neg;
3536 bool Abs, SP3Abs;
3537 SMLoc Loc;
3538
3539 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3540 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3541 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3542
3543 SP3Neg = parseSP3NegModifier();
3544
3545 Loc = getLoc();
3546 Neg = trySkipId("neg");
3547 if (Neg && SP3Neg)
3548 return Error(Loc, "expected register or immediate");
3549 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3550 return ParseStatus::Failure;
3551
3552 Abs = trySkipId("abs");
3553 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3554 return ParseStatus::Failure;
3555
3556 LitModifier Lit = LitModifier::None;
3557 if (trySkipId("lit")) {
3558 Lit = LitModifier::Lit;
3559 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3560 return ParseStatus::Failure;
3561 } else if (trySkipId("lit64")) {
3562 Lit = LitModifier::Lit64;
3563 if (!skipToken(AsmToken::LParen, "expected left paren after lit64"))
3564 return ParseStatus::Failure;
3565 if (!has64BitLiterals())
3566 return Error(Loc, "lit64 is not supported on this GPU");
3567 }
3568
3569 Loc = getLoc();
3570 SP3Abs = trySkipToken(AsmToken::Pipe);
3571 if (Abs && SP3Abs)
3572 return Error(Loc, "expected register or immediate");
3573
3574 ParseStatus Res;
3575 if (AllowImm) {
3576 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3577 } else {
3578 Res = parseReg(Operands);
3579 }
3580 if (!Res.isSuccess())
3581 return (SP3Neg || Neg || SP3Abs || Abs || Lit != LitModifier::None)
3583 : Res;
3584
3585 if (Lit != LitModifier::None && !Operands.back()->isImm())
3586 Error(Loc, "expected immediate with lit modifier");
3587
3588 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3589 return ParseStatus::Failure;
3590 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3591 return ParseStatus::Failure;
3592 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3593 return ParseStatus::Failure;
3594 if (Lit != LitModifier::None &&
3595 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3596 return ParseStatus::Failure;
3597
3598 AMDGPUOperand::Modifiers Mods;
3599 Mods.Abs = Abs || SP3Abs;
3600 Mods.Neg = Neg || SP3Neg;
3601 Mods.Lit = Lit;
3602
3603 if (Mods.hasFPModifiers() || Lit != LitModifier::None) {
3604 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3605 if (Op.isExpr())
3606 return Error(Op.getStartLoc(), "expected an absolute expression");
3607 Op.setModifiers(Mods);
3608 }
3609 return ParseStatus::Success;
3610}
3611
3612ParseStatus
3613AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3614 bool AllowImm) {
3615 bool Sext = trySkipId("sext");
3616 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3617 return ParseStatus::Failure;
3618
3619 ParseStatus Res;
3620 if (AllowImm) {
3621 Res = parseRegOrImm(Operands);
3622 } else {
3623 Res = parseReg(Operands);
3624 }
3625 if (!Res.isSuccess())
3626 return Sext ? ParseStatus::Failure : Res;
3627
3628 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3629 return ParseStatus::Failure;
3630
3631 AMDGPUOperand::Modifiers Mods;
3632 Mods.Sext = Sext;
3633
3634 if (Mods.hasIntModifiers()) {
3635 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3636 if (Op.isExpr())
3637 return Error(Op.getStartLoc(), "expected an absolute expression");
3638 Op.setModifiers(Mods);
3639 }
3640
3641 return ParseStatus::Success;
3642}
3643
3644ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3645 return parseRegOrImmWithFPInputMods(Operands, false);
3646}
3647
3648ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3649 return parseRegOrImmWithIntInputMods(Operands, false);
3650}
3651
3652ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3653 auto Loc = getLoc();
3654 if (trySkipId("off")) {
3655 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3656 AMDGPUOperand::ImmTyOff, false));
3657 return ParseStatus::Success;
3658 }
3659
3660 if (!isRegister())
3661 return ParseStatus::NoMatch;
3662
3663 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3664 if (Reg) {
3665 Operands.push_back(std::move(Reg));
3666 return ParseStatus::Success;
3667 }
3668
3669 return ParseStatus::Failure;
3670}
3671
3672unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3673 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3674
3675 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3676 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3677 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3678 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3679 return Match_InvalidOperand;
3680
3681 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3682 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3683 // v_mac_f32/16 allow only dst_sel == DWORD;
3684 auto OpNum =
3685 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3686 const auto &Op = Inst.getOperand(OpNum);
3687 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3688 return Match_InvalidOperand;
3689 }
3690 }
3691
3692 // Asm can first try to match VOPD or VOPD3. By failing early here with
3693 // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3694 // Checking later during validateInstruction does not give a chance to retry
3695 // parsing as a different encoding.
3696 if (tryAnotherVOPDEncoding(Inst))
3697 return Match_InvalidOperand;
3698
3699 return Match_Success;
3700}
3701
3711
3712// What asm variants we should check
3713ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3714 if (isForcedDPP() && isForcedVOP3()) {
3715 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3716 return ArrayRef(Variants);
3717 }
3718 if (getForcedEncodingSize() == 32) {
3719 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3720 return ArrayRef(Variants);
3721 }
3722
3723 if (isForcedVOP3()) {
3724 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3725 return ArrayRef(Variants);
3726 }
3727
3728 if (isForcedSDWA()) {
3729 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3731 return ArrayRef(Variants);
3732 }
3733
3734 if (isForcedDPP()) {
3735 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3736 return ArrayRef(Variants);
3737 }
3738
3739 return getAllVariants();
3740}
3741
3742StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3743 if (isForcedDPP() && isForcedVOP3())
3744 return "e64_dpp";
3745
3746 if (getForcedEncodingSize() == 32)
3747 return "e32";
3748
3749 if (isForcedVOP3())
3750 return "e64";
3751
3752 if (isForcedSDWA())
3753 return "sdwa";
3754
3755 if (isForcedDPP())
3756 return "dpp";
3757
3758 return "";
3759}
3760
3761MCRegister
3762AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3763 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3764 for (MCPhysReg Reg : Desc.implicit_uses()) {
3765 switch (Reg) {
3766 case AMDGPU::FLAT_SCR:
3767 case AMDGPU::VCC:
3768 case AMDGPU::VCC_LO:
3769 case AMDGPU::VCC_HI:
3770 case AMDGPU::M0:
3771 return Reg;
3772 default:
3773 break;
3774 }
3775 }
3776 return MCRegister();
3777}
3778
3779// NB: This code is correct only when used to check constant
3780// bus limitations because GFX7 support no f16 inline constants.
3781// Note that there are no cases when a GFX7 opcode violates
3782// constant bus limitations due to the use of an f16 constant.
3783bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3784 unsigned OpIdx) const {
3785 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3786
3789 return false;
3790 }
3791
3792 const MCOperand &MO = Inst.getOperand(OpIdx);
3793
3794 int64_t Val = MO.isImm() ? MO.getImm() : getLitValue(MO.getExpr());
3795 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3796
3797 switch (OpSize) { // expected operand size
3798 case 8:
3799 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3800 case 4:
3801 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3802 case 2: {
3803 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3806 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3807
3811
3815
3818
3822
3825 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3826
3829 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3830
3832 return false;
3833
3834 llvm_unreachable("invalid operand type");
3835 }
3836 default:
3837 llvm_unreachable("invalid operand size");
3838 }
3839}
3840
3841unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3842 if (!isGFX10Plus())
3843 return 1;
3844
3845 switch (Opcode) {
3846 // 64-bit shift instructions can use only one scalar value input
3847 case AMDGPU::V_LSHLREV_B64_e64:
3848 case AMDGPU::V_LSHLREV_B64_gfx10:
3849 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3850 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3851 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3852 case AMDGPU::V_LSHRREV_B64_e64:
3853 case AMDGPU::V_LSHRREV_B64_gfx10:
3854 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3855 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3856 case AMDGPU::V_ASHRREV_I64_e64:
3857 case AMDGPU::V_ASHRREV_I64_gfx10:
3858 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3859 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3860 case AMDGPU::V_LSHL_B64_e64:
3861 case AMDGPU::V_LSHR_B64_e64:
3862 case AMDGPU::V_ASHR_I64_e64:
3863 return 1;
3864 default:
3865 return 2;
3866 }
3867}
3868
3869constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3871
3872// Get regular operand indices in the same order as specified
3873// in the instruction (but append mandatory literals to the end).
3875 bool AddMandatoryLiterals = false) {
3876
3877 int16_t ImmIdx =
3878 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3879
3880 if (isVOPD(Opcode)) {
3881 int16_t ImmXIdx =
3882 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
3883
3884 return {getNamedOperandIdx(Opcode, OpName::src0X),
3885 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3886 getNamedOperandIdx(Opcode, OpName::vsrc2X),
3887 getNamedOperandIdx(Opcode, OpName::src0Y),
3888 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3889 getNamedOperandIdx(Opcode, OpName::vsrc2Y),
3890 ImmXIdx,
3891 ImmIdx};
3892 }
3893
3894 return {getNamedOperandIdx(Opcode, OpName::src0),
3895 getNamedOperandIdx(Opcode, OpName::src1),
3896 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3897}
3898
3899bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3900 const MCOperand &MO = Inst.getOperand(OpIdx);
3901 if (MO.isImm())
3902 return !isInlineConstant(Inst, OpIdx);
3903 if (MO.isReg()) {
3904 auto Reg = MO.getReg();
3905 if (!Reg)
3906 return false;
3907 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3908 auto PReg = mc2PseudoReg(Reg);
3909 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3910 }
3911 return true;
3912}
3913
3914// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3915// Writelane is special in that it can use SGPR and M0 (which would normally
3916// count as using the constant bus twice - but in this case it is allowed since
3917// the lane selector doesn't count as a use of the constant bus). However, it is
3918// still required to abide by the 1 SGPR rule.
3919static bool checkWriteLane(const MCInst &Inst) {
3920 const unsigned Opcode = Inst.getOpcode();
3921 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3922 return false;
3923 const MCOperand &LaneSelOp = Inst.getOperand(2);
3924 if (!LaneSelOp.isReg())
3925 return false;
3926 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3927 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3928}
3929
3930bool AMDGPUAsmParser::validateConstantBusLimitations(
3931 const MCInst &Inst, const OperandVector &Operands) {
3932 const unsigned Opcode = Inst.getOpcode();
3933 const MCInstrDesc &Desc = MII.get(Opcode);
3934 MCRegister LastSGPR;
3935 unsigned ConstantBusUseCount = 0;
3936 unsigned NumLiterals = 0;
3937 unsigned LiteralSize;
3938
3939 if (!(Desc.TSFlags &
3942 !isVOPD(Opcode))
3943 return true;
3944
3945 if (checkWriteLane(Inst))
3946 return true;
3947
3948 // Check special imm operands (used by madmk, etc)
3949 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3950 ++NumLiterals;
3951 LiteralSize = 4;
3952 }
3953
3954 SmallDenseSet<MCRegister> SGPRsUsed;
3955 MCRegister SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3956 if (SGPRUsed) {
3957 SGPRsUsed.insert(SGPRUsed);
3958 ++ConstantBusUseCount;
3959 }
3960
3961 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3962
3963 unsigned ConstantBusLimit = getConstantBusLimit(Opcode);
3964
3965 for (int OpIdx : OpIndices) {
3966 if (OpIdx == -1)
3967 continue;
3968
3969 const MCOperand &MO = Inst.getOperand(OpIdx);
3970 if (usesConstantBus(Inst, OpIdx)) {
3971 if (MO.isReg()) {
3972 LastSGPR = mc2PseudoReg(MO.getReg());
3973 // Pairs of registers with a partial intersections like these
3974 // s0, s[0:1]
3975 // flat_scratch_lo, flat_scratch
3976 // flat_scratch_lo, flat_scratch_hi
3977 // are theoretically valid but they are disabled anyway.
3978 // Note that this code mimics SIInstrInfo::verifyInstruction
3979 if (SGPRsUsed.insert(LastSGPR).second) {
3980 ++ConstantBusUseCount;
3981 }
3982 } else { // Expression or a literal
3983
3984 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3985 continue; // special operand like VINTERP attr_chan
3986
3987 // An instruction may use only one literal.
3988 // This has been validated on the previous step.
3989 // See validateVOPLiteral.
3990 // This literal may be used as more than one operand.
3991 // If all these operands are of the same size,
3992 // this literal counts as one scalar value.
3993 // Otherwise it counts as 2 scalar values.
3994 // See "GFX10 Shader Programming", section 3.6.2.3.
3995
3997 if (Size < 4)
3998 Size = 4;
3999
4000 if (NumLiterals == 0) {
4001 NumLiterals = 1;
4002 LiteralSize = Size;
4003 } else if (LiteralSize != Size) {
4004 NumLiterals = 2;
4005 }
4006 }
4007 }
4008
4009 if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
4010 Error(getOperandLoc(Operands, OpIdx),
4011 "invalid operand (violates constant bus restrictions)");
4012 return false;
4013 }
4014 }
4015 return true;
4016}
4017
4018std::optional<unsigned>
4019AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
4020
4021 const unsigned Opcode = Inst.getOpcode();
4022 if (!isVOPD(Opcode))
4023 return {};
4024
4025 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4026
4027 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
4028 const MCOperand &Opr = Inst.getOperand(OperandIdx);
4029 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
4030 ? Opr.getReg()
4031 : MCRegister();
4032 };
4033
4034 // On GFX1170+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
4035 // source-cache.
4036 bool SkipSrc =
4037 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1170 ||
4038 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
4039 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
4040 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx13 ||
4041 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250 ||
4042 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx13;
4043 bool AllowSameVGPR = isGFX12Plus();
4044
4045 if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
4046 for (auto OpName : {OpName::src0X, OpName::src0Y}) {
4047 int I = getNamedOperandIdx(Opcode, OpName);
4048 const MCOperand &Op = Inst.getOperand(I);
4049 if (!Op.isImm())
4050 continue;
4051 int64_t Imm = Op.getImm();
4052 if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
4053 !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
4054 return (unsigned)I;
4055 }
4056
4057 for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
4058 OpName::vsrc2Y, OpName::imm}) {
4059 int I = getNamedOperandIdx(Opcode, OpName);
4060 if (I == -1)
4061 continue;
4062 const MCOperand &Op = Inst.getOperand(I);
4063 if (Op.isImm())
4064 return (unsigned)I;
4065 }
4066 }
4067
4068 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
4069 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
4070 getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
4071
4072 return InvalidCompOprIdx;
4073}
4074
4075bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
4076 const OperandVector &Operands) {
4077
4078 unsigned Opcode = Inst.getOpcode();
4079 bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
4080
4081 if (AsVOPD3) {
4082 for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
4083 AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
4084 if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
4085 (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
4086 Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
4087 }
4088 }
4089
4090 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
4091 if (!InvalidCompOprIdx.has_value())
4092 return true;
4093
4094 auto CompOprIdx = *InvalidCompOprIdx;
4095 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
4096 auto ParsedIdx =
4097 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
4098 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
4099 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
4100
4101 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
4102 if (CompOprIdx == VOPD::Component::DST) {
4103 if (AsVOPD3)
4104 Error(Loc, "dst registers must be distinct");
4105 else
4106 Error(Loc, "one dst register must be even and the other odd");
4107 } else {
4108 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
4109 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
4110 " operands must use different VGPR banks");
4111 }
4112
4113 return false;
4114}
4115
4116// \returns true if \p Inst does not satisfy VOPD constraints, but can be
4117// potentially used as VOPD3 with the same operands.
4118bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
4119 // First check if it fits VOPD
4120 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
4121 if (!InvalidCompOprIdx.has_value())
4122 return false;
4123
4124 // Then if it fits VOPD3
4125 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
4126 if (InvalidCompOprIdx.has_value()) {
4127 // If failed operand is dst it is better to show error about VOPD3
4128 // instruction as it has more capabilities and error message will be
4129 // more informative. If the dst is not legal for VOPD3, then it is not
4130 // legal for VOPD either.
4131 if (*InvalidCompOprIdx == VOPD::Component::DST)
4132 return true;
4133
4134 // Otherwise prefer VOPD as we may find ourselves in an awkward situation
4135 // with a conflict in tied implicit src2 of fmac and no asm operand to
4136 // to point to.
4137 return false;
4138 }
4139 return true;
4140}
4141
4142// \returns true is a VOPD3 instruction can be also represented as a shorter
4143// VOPD encoding.
4144bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
4145 const unsigned Opcode = Inst.getOpcode();
4146 const auto &II = getVOPDInstInfo(Opcode, &MII);
4147 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
4148 if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
4149 !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
4150 return false;
4151
4152 // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
4153 // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
4154 // be parsed as VOPD which does not accept src2.
4155 if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
4156 II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
4157 return false;
4158
4159 // If any modifiers are set this cannot be VOPD.
4160 for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4161 OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4162 OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4163 int I = getNamedOperandIdx(Opcode, OpName);
4164 if (I == -1)
4165 continue;
4166 if (Inst.getOperand(I).getImm())
4167 return false;
4168 }
4169
4170 return !tryVOPD3(Inst);
4171}
4172
4173// VOPD3 has more relaxed register constraints than VOPD. We prefer shorter VOPD
4174// form but switch to VOPD3 otherwise.
4175bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4176 const unsigned Opcode = Inst.getOpcode();
4177 if (!isGFX1250Plus() || !isVOPD(Opcode))
4178 return false;
4179
4180 if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4181 return tryVOPD(Inst);
4182 return tryVOPD3(Inst);
4183}
4184
4185bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
4186
4187 const unsigned Opc = Inst.getOpcode();
4188 const MCInstrDesc &Desc = MII.get(Opc);
4189
4190 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
4191 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
4192 assert(ClampIdx != -1);
4193 return Inst.getOperand(ClampIdx).getImm() == 0;
4194 }
4195
4196 return true;
4197}
4198
4201
4202bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc) {
4203
4204 const unsigned Opc = Inst.getOpcode();
4205 const MCInstrDesc &Desc = MII.get(Opc);
4206
4207 if ((Desc.TSFlags & MIMGFlags) == 0)
4208 return true;
4209
4210 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
4211 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4212 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
4213
4214 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
4215 return true;
4216
4217 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
4218 return true;
4219
4220 unsigned VDataSize = getRegOperandSize(Desc, VDataIdx);
4221 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
4222 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4223 if (DMask == 0)
4224 DMask = 1;
4225
4226 bool IsPackedD16 = false;
4227 unsigned DataSize =
4228 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
4229 if (hasPackedD16()) {
4230 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4231 IsPackedD16 = D16Idx >= 0;
4232 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
4233 DataSize = (DataSize + 1) / 2;
4234 }
4235
4236 if ((VDataSize / 4) == DataSize + TFESize)
4237 return true;
4238
4239 StringRef Modifiers;
4240 if (isGFX90A())
4241 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
4242 else
4243 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
4244
4245 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
4246 return false;
4247}
4248
// Check that the number of address registers supplied to an image
// instruction matches what the base opcode, dim and a16 require.
//
// Reports an error at \p IDLoc and returns false on a mismatch. Returns true
// for non-image opcodes and for targets older than GFX10.
4249bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc) {
4250 const unsigned Opc = Inst.getOpcode();
4251 const MCInstrDesc &Desc = MII.get(Opc);
4252
4253 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
4254 return true;
4255
4256 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4257
// NOTE(review): the initializer of BaseOpcode is not visible in this listing
// (the numbering jumps from 4258 to 4260) - restore it from the upstream file.
4258 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4260 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
// The resource operand is named srsrc for MIMG-flagged opcodes and rsrc for
// the others.
4261 AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
4262 ? AMDGPU::OpName::srsrc
4263 : AMDGPU::OpName::rsrc;
4264 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
4265 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4266 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
4267
4268 assert(VAddr0Idx != -1);
4269 assert(SrsrcIdx != -1);
4270 assert(SrsrcIdx > VAddr0Idx);
4271
4272 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
// BVH opcodes only need the a16 modifier to agree with the base opcode.
4273 if (BaseOpcode->BVH) {
4274 if (IsA16 == BaseOpcode->A16)
4275 return true;
4276 Error(IDLoc, "image address size does not match a16");
4277 return false;
4278 }
4279
4280 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4281 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
// More than one operand between vaddr0 and the resource operand is treated
// as NSA; the address size is then the operand count, otherwise it is the
// size of the single vaddr register operand divided by 4.
4282 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4283 unsigned ActualAddrSize =
4284 IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(Desc, VAddr0Idx) / 4;
4285
4286 unsigned ExpectedAddrSize =
4287 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
4288
4289 if (IsNSA) {
// NOTE(review): the right-hand side of this comparison is not visible in
// this listing (the numbering jumps from 4291 to 4293). The guarded block
// recomputes the actual size from the size of the last vaddr operand.
4290 if (hasPartialNSAEncoding() &&
4291 ExpectedAddrSize >
4293 int VAddrLastIdx = SrsrcIdx - 1;
4294 unsigned VAddrLastSize = getRegOperandSize(Desc, VAddrLastIdx) / 4;
4295
4296 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4297 }
4298 } else {
// Expected sizes above 12 are rounded up to 16 for the non-NSA form.
4299 if (ExpectedAddrSize > 12)
4300 ExpectedAddrSize = 16;
4301
4302 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
4303 // This provides backward compatibility for assembly created
4304 // before 160b/192b/224b types were directly supported.
4305 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4306 return true;
4307 }
4308
4309 if (ActualAddrSize == ExpectedAddrSize)
4310 return true;
4311
4312 Error(IDLoc, "image address size does not match dim and a16");
4313 return false;
4314}
4315
4316bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4317
4318 const unsigned Opc = Inst.getOpcode();
4319 const MCInstrDesc &Desc = MII.get(Opc);
4320
4321 if ((Desc.TSFlags & MIMGFlags) == 0)
4322 return true;
4323 if (!Desc.mayLoad() || !Desc.mayStore())
4324 return true; // Not atomic
4325
4326 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4327 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4328
4329 // This is an incomplete check because image_atomic_cmpswap
4330 // may only use 0x3 and 0xf while other atomic operations
4331 // may use 0x1 and 0x3. However these limitations are
4332 // verified when we check that dmask matches dst size.
4333 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4334}
4335
4336bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4337
4338 const unsigned Opc = Inst.getOpcode();
4339 const MCInstrDesc &Desc = MII.get(Opc);
4340
4341 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4342 return true;
4343
4344 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4345 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4346
4347 // GATHER4 instructions use dmask in a different fashion compared to
4348 // other MIMG instructions. The only useful DMASK values are
4349 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4350 // (red,red,red,red) etc.) The ISA document doesn't mention
4351 // this.
4352 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4353}
4354
// Check that a dim operand was written for image instructions on GFX10+.
//
// \returns true for pre-GFX10 targets and non-image opcodes, or when one of
// the parsed \p Operands (starting at index 1) is a dim operand; false when
// none of them is.
4355bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4356 const OperandVector &Operands) {
4357 if (!isGFX10Plus())
4358 return true;
4359
4360 const unsigned Opc = Inst.getOpcode();
4361 const MCInstrDesc &Desc = MII.get(Opc);
4362
4363 if ((Desc.TSFlags & MIMGFlags) == 0)
4364 return true;
4365
// NOTE(review): the condition guarding the following early return is not
// visible in this listing (the numbering jumps from 4366 to 4368) - restore
// it from the upstream file.
4366 // image_bvh_intersect_ray instructions do not have dim
4368 return true;
4369
// Scan the parsed operands for an explicit dim.
4370 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4371 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4372 if (Op.isDim())
4373 return true;
4374 }
4375 return false;
4376}
4377
// Check that image instructions whose base opcode is marked MSAA are used
// with a dim whose info is marked MSAA as well.
//
// \returns true for non-image opcodes and non-MSAA base opcodes; otherwise
// the MSAA flag of the dim info selected by the dim operand.
4378bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4379 const unsigned Opc = Inst.getOpcode();
4380 const MCInstrDesc &Desc = MII.get(Opc);
4381
4382 if ((Desc.TSFlags & MIMGFlags) == 0)
4383 return true;
4384
4385 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
// NOTE(review): the initializer of BaseOpcode is not visible in this listing
// (the numbering jumps from 4386 to 4388) - restore it from the upstream file.
4386 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4388
4389 if (!BaseOpcode->MSAA)
4390 return true;
4391
4392 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4393 assert(DimIdx != -1);
4394
// The dim operand holds an encoding that is looked up in the dim table.
4395 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4396 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4397
4398 return DimInfo->MSAA;
4399}
4400
4401static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4402{
4403 switch (Opcode) {
4404 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4405 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4406 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4407 return true;
4408 default:
4409 return false;
4410 }
4411}
4412
4413// movrels* opcodes should only allow VGPRS as src0.
4414// This is specified in .td description for vop1/vop3,
4415// but sdwa is handled differently. See isSDWAOperand.
4416bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4417 const OperandVector &Operands) {
4418
4419 const unsigned Opc = Inst.getOpcode();
4420 const MCInstrDesc &Desc = MII.get(Opc);
4421
4422 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4423 return true;
4424
4425 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4426 assert(Src0Idx != -1);
4427
4428 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4429 if (Src0.isReg()) {
4430 auto Reg = mc2PseudoReg(Src0.getReg());
4431 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4432 if (!isSGPR(Reg, TRI))
4433 return true;
4434 }
4435
4436 Error(getOperandLoc(Operands, Src0Idx), "source operand must be a VGPR");
4437 return false;
4438}
4439
4440bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4441 const OperandVector &Operands) {
4442
4443 const unsigned Opc = Inst.getOpcode();
4444
4445 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4446 return true;
4447
4448 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4449 assert(Src0Idx != -1);
4450
4451 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4452 if (!Src0.isReg())
4453 return true;
4454
4455 auto Reg = mc2PseudoReg(Src0.getReg());
4456 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4457 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4458 Error(getOperandLoc(Operands, Src0Idx),
4459 "source operand must be either a VGPR or an inline constant");
4460 return false;
4461 }
4462
4463 return true;
4464}
4465
4466bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4467 const OperandVector &Operands) {
4468 unsigned Opcode = Inst.getOpcode();
4469 const MCInstrDesc &Desc = MII.get(Opcode);
4470
4471 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4472 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4473 return true;
4474
4475 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4476 if (Src2Idx == -1)
4477 return true;
4478
4479 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4480 Error(getOperandLoc(Operands, Src2Idx),
4481 "inline constants are not allowed for this operand");
4482 return false;
4483 }
4484
4485 return true;
4486}
4487
4488bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4489 const OperandVector &Operands) {
4490 const unsigned Opc = Inst.getOpcode();
4491 const MCInstrDesc &Desc = MII.get(Opc);
4492
4493 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4494 return true;
4495
4496 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4497 if (BlgpIdx != -1) {
4498 if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
4499 int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
4500
4501 unsigned CBSZ = Inst.getOperand(CbszIdx).getImm();
4502 unsigned BLGP = Inst.getOperand(BlgpIdx).getImm();
4503
4504 // Validate the correct register size was used for the floating point
4505 // format operands
4506
4507 bool Success = true;
4508 if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) {
4509 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4510 Error(getOperandLoc(Operands, Src0Idx),
4511 "wrong register tuple size for cbsz value " + Twine(CBSZ));
4512 Success = false;
4513 }
4514
4515 if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(BLGP)) {
4516 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4517 Error(getOperandLoc(Operands, Src1Idx),
4518 "wrong register tuple size for blgp value " + Twine(BLGP));
4519 Success = false;
4520 }
4521
4522 return Success;
4523 }
4524 }
4525
4526 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4527 if (Src2Idx == -1)
4528 return true;
4529
4530 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4531 if (!Src2.isReg())
4532 return true;
4533
4534 MCRegister Src2Reg = Src2.getReg();
4535 MCRegister DstReg = Inst.getOperand(0).getReg();
4536 if (Src2Reg == DstReg)
4537 return true;
4538
4539 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4540 if (TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[0], HwMode))
4541 .getSizeInBits() <= 128)
4542 return true;
4543
4544 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4545 Error(getOperandLoc(Operands, Src2Idx),
4546 "source 2 operand must not partially overlap with dst");
4547 return false;
4548 }
4549
4550 return true;
4551}
4552
4553bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4554 switch (Inst.getOpcode()) {
4555 default:
4556 return true;
4557 case V_DIV_SCALE_F32_gfx6_gfx7:
4558 case V_DIV_SCALE_F32_vi:
4559 case V_DIV_SCALE_F32_gfx10:
4560 case V_DIV_SCALE_F64_gfx6_gfx7:
4561 case V_DIV_SCALE_F64_vi:
4562 case V_DIV_SCALE_F64_gfx10:
4563 break;
4564 }
4565
4566 // TODO: Check that src0 = src1 or src2.
4567
4568 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4569 AMDGPU::OpName::src2_modifiers,
4570 AMDGPU::OpName::src2_modifiers}) {
4571 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4572 .getImm() &
4574 return false;
4575 }
4576 }
4577
4578 return true;
4579}
4580
4581bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4582
4583 const unsigned Opc = Inst.getOpcode();
4584 const MCInstrDesc &Desc = MII.get(Opc);
4585
4586 if ((Desc.TSFlags & MIMGFlags) == 0)
4587 return true;
4588
4589 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4590 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4591 if (isCI() || isSI())
4592 return false;
4593 }
4594
4595 return true;
4596}
4597
4598bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4599 const unsigned Opc = Inst.getOpcode();
4600 const MCInstrDesc &Desc = MII.get(Opc);
4601
4602 if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0)
4603 return true;
4604
4605 int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
4606
4607 return R128Idx < 0 || !Inst.getOperand(R128Idx).getImm();
4608}
4609
4610static bool IsRevOpcode(const unsigned Opcode)
4611{
4612 switch (Opcode) {
4613 case AMDGPU::V_SUBREV_F32_e32:
4614 case AMDGPU::V_SUBREV_F32_e64:
4615 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4616 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4617 case AMDGPU::V_SUBREV_F32_e32_vi:
4618 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4619 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4620 case AMDGPU::V_SUBREV_F32_e64_vi:
4621
4622 case AMDGPU::V_SUBREV_CO_U32_e32:
4623 case AMDGPU::V_SUBREV_CO_U32_e64:
4624 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4625 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4626
4627 case AMDGPU::V_SUBBREV_U32_e32:
4628 case AMDGPU::V_SUBBREV_U32_e64:
4629 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4630 case AMDGPU::V_SUBBREV_U32_e32_vi:
4631 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4632 case AMDGPU::V_SUBBREV_U32_e64_vi:
4633
4634 case AMDGPU::V_SUBREV_U32_e32:
4635 case AMDGPU::V_SUBREV_U32_e64:
4636 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4637 case AMDGPU::V_SUBREV_U32_e32_vi:
4638 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4639 case AMDGPU::V_SUBREV_U32_e64_vi:
4640
4641 case AMDGPU::V_SUBREV_F16_e32:
4642 case AMDGPU::V_SUBREV_F16_e64:
4643 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4644 case AMDGPU::V_SUBREV_F16_e32_vi:
4645 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4646 case AMDGPU::V_SUBREV_F16_e64_vi:
4647
4648 case AMDGPU::V_SUBREV_U16_e32:
4649 case AMDGPU::V_SUBREV_U16_e64:
4650 case AMDGPU::V_SUBREV_U16_e32_vi:
4651 case AMDGPU::V_SUBREV_U16_e64_vi:
4652
4653 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4654 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4655 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4656
4657 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4658 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4659
4660 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4661 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4662
4663 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4664 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4665
4666 case AMDGPU::V_LSHRREV_B32_e32:
4667 case AMDGPU::V_LSHRREV_B32_e64:
4668 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4669 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4670 case AMDGPU::V_LSHRREV_B32_e32_vi:
4671 case AMDGPU::V_LSHRREV_B32_e64_vi:
4672 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4673 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4674
4675 case AMDGPU::V_ASHRREV_I32_e32:
4676 case AMDGPU::V_ASHRREV_I32_e64:
4677 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4678 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4679 case AMDGPU::V_ASHRREV_I32_e32_vi:
4680 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4681 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4682 case AMDGPU::V_ASHRREV_I32_e64_vi:
4683
4684 case AMDGPU::V_LSHLREV_B32_e32:
4685 case AMDGPU::V_LSHLREV_B32_e64:
4686 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4687 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4688 case AMDGPU::V_LSHLREV_B32_e32_vi:
4689 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4690 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4691 case AMDGPU::V_LSHLREV_B32_e64_vi:
4692
4693 case AMDGPU::V_LSHLREV_B16_e32:
4694 case AMDGPU::V_LSHLREV_B16_e64:
4695 case AMDGPU::V_LSHLREV_B16_e32_vi:
4696 case AMDGPU::V_LSHLREV_B16_e64_vi:
4697 case AMDGPU::V_LSHLREV_B16_gfx10:
4698
4699 case AMDGPU::V_LSHRREV_B16_e32:
4700 case AMDGPU::V_LSHRREV_B16_e64:
4701 case AMDGPU::V_LSHRREV_B16_e32_vi:
4702 case AMDGPU::V_LSHRREV_B16_e64_vi:
4703 case AMDGPU::V_LSHRREV_B16_gfx10:
4704
4705 case AMDGPU::V_ASHRREV_I16_e32:
4706 case AMDGPU::V_ASHRREV_I16_e64:
4707 case AMDGPU::V_ASHRREV_I16_e32_vi:
4708 case AMDGPU::V_ASHRREV_I16_e64_vi:
4709 case AMDGPU::V_ASHRREV_I16_gfx10:
4710
4711 case AMDGPU::V_LSHLREV_B64_e64:
4712 case AMDGPU::V_LSHLREV_B64_gfx10:
4713 case AMDGPU::V_LSHLREV_B64_vi:
4714
4715 case AMDGPU::V_LSHRREV_B64_e64:
4716 case AMDGPU::V_LSHRREV_B64_gfx10:
4717 case AMDGPU::V_LSHRREV_B64_vi:
4718
4719 case AMDGPU::V_ASHRREV_I64_e64:
4720 case AMDGPU::V_ASHRREV_I64_gfx10:
4721 case AMDGPU::V_ASHRREV_I64_vi:
4722
4723 case AMDGPU::V_PK_LSHLREV_B16:
4724 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4725 case AMDGPU::V_PK_LSHLREV_B16_vi:
4726
4727 case AMDGPU::V_PK_LSHRREV_B16:
4728 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4729 case AMDGPU::V_PK_LSHRREV_B16_vi:
4730 case AMDGPU::V_PK_ASHRREV_I16:
4731 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4732 case AMDGPU::V_PK_ASHRREV_I16_vi:
4733 return true;
4734 default:
4735 return false;
4736 }
4737}
4738
4739bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst,
4740 const OperandVector &Operands) {
4741 using namespace SIInstrFlags;
4742 const unsigned Opcode = Inst.getOpcode();
4743 const MCInstrDesc &Desc = MII.get(Opcode);
4744
4745 // lds_direct register is defined so that it can be used
4746 // with 9-bit operands only. Ignore encodings which do not accept these.
4747 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4748 if ((Desc.TSFlags & Enc) == 0)
4749 return true;
4750
4751 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4752 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4753 if (SrcIdx == -1)
4754 break;
4755 const auto &Src = Inst.getOperand(SrcIdx);
4756 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4757
4758 if (isGFX90A() || isGFX11Plus()) {
4759 Error(getOperandLoc(Operands, SrcIdx),
4760 "lds_direct is not supported on this GPU");
4761 return false;
4762 }
4763
4764 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) {
4765 Error(getOperandLoc(Operands, SrcIdx),
4766 "lds_direct cannot be used with this instruction");
4767 return false;
4768 }
4769
4770 if (SrcName != OpName::src0) {
4771 Error(getOperandLoc(Operands, SrcIdx),
4772 "lds_direct may be used as src0 only");
4773 return false;
4774 }
4775 }
4776 }
4777
4778 return true;
4779}
4780
4781SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4782 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4783 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4784 if (Op.isFlatOffset())
4785 return Op.getStartLoc();
4786 }
4787 return getLoc();
4788}
4789
4790bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4791 const OperandVector &Operands) {
4792 auto Opcode = Inst.getOpcode();
4793 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4794 if (OpNum == -1)
4795 return true;
4796
4797 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4798 if ((TSFlags & SIInstrFlags::FLAT))
4799 return validateFlatOffset(Inst, Operands);
4800
4801 if ((TSFlags & SIInstrFlags::SMRD))
4802 return validateSMEMOffset(Inst, Operands);
4803
4804 const auto &Op = Inst.getOperand(OpNum);
4805 // GFX12+ buffer ops: InstOffset is signed 24, but must not be a negative.
4806 if (isGFX12Plus() &&
4807 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4808 const unsigned OffsetSize = 24;
4809 if (!isUIntN(OffsetSize - 1, Op.getImm())) {
4810 Error(getFlatOffsetLoc(Operands),
4811 Twine("expected a ") + Twine(OffsetSize - 1) +
4812 "-bit unsigned offset for buffer ops");
4813 return false;
4814 }
4815 } else {
4816 const unsigned OffsetSize = 16;
4817 if (!isUIntN(OffsetSize, Op.getImm())) {
4818 Error(getFlatOffsetLoc(Operands),
4819 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4820 return false;
4821 }
4822 }
4823 return true;
4824}
4825
// Validate the offset operand of FLAT instructions.
//
// Reports an error and returns false when a non-zero offset is used on a
// target without flat offsets, or when the offset does not fit the number of
// offset bits of the target (negative values are rejected unless
// AllowNegative is set). Returns true for non-FLAT instructions.
4826bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4827 const OperandVector &Operands) {
4828 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4829 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4830 return true;
4831
4832 auto Opcode = Inst.getOpcode();
4833 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4834 assert(OpNum != -1);
4835
4836 const auto &Op = Inst.getOperand(OpNum);
4837 if (!hasFlatOffsets() && Op.getImm() != 0) {
4838 Error(getFlatOffsetLoc(Operands),
4839 "flat offset modifier is not supported on this GPU");
4840 return false;
4841 }
4842
4843 // For pre-GFX12 FLAT instructions the offset must be positive;
4844 // MSB is ignored and forced to zero.
4845 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
// NOTE(review): the first part of the AllowNegative initializer is not
// visible in this listing (the numbering jumps from 4846 to 4848) - restore
// it from the upstream file.
4846 bool AllowNegative =
4848 isGFX12Plus();
// The message names a signed N-bit range when negatives are allowed and an
// unsigned (N-1)-bit range otherwise.
4849 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4850 Error(getFlatOffsetLoc(Operands),
4851 Twine("expected a ") +
4852 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4853 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4854 return false;
4855 }
4856
4857 return true;
4858}
4859
4860SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4861 // Start with second operand because SMEM Offset cannot be dst or src0.
4862 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4863 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4864 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4865 return Op.getStartLoc();
4866 }
4867 return getLoc();
4868}
4869
// Validate the immediate offset operand of SMRD-flagged instructions.
//
// Nothing is checked on SI/CI, for non-SMRD instructions, when there is no
// offset operand, or when the offset is not an immediate. Otherwise an
// out-of-range offset is reported at the offset operand and false is
// returned.
4870bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4871 const OperandVector &Operands) {
4872 if (isCI() || isSI())
4873 return true;
4874
4875 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4876 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4877 return true;
4878
4879 auto Opcode = Inst.getOpcode();
4880 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4881 if (OpNum == -1)
4882 return true;
4883
4884 const auto &Op = Inst.getOperand(OpNum);
4885 if (!Op.isImm())
4886 return true;
4887
4888 uint64_t Offset = Op.getImm();
4889 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
// NOTE(review): the condition guarding the following early return is not
// visible in this listing (the numbering jumps from 4889 to 4892) - restore
// it from the upstream file.
4892 return true;
4893
// The expected range named in the message depends on the target generation
// and on whether this is a buffer instruction.
4894 Error(getSMEMOffsetLoc(Operands),
4895 isGFX12Plus() && IsBuffer
4896 ? "expected a 23-bit unsigned offset for buffer ops"
4897 : isGFX12Plus() ? "expected a 24-bit signed offset"
4898 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4899 : "expected a 21-bit signed offset");
4900
4901 return false;
4902}
4903
// Check that a SOP2/SOPC instruction uses at most one unique literal.
//
// Counts, over src0 and src1, the distinct non-inline literal values and the
// expressions without a known immediate value. If together they exceed one,
// an error is reported at src1 and false is returned.
4904bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
4905 const OperandVector &Operands) {
4906 unsigned Opcode = Inst.getOpcode();
4907 const MCInstrDesc &Desc = MII.get(Opcode);
4908 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4909 return true;
4910
4911 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4912 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4913
4914 const int OpIndices[] = { Src0Idx, Src1Idx };
4915
4916 unsigned NumExprs = 0;
4917 unsigned NumLiterals = 0;
4918 int64_t LiteralValue;
4919
4920 for (int OpIdx : OpIndices) {
4921 if (OpIdx == -1) break;
4922
4923 const MCOperand &MO = Inst.getOperand(OpIdx);
// NOTE(review): the statement following this comment is not visible in this
// listing (the numbering jumps from 4924 to 4926); judging by the closing
// brace at 4950 it opens a scope - restore it from the upstream file.
4924 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4926 bool IsLit = false;
4927 std::optional<int64_t> Imm;
4928 if (MO.isImm()) {
4929 Imm = MO.getImm();
4930 } else if (MO.isExpr()) {
4931 if (isLitExpr(MO.getExpr())) {
4932 IsLit = true;
4933 Imm = getLitValue(MO.getExpr());
4934 }
4935 } else {
4936 continue;
4937 }
4938
// An expression without a known value counts on its own; an immediate counts
// only when it is not an inline constant.
4939 if (!Imm.has_value()) {
4940 ++NumExprs;
4941 } else if (!isInlineConstant(Inst, OpIdx)) {
4942 auto OpType = static_cast<AMDGPU::OperandType>(
4943 Desc.operands()[OpIdx].OperandType);
4944 int64_t Value = encode32BitLiteral(*Imm, OpType, IsLit);
// NOTE(review): one line of this branch is not visible in this listing (the
// numbering jumps from 4945 to 4947) - restore it from the upstream file.
4945 if (NumLiterals == 0 || LiteralValue != Value) {
4947 ++NumLiterals;
4948 }
4949 }
4950 }
4951 }
4952
4953 if (NumLiterals + NumExprs <= 1)
4954 return true;
4955
4956 Error(getOperandLoc(Operands, Src1Idx),
4957 "only one unique literal operand is allowed");
4958 return false;
4959}
4960
4961bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4962 const unsigned Opc = Inst.getOpcode();
4963 if (isPermlane16(Opc)) {
4964 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4965 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4966
4967 if (OpSel & ~3)
4968 return false;
4969 }
4970
4971 uint64_t TSFlags = MII.get(Opc).TSFlags;
4972
4973 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4974 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4975 if (OpSelIdx != -1) {
4976 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4977 return false;
4978 }
4979 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4980 if (OpSelHiIdx != -1) {
4981 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4982 return false;
4983 }
4984 }
4985
4986 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4987 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4988 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4989 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4990 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4991 if (OpSel & 3)
4992 return false;
4993 }
4994
4995 // Packed math FP32 instructions typically accept SGPRs or VGPRs as source
4996 // operands. On gfx12+, if a source operand uses SGPRs, the HW can only read
4997 // the first SGPR and use it for both the low and high operations.
4998 if (isPackedFP32Inst(Opc) && isGFX12Plus()) {
4999 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
5000 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
5001 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5002 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
5003
5004 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
5005 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
5006 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5007 unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
5008
5009 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5010
5011 auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
5012 unsigned Mask = 1U << Index;
5013 return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
5014 };
5015
5016 if (Src0.isReg() && isSGPR(Src0.getReg(), TRI) &&
5017 !VerifyOneSGPR(/*Index=*/0))
5018 return false;
5019 if (Src1.isReg() && isSGPR(Src1.getReg(), TRI) &&
5020 !VerifyOneSGPR(/*Index=*/1))
5021 return false;
5022
5023 int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
5024 if (Src2Idx != -1) {
5025 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
5026 if (Src2.isReg() && isSGPR(Src2.getReg(), TRI) &&
5027 !VerifyOneSGPR(/*Index=*/2))
5028 return false;
5029 }
5030 }
5031
5032 return true;
5033}
5034
5035bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
5036 if (!hasTrue16Insts())
5037 return true;
5038 const MCRegisterInfo *MRI = getMRI();
5039 const unsigned Opc = Inst.getOpcode();
5040 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5041 if (OpSelIdx == -1)
5042 return true;
5043 unsigned OpSelOpValue = Inst.getOperand(OpSelIdx).getImm();
5044 // If the value is 0 we could have a default OpSel Operand, so conservatively
5045 // allow it.
5046 if (OpSelOpValue == 0)
5047 return true;
5048 unsigned OpCount = 0;
5049 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
5050 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
5051 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), OpName);
5052 if (OpIdx == -1)
5053 continue;
5054 const MCOperand &Op = Inst.getOperand(OpIdx);
5055 if (Op.isReg() &&
5056 MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(Op.getReg())) {
5057 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Op.getReg(), *MRI);
5058 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
5059 if (OpSelOpIsHi != VGPRSuffixIsHi)
5060 return false;
5061 }
5062 ++OpCount;
5063 }
5064
5065 return true;
5066}
5067
5068bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
5069 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
5070
5071 const unsigned Opc = Inst.getOpcode();
5072 uint64_t TSFlags = MII.get(Opc).TSFlags;
5073
5074 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
5075 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
5076 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
5077 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
5078 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
5079 !(TSFlags & SIInstrFlags::IsSWMMAC))
5080 return true;
5081
5082 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
5083 if (NegIdx == -1)
5084 return true;
5085
5086 unsigned Neg = Inst.getOperand(NegIdx).getImm();
5087
5088 // Instructions that have neg_lo or neg_hi operand but neg modifier is allowed
5089 // on some src operands but not allowed on other.
5090 // It is convenient that such instructions don't have src_modifiers operand
5091 // for src operands that don't allow neg because they also don't allow opsel.
5092
5093 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
5094 AMDGPU::OpName::src1_modifiers,
5095 AMDGPU::OpName::src2_modifiers};
5096
5097 for (unsigned i = 0; i < 3; ++i) {
5098 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
5099 if (Neg & (1 << i))
5100 return false;
5101 }
5102 }
5103
5104 return true;
5105}
5106
5107bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
5108 const OperandVector &Operands) {
5109 const unsigned Opc = Inst.getOpcode();
5110 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
5111 if (DppCtrlIdx >= 0) {
5112 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
5113
5114 if (!AMDGPU::isLegalDPALU_DPPControl(getSTI(), DppCtrl) &&
5115 AMDGPU::isDPALU_DPP(MII.get(Opc), MII, getSTI())) {
5116 // DP ALU DPP is supported for row_newbcast only on GFX9* and row_share
5117 // only on GFX12.
5118 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
5119 Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
5120 : "DP ALU dpp only supports row_newbcast");
5121 return false;
5122 }
5123 }
5124
5125 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
5126 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5127
5128 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
5129 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
5130 if (Src1Idx >= 0) {
5131 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
5132 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5133 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
5134 Error(getOperandLoc(Operands, Src1Idx),
5135 "invalid operand for instruction");
5136 return false;
5137 }
5138 if (Src1.isImm()) {
5139 Error(getInstLoc(Operands),
5140 "src1 immediate operand invalid for instruction");
5141 return false;
5142 }
5143 }
5144 }
5145
5146 return true;
5147}
5148
5149// Check if VCC register matches wavefront size
5150bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
5151 return (Reg == AMDGPU::VCC && isWave64()) ||
5152 (Reg == AMDGPU::VCC_LO && isWave32());
5153}
5154
5155// One unique literal can be used. VOP3 literal is only allowed in GFX10+
//
// Validates the literal operands of an instruction. The check is skipped
// (returns true) unless the instruction is VOP3/VOP3P, has a mandatory literal
// (a named `imm` operand), or is VOPD. On failure a diagnostic is emitted at
// the offending operand and false is returned.
//
// NOTE(review): this listing skips source lines 5196, 5201 and 5227, so the
// IsForcedFP64 / IsFP64 expressions and the end of the literal branch are
// incomplete as shown here — confirm against the complete file before editing.
5156bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
5157 const OperandVector &Operands) {
5158 unsigned Opcode = Inst.getOpcode();
5159 const MCInstrDesc &Desc = MII.get(Opcode);
5160 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
5161 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
5162 !HasMandatoryLiteral && !isVOPD(Opcode))
5163 return true;
5164
5165 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
5166
// Index of the operand that introduced the literal seen so far, and that
// literal's value. Later literals are compared against LiteralValue.
5167 std::optional<unsigned> LiteralOpIdx;
5168 std::optional<uint64_t> LiteralValue;
5169
5170 for (int OpIdx : OpIndices) {
5171 if (OpIdx == -1)
5172 continue;
5173
// Only immediate or expression operands that are SI source operands can be
// literals; everything else is ignored.
5174 const MCOperand &MO = Inst.getOperand(OpIdx);
5175 if (!MO.isImm() && !MO.isExpr())
5176 continue;
5177 if (!isSISrcOperand(Desc, OpIdx))
5178 continue;
5179
// Imm stays empty when the operand is an expression that is not a lit
// expression, i.e. its value is not known here.
5180 std::optional<int64_t> Imm;
5181 if (MO.isImm())
5182 Imm = MO.getImm();
5183 else if (MO.isExpr() && isLitExpr(MO.getExpr()))
5184 Imm = getLitValue(MO.getExpr());
5185
5186 bool IsAnotherLiteral = false;
5187 bool IsForcedLit = findMCOperand(Operands, OpIdx).isForcedLit();
5188 bool IsForcedLit64 = findMCOperand(Operands, OpIdx).isForcedLit64();
5189 if (!Imm.has_value()) {
5190 // Literal value not known, so we conservatively assume it's different.
5191 IsAnotherLiteral = true;
5192 } else if (IsForcedLit || IsForcedLit64 || !isInlineConstant(Inst, OpIdx)) {
// The operand is a literal: either forced with lit()/lit64() or not
// representable as an inline constant.
5193 uint64_t Value = *Imm;
5194 bool IsForcedFP64 =
5195 Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
5197 HasMandatoryLiteral);
5198 unsigned OpTy = Desc.operands()[OpIdx].OperandType;
5199 bool IsFP64 =
5200 (IsForcedFP64 || (AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
5202 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
5203 bool IsValid32Op =
5204 IsForcedLit || AMDGPU::isValid32BitLiteral(Value, IsFP64);
5205
// Reject a value that needs a 64-bit literal (or an explicit lit64 on an
// instruction without a mandatory literal) unless the target has 64-bit
// literals and the instruction descriptor size is 4.
5206 if (((!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) &&
5207 !IsForcedFP64) ||
5208 (IsForcedLit64 && !HasMandatoryLiteral)) &&
5209 (!has64BitLiterals() || Desc.getSize() != 4)) {
5210 Error(getOperandLoc(Operands, OpIdx),
5211 "invalid operand for instruction");
5212 return false;
5213 }
5214
5215 // Only src0 can use lit64 in VOP* encoding.
5216 if (!IsForcedFP64 && (IsForcedLit64 || !IsValid32Op) &&
5217 OpIdx != getNamedOperandIdx(Opcode, OpName::src0)) {
5218 Error(getOperandLoc(Operands, OpIdx),
5219 "invalid operand for instruction");
5220 return false;
5221 }
5222
// For an FP64 operand with a valid 32-bit literal, only the high 32 bits
// of the value are used for the uniqueness comparison below.
5223 if (IsFP64 && IsValid32Op && !IsForcedFP64)
5224 Value = Hi_32(Value);
5225
5226 IsAnotherLiteral = !LiteralValue || *LiteralValue != Value;
// NOTE(review): no assignment to LiteralValue is visible in this listing;
// presumably it is on the skipped line 5227 — confirm.
5228 }
5229
5230 if (IsAnotherLiteral && !HasMandatoryLiteral &&
5231 !getFeatureBits()[FeatureVOP3Literal]) {
5232 Error(getOperandLoc(Operands, OpIdx),
5233 "literal operands are not supported");
5234 return false;
5235 }
5236
// A second, different literal is an error; it is reported at whichever of
// the two operand locations getLaterLoc selects.
5237 if (LiteralOpIdx && IsAnotherLiteral) {
5238 Error(getLaterLoc(getOperandLoc(Operands, OpIdx),
5239 getOperandLoc(Operands, *LiteralOpIdx)),
5240 "only one unique literal operand is allowed");
5241 return false;
5242 }
5243
5244 if (IsAnotherLiteral)
5245 LiteralOpIdx = OpIdx;
5246 }
5247
5248 return true;
5249}
5250
5251// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
5252static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
5253 const MCRegisterInfo *MRI) {
5254 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name);
5255 if (OpIdx < 0)
5256 return -1;
5257
5258 const MCOperand &Op = Inst.getOperand(OpIdx);
5259 if (!Op.isReg())
5260 return -1;
5261
5262 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5263 auto Reg = Sub ? Sub : Op.getReg();
5264 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5265 return AGPR32.contains(Reg) ? 1 : 0;
5266}
5267
// Validates the use of AGPRs as the data/destination registers of memory
// instructions. Returns true when the combination is acceptable; the caller
// is responsible for emitting the diagnostic.
//
// NOTE(review): this listing skips source line 5271, so the instruction-kind
// mask in the first condition is incomplete as shown — confirm against the
// complete file.
5268bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
5269 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5270 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
5272 SIInstrFlags::DS)) == 0)
5273 return true;
5274
// DS instructions name their data operand data0; the others use vdata.
5275 AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
5276 ? AMDGPU::OpName::data0
5277 : AMDGPU::OpName::vdata;
5278
// IsAGPROperand yields -1 (no register operand), 0 (non-AGPR) or 1 (AGPR).
5279 const MCRegisterInfo *MRI = getMRI();
5280 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
5281 int DataAreg = IsAGPROperand(Inst, DataName, MRI);
5282
// For DS instructions, data0 and data1 (when both are registers) must be of
// the same kind.
5283 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
5284 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
5285 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5286 return false;
5287 }
5288
// With FeatureGFX90AInsts, AGPRs are allowed but dst and data must agree
// when both are present.
5289 auto FB = getFeatureBits();
5290 if (FB[AMDGPU::FeatureGFX90AInsts]) {
5291 if (DataAreg < 0 || DstAreg < 0)
5292 return true;
5293 return DstAreg == DataAreg;
5294 }
5295
// Without that feature, neither dst nor data may be an AGPR.
5296 return DstAreg < 1 && DataAreg < 1;
5297}
5298
5299bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
5300 auto FB = getFeatureBits();
5301 if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
5302 return true;
5303
5304 unsigned Opc = Inst.getOpcode();
5305 const MCRegisterInfo *MRI = getMRI();
5306 // DS_READ_B96_TR_B6 is the only DS instruction in GFX950, that allows
5307 // unaligned VGPR. All others only allow even aligned VGPRs.
5308 if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5309 return true;
5310
5311 if (FB[AMDGPU::FeatureGFX1250Insts]) {
5312 switch (Opc) {
5313 default:
5314 break;
5315 case AMDGPU::DS_LOAD_TR6_B96:
5316 case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5317 // DS_LOAD_TR6_B96 is the only DS instruction in GFX1250, that
5318 // allows unaligned VGPR. All others only allow even aligned VGPRs.
5319 return true;
5320 case AMDGPU::GLOBAL_LOAD_TR6_B96:
5321 case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5322 // GLOBAL_LOAD_TR6_B96 is the only GLOBAL instruction in GFX1250, that
5323 // allows unaligned VGPR for vdst, but other operands still only allow
5324 // even aligned VGPRs.
5325 int VAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
5326 if (VAddrIdx != -1) {
5327 const MCOperand &Op = Inst.getOperand(VAddrIdx);
5328 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5329 if ((Sub - AMDGPU::VGPR0) & 1)
5330 return false;
5331 }
5332 return true;
5333 }
5334 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5335 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
5336 return true;
5337 }
5338 }
5339
5340 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5341 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5342 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
5343 const MCOperand &Op = Inst.getOperand(I);
5344 if (!Op.isReg())
5345 continue;
5346
5347 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5348 if (!Sub)
5349 continue;
5350
5351 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
5352 return false;
5353 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
5354 return false;
5355 }
5356
5357 return true;
5358}
5359
5360SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
5361 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5362 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5363 if (Op.isBLGP())
5364 return Op.getStartLoc();
5365 }
5366 return SMLoc();
5367}
5368
5369bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
5370 const OperandVector &Operands) {
5371 unsigned Opc = Inst.getOpcode();
5372 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
5373 if (BlgpIdx == -1)
5374 return true;
5375 SMLoc BLGPLoc = getBLGPLoc(Operands);
5376 if (!BLGPLoc.isValid())
5377 return true;
5378 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
5379 auto FB = getFeatureBits();
5380 bool UsesNeg = false;
5381 if (FB[AMDGPU::FeatureGFX940Insts]) {
5382 switch (Opc) {
5383 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5384 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5385 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5386 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5387 UsesNeg = true;
5388 }
5389 }
5390
5391 if (IsNeg == UsesNeg)
5392 return true;
5393
5394 Error(BLGPLoc,
5395 UsesNeg ? "invalid modifier: blgp is not supported"
5396 : "invalid modifier: neg is not supported");
5397
5398 return false;
5399}
5400
5401bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
5402 const OperandVector &Operands) {
5403 if (!isGFX11Plus())
5404 return true;
5405
5406 unsigned Opc = Inst.getOpcode();
5407 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5408 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5409 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5410 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5411 return true;
5412
5413 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
5414 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
5415 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
5416 if (Reg == AMDGPU::SGPR_NULL)
5417 return true;
5418
5419 Error(getOperandLoc(Operands, Src0Idx), "src0 must be null");
5420 return false;
5421}
5422
5423bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
5424 const OperandVector &Operands) {
5425 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5426 if ((TSFlags & SIInstrFlags::DS) == 0)
5427 return true;
5428 if (TSFlags & SIInstrFlags::GWS)
5429 return validateGWS(Inst, Operands);
5430 // Only validate GDS for non-GWS instructions.
5431 if (hasGDS())
5432 return true;
5433 int GDSIdx =
5434 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
5435 if (GDSIdx < 0)
5436 return true;
5437 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
5438 if (GDS) {
5439 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5440 Error(S, "gds modifier is not supported on this GPU");
5441 return false;
5442 }
5443 return true;
5444}
5445
5446// gfx90a has an undocumented limitation:
5447// DS_GWS opcodes must use even aligned registers.
5448bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
5449 const OperandVector &Operands) {
5450 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5451 return true;
5452
5453 int Opc = Inst.getOpcode();
5454 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5455 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5456 return true;
5457
5458 const MCRegisterInfo *MRI = getMRI();
5459 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5460 int Data0Pos =
5461 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
5462 assert(Data0Pos != -1);
5463 auto Reg = Inst.getOperand(Data0Pos).getReg();
5464 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5465 if (RegIdx & 1) {
5466 Error(getOperandLoc(Operands, Data0Pos), "vgpr must be even aligned");
5467 return false;
5468 }
5469
5470 return true;
5471}
5472
// Validates the cache-policy (cpol) operand of an instruction and reports a
// diagnostic for unsupported modifiers. Returns true immediately when the
// opcode has no cpol operand. On GFX12+ the remaining checks are delegated to
// validateTHAndScopeBits.
//
// Several diagnostics are positioned by searching the source text that starts
// at the cpol operand location for the modifier's spelling.
//
// NOTE(review): this listing skips source lines 5523-5524, 5535 and 5548, so
// the AllowSCCModifier mask, the condition guarding the bare `return true;`
// and the start of one SMLoc assignment are incomplete as shown — confirm
// against the complete file.
5473bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
5474 const OperandVector &Operands,
5475 SMLoc IDLoc) {
5476 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
5477 AMDGPU::OpName::cpol);
5478 if (CPolPos == -1)
5479 return true;
5480
5481 unsigned CPol = Inst.getOperand(CPolPos).getImm();
5482
// scale_offset and nv are only accepted on GFX1250+.
// NOTE(review): these diagnostics (and the scale_offset one below) are not
// followed by `return false;` — validation continues; confirm this is
// intended.
5483 if (!isGFX1250Plus()) {
5484 if (CPol & CPol::SCAL) {
5485 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5486 StringRef CStr(S.getPointer());
5487 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5488 Error(S, "scale_offset is not supported on this GPU");
5489 }
5490 if (CPol & CPol::NV) {
5491 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5492 StringRef CStr(S.getPointer());
5493 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
5494 Error(S, "nv is not supported on this GPU");
5495 }
5496 }
5497
5498 if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Inst.getOpcode())) {
5499 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5500 StringRef CStr(S.getPointer());
5501 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5502 Error(S, "scale_offset is not supported for this instruction");
5503 }
5504
5505 if (isGFX12Plus())
5506 return validateTHAndScopeBits(Inst, Operands, CPol);
5507
// SMRD: no cache policy at all on SI/CI; elsewhere only GLC and DLC bits.
5508 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5509 if (TSFlags & SIInstrFlags::SMRD) {
5510 if (CPol && (isSI() || isCI())) {
5511 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5512 Error(S, "cache policy is not supported for SMRD instructions");
5513 return false;
5514 }
5515 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
5516 Error(IDLoc, "invalid cache policy for SMEM instruction");
5517 return false;
5518 }
5519 }
5520
// On GFX90A (but not GFX940) the scc modifier is restricted to the
// instruction kinds collected in AllowSCCModifier.
5521 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
5522 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
5525 if (!(TSFlags & AllowSCCModifier)) {
5526 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5527 StringRef CStr(S.getPointer());
5528 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
5529 Error(S,
5530 "scc modifier is not supported for this instruction on this GPU");
5531 return false;
5532 }
5533 }
5534
5536 return true;
5537
// Returning atomics (non-MIMG) must set GLC; all other instructions reaching
// this point must not. On GFX940 the same bit is spelled sc0 in diagnostics.
5538 if (TSFlags & SIInstrFlags::IsAtomicRet) {
5539 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
5540 Error(IDLoc, isGFX940() ? "instruction must use sc0"
5541 : "instruction must use glc");
5542 return false;
5543 }
5544 } else {
5545 if (CPol & CPol::GLC) {
5546 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5547 StringRef CStr(S.getPointer());
5549 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
5550 Error(S, isGFX940() ? "instruction must not use sc0"
5551 : "instruction must not use glc");
5552 return false;
5553 }
5554 }
5555
5556 return true;
5557}
5558
// Validates the temporal-hint (th) and scope fields of a cache-policy value.
// TH and Scope are extracted from CPol with the CPol::TH and CPol::SCOPE
// masks. Every failure is reported at the cpol operand location and makes the
// function return false.
//
// NOTE(review): this listing skips source lines 5575, 5579-5580, 5593 and
// 5595, so the second halves of the TH_ATOMIC_RETURN conditions and the
// TH_BYPASS scope condition are incomplete as shown — confirm against the
// complete file.
5559bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
5560 const OperandVector &Operands,
5561 const unsigned CPol) {
5562 const unsigned TH = CPol & AMDGPU::CPol::TH;
5563 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
5564
5565 const unsigned Opcode = Inst.getOpcode();
5566 const MCInstrDesc &TID = MII.get(Opcode);
5567
// Emits Msg at the cpol operand and yields false, so callers can write
// `return PrintError(...)`.
5568 auto PrintError = [&](StringRef Msg) {
5569 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5570 Error(S, Msg);
5571 return false;
5572 };
5573
5574 if ((TH & AMDGPU::CPol::TH_ATOMIC_RETURN) &&
5576 return PrintError("th:TH_ATOMIC_RETURN requires a destination operand");
5577
5578 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5581 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5582
// A zero TH field needs no further checking.
5583 if (TH == 0)
5584 return true;
5585
5586 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5587 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5588 (TH == AMDGPU::CPol::TH_NT_HT)))
5589 return PrintError("invalid th value for SMEM instruction");
5590
5591 if (TH == AMDGPU::CPol::TH_BYPASS) {
5592 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5594 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5596 return PrintError("scope and th combination is not valid");
5597 }
5598
// The TH type bit present in CPol must match the kind of instruction
// (atomic, store, or otherwise load) reported by getTemporalHintType.
5599 unsigned THType = AMDGPU::getTemporalHintType(TID);
5600 if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
5601 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5602 return PrintError("invalid th value for atomic instructions");
5603 } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
5604 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5605 return PrintError("invalid th value for store instructions");
5606 } else {
5607 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5608 return PrintError("invalid th value for load instructions");
5609 }
5610
5611 return true;
5612}
5613
// Rejects an explicit TFE modifier on store instructions. Returns false after
// reporting an error, true otherwise.
//
// NOTE(review): this listing skips source line 5618, so the second half of
// the `if` condition is not visible here — confirm against the complete file.
5614bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5615 const OperandVector &Operands) {
5616 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5617 if (Desc.mayStore() &&
5619 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
// The error is raised only when the TFE location differs from the
// instruction location; presumably getImmLoc falls back to the instruction
// location when the modifier was not written — TODO confirm.
5620 if (Loc != getInstLoc(Operands)) {
5621 Error(Loc, "TFE modifier has no meaning for store instructions");
5622 return false;
5623 }
5624 }
5625
5626 return true;
5627}
5628
// Validates WMMA instructions that carry matrix format operands. Returns true
// immediately when the opcode has no matrix_a_fmt operand. Otherwise it checks
// the src0/src1 register sizes against the A/B matrix formats and, when scale
// format operands exist, the format/scale combination. Returns false after
// reporting an error.
//
// NOTE(review): this listing skips source line 5648, so the condition that
// guards the `return true;` inside validateFmt is not visible here — confirm
// against the complete file.
5629bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
5630 const OperandVector &Operands) {
5631 unsigned Opc = Inst.getOpcode();
5632 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5633 const MCInstrDesc &Desc = MII.get(Opc);
5634
5635 int AFmtIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
5636 if (AFmtIdx == -1)
5637 return true;
5638 unsigned AFmt = Inst.getOperand(AFmtIdx).getImm();
// matrix_b_fmt is read without a -1 check; presumably it always accompanies
// matrix_a_fmt — TODO confirm.
5639 int BFmtIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
5640 unsigned BFmt = Inst.getOperand(BFmtIdx).getImm();
5641
// Checks the source operand SrcOp against matrix format Fmt using the
// bit size of the operand's register class; reports an error at the operand
// and returns false on mismatch.
5642 auto validateFmt = [&](unsigned Fmt, AMDGPU::OpName SrcOp) -> bool {
5643 int SrcIdx = AMDGPU::getNamedOperandIdx(Opc, SrcOp);
5644 unsigned RegSize =
5645 TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[SrcIdx], HwMode))
5646 .getSizeInBits();
5647
5649 return true;
5650
5651 Error(getOperandLoc(Operands, SrcIdx),
5652 "wrong register tuple size for " +
5653 Twine(WMMAMods::ModMatrixFmt[Fmt]));
5654 return false;
5655 };
5656
5657 if (!validateFmt(AFmt, AMDGPU::OpName::src0) ||
5658 !validateFmt(BFmt, AMDGPU::OpName::src1))
5659 return false;
5660
// Scale formats are optional: without matrix_a_scale_fmt there is nothing
// more to check.
5661 int AScaleIdx =
5662 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
5663 if (AScaleIdx == -1)
5664 return true;
5665 unsigned AScale = Inst.getOperand(AScaleIdx).getImm();
5666 int BScaleIdx =
5667 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
5668 unsigned BScale = Inst.getOperand(BScaleIdx).getImm();
5669 if (!isValidWMMAScaleFmtCombination(AFmt, AScale, BFmt, BScale)) {
5670 Error(getImmLoc(AMDGPUOperand::ImmTyMatrixAFMT, Operands),
5671 "invalid matrix and scale format combination");
5672 return false;
5673 }
5674
5675 return true;
5676}
5677
5678bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, SMLoc IDLoc,
5679 const OperandVector &Operands) {
5680 if (!validateLdsDirect(Inst, Operands))
5681 return false;
5682 if (!validateTrue16OpSel(Inst)) {
5683 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5684 "op_sel operand conflicts with 16-bit operand suffix");
5685 return false;
5686 }
5687 if (!validateSOPLiteral(Inst, Operands))
5688 return false;
5689 if (!validateVOPLiteral(Inst, Operands)) {
5690 return false;
5691 }
5692 if (!validateConstantBusLimitations(Inst, Operands)) {
5693 return false;
5694 }
5695 if (!validateVOPD(Inst, Operands)) {
5696 return false;
5697 }
5698 if (!validateIntClampSupported(Inst)) {
5699 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5700 "integer clamping is not supported on this GPU");
5701 return false;
5702 }
5703 if (!validateOpSel(Inst)) {
5704 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5705 "invalid op_sel operand");
5706 return false;
5707 }
5708 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5709 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5710 "invalid neg_lo operand");
5711 return false;
5712 }
5713 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5714 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5715 "invalid neg_hi operand");
5716 return false;
5717 }
5718 if (!validateDPP(Inst, Operands)) {
5719 return false;
5720 }
5721 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5722 if (!validateMIMGD16(Inst)) {
5723 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5724 "d16 modifier is not supported on this GPU");
5725 return false;
5726 }
5727 if (!validateMIMGDim(Inst, Operands)) {
5728 Error(IDLoc, "missing dim operand");
5729 return false;
5730 }
5731 if (!validateTensorR128(Inst)) {
5732 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5733 "instruction must set modifier r128=0");
5734 return false;
5735 }
5736 if (!validateMIMGMSAA(Inst)) {
5737 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5738 "invalid dim; must be MSAA type");
5739 return false;
5740 }
5741 if (!validateMIMGDataSize(Inst, IDLoc)) {
5742 return false;
5743 }
5744 if (!validateMIMGAddrSize(Inst, IDLoc))
5745 return false;
5746 if (!validateMIMGAtomicDMask(Inst)) {
5747 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5748 "invalid atomic image dmask");
5749 return false;
5750 }
5751 if (!validateMIMGGatherDMask(Inst)) {
5752 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5753 "invalid image_gather dmask: only one bit must be set");
5754 return false;
5755 }
5756 if (!validateMovrels(Inst, Operands)) {
5757 return false;
5758 }
5759 if (!validateOffset(Inst, Operands)) {
5760 return false;
5761 }
5762 if (!validateMAIAccWrite(Inst, Operands)) {
5763 return false;
5764 }
5765 if (!validateMAISrc2(Inst, Operands)) {
5766 return false;
5767 }
5768 if (!validateMFMA(Inst, Operands)) {
5769 return false;
5770 }
5771 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5772 return false;
5773 }
5774
5775 if (!validateAGPRLdSt(Inst)) {
5776 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5777 ? "invalid register class: data and dst should be all VGPR or AGPR"
5778 : "invalid register class: agpr loads and stores not supported on this GPU"
5779 );
5780 return false;
5781 }
5782 if (!validateVGPRAlign(Inst)) {
5783 Error(IDLoc,
5784 "invalid register class: vgpr tuples must be 64 bit aligned");
5785 return false;
5786 }
5787 if (!validateDS(Inst, Operands)) {
5788 return false;
5789 }
5790
5791 if (!validateBLGP(Inst, Operands)) {
5792 return false;
5793 }
5794
5795 if (!validateDivScale(Inst)) {
5796 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5797 return false;
5798 }
5799 if (!validateWaitCnt(Inst, Operands)) {
5800 return false;
5801 }
5802 if (!validateTFE(Inst, Operands)) {
5803 return false;
5804 }
5805 if (!validateWMMA(Inst, Operands)) {
5806 return false;
5807 }
5808
5809 return true;
5810}
5811
5813 const FeatureBitset &FBS,
5814 unsigned VariantID = 0);
5815
5816static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5817 const FeatureBitset &AvailableFeatures,
5818 unsigned VariantID);
5819
5820bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5821 const FeatureBitset &FBS) {
5822 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5823}
5824
5825bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5826 const FeatureBitset &FBS,
5827 ArrayRef<unsigned> Variants) {
5828 for (auto Variant : Variants) {
5829 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5830 return true;
5831 }
5832
5833 return false;
5834}
5835
5836bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5837 SMLoc IDLoc) {
5838 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5839
5840 // Check if requested instruction variant is supported.
5841 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5842 return false;
5843
5844 // This instruction is not supported.
5845 // Clear any other pending errors because they are no longer relevant.
5846 getParser().clearPendingErrors();
5847
5848 // Requested instruction variant is not supported.
5849 // Check if any other variants are supported.
5850 StringRef VariantName = getMatchedVariantName();
5851 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5852 return Error(IDLoc,
5853 Twine(VariantName,
5854 " variant of this instruction is not supported"));
5855 }
5856
5857 // Check if this instruction may be used with a different wavesize.
5858 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5859 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5860 // FIXME: Use getAvailableFeatures, and do not manually recompute
5861 FeatureBitset FeaturesWS32 = getFeatureBits();
5862 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5863 .flip(AMDGPU::FeatureWavefrontSize32);
5864 FeatureBitset AvailableFeaturesWS32 =
5865 ComputeAvailableFeatures(FeaturesWS32);
5866
5867 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5868 return Error(IDLoc, "instruction requires wavesize=32");
5869 }
5870
5871 // Finally check if this instruction is supported on any other GPU.
5872 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5873 return Error(IDLoc, "instruction not supported on this GPU (" +
5874 getSTI().getCPU() + ")" + ": " + Mnemo);
5875 }
5876
5877 // Instruction not supported on any GPU. Probably a typo.
5878 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5879 return Error(IDLoc, "invalid instruction" + Suggestion);
5880}
5881
5882static bool isInvalidVOPDY(const OperandVector &Operands,
5883 uint64_t InvalidOprIdx) {
5884 assert(InvalidOprIdx < Operands.size());
5885 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5886 if (Op.isToken() && InvalidOprIdx > 1) {
5887 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5888 return PrevOp.isToken() && PrevOp.getToken() == "::";
5889 }
5890 return false;
5891}
5892
5893bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5894 OperandVector &Operands,
5895 MCStreamer &Out,
5896 uint64_t &ErrorInfo,
5897 bool MatchingInlineAsm) {
5898 MCInst Inst;
5899 Inst.setLoc(IDLoc);
5900 unsigned Result = Match_Success;
5901 for (auto Variant : getMatchedVariants()) {
5902 uint64_t EI;
5903 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5904 Variant);
5905 // We order match statuses from least to most specific. We use most specific
5906 // status as resulting
5907 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5908 if (R == Match_Success || R == Match_MissingFeature ||
5909 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5910 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5911 Result != Match_MissingFeature)) {
5912 Result = R;
5913 ErrorInfo = EI;
5914 }
5915 if (R == Match_Success)
5916 break;
5917 }
5918
5919 if (Result == Match_Success) {
5920 if (!validateInstruction(Inst, IDLoc, Operands)) {
5921 return true;
5922 }
5923 Out.emitInstruction(Inst, getSTI());
5924 return false;
5925 }
5926
5927 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5928 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5929 return true;
5930 }
5931
5932 switch (Result) {
5933 default: break;
5934 case Match_MissingFeature:
5935 // It has been verified that the specified instruction
5936 // mnemonic is valid. A match was found but it requires
5937 // features which are not supported on this GPU.
5938 return Error(IDLoc, "operands are not valid for this GPU or mode");
5939
5940 case Match_InvalidOperand: {
5941 SMLoc ErrorLoc = IDLoc;
5942 if (ErrorInfo != ~0ULL) {
5943 if (ErrorInfo >= Operands.size()) {
5944 return Error(IDLoc, "too few operands for instruction");
5945 }
5946 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5947 if (ErrorLoc == SMLoc())
5948 ErrorLoc = IDLoc;
5949
5950 if (isInvalidVOPDY(Operands, ErrorInfo))
5951 return Error(ErrorLoc, "invalid VOPDY instruction");
5952 }
5953 return Error(ErrorLoc, "invalid operand for instruction");
5954 }
5955
5956 case Match_MnemonicFail:
5957 llvm_unreachable("Invalid instructions should have been handled already");
5958 }
5959 llvm_unreachable("Implement any new match types added!");
5960}
5961
5962bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5963 int64_t Tmp = -1;
5964 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5965 return true;
5966 }
5967 if (getParser().parseAbsoluteExpression(Tmp)) {
5968 return true;
5969 }
5970 Ret = static_cast<uint32_t>(Tmp);
5971 return false;
5972}
5973
5974bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5975 if (!getSTI().getTargetTriple().isAMDGCN())
5976 return TokError("directive only supported for amdgcn architecture");
5977
5978 std::string TargetIDDirective;
5979 SMLoc TargetStart = getTok().getLoc();
5980 if (getParser().parseEscapedString(TargetIDDirective))
5981 return true;
5982
5983 std::optional<AMDGPU::TargetID> MaybeParsed =
5984 AMDGPU::TargetID::parseTargetIDString(TargetIDDirective);
5985 if (!MaybeParsed)
5986 return getParser().Error(TargetStart, "malformed target ID");
5987
5988 const AMDGPU::TargetID &ParsedTargetID = *MaybeParsed;
5989 const std::optional<AMDGPU::TargetID> &CurrentTargetID =
5990 getTargetStreamer().getTargetID();
5991
5992 if (*CurrentTargetID != ParsedTargetID) {
5993 return getParser().Error(
5994 TargetStart, Twine(".amdgcn_target directive's target id ") +
5995 Twine(ParsedTargetID.toString()) +
5996 Twine(" does not match the specified target id ") +
5997 Twine(CurrentTargetID->toString()));
5998 }
5999
6000 return false;
6001}
6002
6003bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
6004 return Error(Range.Start, "value out of range", Range);
6005}
6006
/// Build the MCExprs for the VGPR and SGPR block counts.
///
/// \p VGPRBlocks and \p SGPRBlocks are outputs, derived from \p NextFreeVGPR
/// and the (possibly adjusted) SGPR count using the target's encoding
/// granules. On targets whose ISA major version is below 10, the SGPR count
/// is range-checked against the addressable SGPR limit and extended by the
/// extra SGPRs created from \p VCCUsed, \p FlatScrUsed and \p XNACKUsed.
///
/// NOTE(review): listing lines 6022 and 6042 are absent from this view (gaps
/// in the embedded numbering), so the statement taken when Version.Major >= 10
/// and the right-hand side of the FeatureSGPRInitBug assignment are not
/// visible here -- confirm against the full file.
///
/// \returns true (via OutOfRangeError on \p SGPRRange) when an evaluated SGPR
/// count exceeds the addressable limit, false otherwise.
6007bool AMDGPUAsmParser::calculateGPRBlocks(
6008 const FeatureBitset &Features, const MCExpr *VCCUsed,
6009 const MCExpr *FlatScrUsed, bool XNACKUsed,
6010 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
6011 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
6012 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
6013 // TODO(scott.linder): These calculations are duplicated from
6014 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
6015 IsaVersion Version = getIsaVersion(getSTI().getCPU());
6016 MCContext &Ctx = getContext();
6017
6018 const MCExpr *NumSGPRs = NextFreeSGPR;
6019 int64_t EvaluatedSGPRs;
6020
6021 if (Version.Major >= 10)
6023 else {
6024 unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(getSTI());
6025
// First check: the count without extra SGPRs, applied only for major version
// 8 and later when FeatureSGPRInitBug is not set. It is skipped when the
// expression cannot be evaluated yet.
6026 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
6027 !Features.test(FeatureSGPRInitBug) &&
6028 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
6029 return OutOfRangeError(SGPRRange);
6030
6031 const MCExpr *ExtraSGPRs =
6032 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
6033 NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
6034
// Second check: the count including extra SGPRs, applied for major version 7
// and earlier or when FeatureSGPRInitBug is set.
6035 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
6036 (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
6037 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
6038 return OutOfRangeError(SGPRRange);
6039
6040 if (Features.test(FeatureSGPRInitBug))
6041 NumSGPRs =
6043 }
6044
6045 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
6046 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
6047 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
6048 unsigned Granule) -> const MCExpr * {
6049 const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
6050 const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
6051 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
6052 const MCExpr *AlignToGPR =
6053 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
6054 const MCExpr *DivGPR =
6055 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
6056 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
6057 return SubGPR;
6058 };
6059
6060 VGPRBlocks = GetNumGPRBlocks(
6061 NextFreeVGPR,
6062 IsaInfo::getVGPREncodingGranule(getSTI(), EnableWavefrontSize32));
6063 SGPRBlocks =
6064 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(getSTI()));
6065
6066 return false;
6067}
6068
/// Parse a kernel descriptor block: the kernel name followed by `.amdhsa_*`
/// directives, terminated by `.end_amdhsa_kernel`.
///
/// Each directive's operand is parsed as an expression and validated against
/// the target; a directive may not appear twice. After the block ends, the
/// VGPR/SGPR block counts and the user SGPR count are computed, cross-field
/// checks are run, and the descriptor is passed to the target streamer.
///
/// NOTE(review): the embedded listing numbers have gaps (e.g. 6081, 6155,
/// 6173), so some source lines -- presumably the openers of macro invocations
/// and calls whose argument lists are visible -- are missing from this view.
/// Confirm against the full file before changing any code here.
///
/// \returns true if an error was reported, false on success.
6069bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
6070 if (!getSTI().getTargetTriple().isAMDGCN())
6071 return TokError("directive only supported for amdgcn architecture");
6072
6073 if (!isHsaAbi(getSTI()))
6074 return TokError("directive only supported for amdhsa OS");
6075
6076 StringRef KernelName;
6077 if (getParser().parseIdentifier(KernelName))
6078 return true;
6079
6080 AMDGPU::MCKernelDescriptor KD =
6082 &getSTI(), getContext());
6083
// Names of the directives already seen, used to reject repeats and to check
// for required directives after the loop.
6084 StringSet<> Seen;
6085
6086 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
6087
6088 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
6089 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
6090
6091 SMRange VGPRRange;
6092 const MCExpr *NextFreeVGPR = ZeroExpr;
6093 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
6094 const MCExpr *NamedBarCnt = ZeroExpr;
6095 uint64_t SharedVGPRCount = 0;
6096 uint64_t PreloadLength = 0;
6097 uint64_t PreloadOffset = 0;
6098 SMRange SGPRRange;
6099 const MCExpr *NextFreeSGPR = ZeroExpr;
6100
6101 // Count the number of user SGPRs implied from the enabled feature bits.
6102 unsigned ImpliedUserSGPRCount = 0;
6103
6104 // Track if the asm explicitly contains the directive for the user SGPR
6105 // count.
6106 std::optional<unsigned> ExplicitUserSGPRCount;
6107 const MCExpr *ReserveVCC = OneExpr;
6108 const MCExpr *ReserveFlatScr = OneExpr;
6109 std::optional<bool> EnableWavefrontSize32;
6110
// One iteration per directive; the loop ends at .end_amdhsa_kernel.
6111 while (true) {
6112 while (trySkipToken(AsmToken::EndOfStatement));
6113
6114 StringRef ID;
6115 SMRange IDRange = getTok().getLocRange();
6116 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
6117 return true;
6118
6119 if (ID == ".end_amdhsa_kernel")
6120 break;
6121
6122 if (!Seen.insert(ID).second)
6123 return TokError(".amdhsa_ directives cannot be repeated");
6124
6125 SMLoc ValStart = getLoc();
6126 const MCExpr *ExprVal;
6127 if (getParser().parseExpression(ExprVal))
6128 return true;
6129 SMLoc ValEnd = getLoc();
6130 SMRange ValRange = SMRange(ValStart, ValEnd);
6131
// Val receives the operand's value only when the expression evaluates to an
// absolute constant; otherwise it stays 0 and EvaluatableExpr is false.
// Negative constants are rejected for every directive.
6132 int64_t IVal = 0;
6133 uint64_t Val = IVal;
6134 bool EvaluatableExpr;
6135 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
6136 if (IVal < 0)
6137 return OutOfRangeError(ValRange);
6138 Val = IVal;
6139 }
6140
6141#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
6142 if (!isUInt<ENTRY##_WIDTH>(Val)) \
6143 return OutOfRangeError(RANGE); \
6144 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
6145 getContext());
6146
6147// Some fields use the parsed value immediately which requires the expression to
6148// be solvable.
6149#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6150 if (!(RESOLVED)) \
6151 return Error(IDRange.Start, "directive should have resolvable expression", \
6152 IDRange);
6153
6154 if (ID == ".amdhsa_group_segment_fixed_size") {
6156 CHAR_BIT>(Val))
6157 return OutOfRangeError(ValRange);
6158 KD.group_segment_fixed_size = ExprVal;
6159 } else if (ID == ".amdhsa_private_segment_fixed_size") {
6161 CHAR_BIT>(Val))
6162 return OutOfRangeError(ValRange);
6163 KD.private_segment_fixed_size = ExprVal;
6164 } else if (ID == ".amdhsa_kernarg_size") {
6165 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
6166 return OutOfRangeError(ValRange);
6167 KD.kernarg_size = ExprVal;
6168 } else if (ID == ".amdhsa_user_sgpr_count") {
6169 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6170 ExplicitUserSGPRCount = Val;
6171 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
6172 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6174 return Error(IDRange.Start,
6175 "directive is not supported with architected flat scratch",
6176 IDRange);
6178 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6179 ExprVal, ValRange);
6180 if (Val)
6181 ImpliedUserSGPRCount += 4;
6182 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
6183 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6184 if (!hasKernargPreload())
6185 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6186
6187 if (Val > getMaxNumUserSGPRs())
6188 return OutOfRangeError(ValRange);
6189 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
6190 ValRange);
6191 if (Val) {
6192 ImpliedUserSGPRCount += Val;
6193 PreloadLength = Val;
6194 }
6195 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
6196 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6197 if (!hasKernargPreload())
6198 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6199
6200 if (Val >= 1024)
6201 return OutOfRangeError(ValRange);
6202 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
6203 ValRange);
6204 if (Val)
6205 PreloadOffset = Val;
6206 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
6207 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6209 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6210 ValRange);
6211 if (Val)
6212 ImpliedUserSGPRCount += 2;
6213 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
6214 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6216 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6217 ValRange);
6218 if (Val)
6219 ImpliedUserSGPRCount += 2;
6220 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
6221 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6223 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6224 ExprVal, ValRange);
6225 if (Val)
6226 ImpliedUserSGPRCount += 2;
6227 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
6228 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6230 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6231 ValRange);
6232 if (Val)
6233 ImpliedUserSGPRCount += 2;
6234 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
6236 return Error(IDRange.Start,
6237 "directive is not supported with architected flat scratch",
6238 IDRange);
6239 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6241 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6242 ExprVal, ValRange);
6243 if (Val)
6244 ImpliedUserSGPRCount += 2;
6245 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
6246 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6248 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6249 ExprVal, ValRange);
6250 if (Val)
6251 ImpliedUserSGPRCount += 1;
6252 } else if (ID == ".amdhsa_wavefront_size32") {
6253 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6254 if (IVersion.Major < 10)
6255 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6256 EnableWavefrontSize32 = Val;
6258 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6259 ValRange);
6260 } else if (ID == ".amdhsa_uses_dynamic_stack") {
6262 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6263 ValRange);
6264 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6266 return Error(IDRange.Start,
6267 "directive is not supported with architected flat scratch",
6268 IDRange);
6270 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6271 ValRange);
6272 } else if (ID == ".amdhsa_enable_private_segment") {
6274 return Error(
6275 IDRange.Start,
6276 "directive is not supported without architected flat scratch",
6277 IDRange);
6279 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6280 ValRange);
6281 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
6283 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6284 ValRange);
6285 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
6287 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6288 ValRange);
6289 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
6291 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6292 ValRange);
6293 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
6295 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6296 ValRange);
6297 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
6299 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6300 ValRange);
6301 } else if (ID == ".amdhsa_next_free_vgpr") {
6302 VGPRRange = ValRange;
6303 NextFreeVGPR = ExprVal;
6304 } else if (ID == ".amdhsa_next_free_sgpr") {
6305 SGPRRange = ValRange;
6306 NextFreeSGPR = ExprVal;
6307 } else if (ID == ".amdhsa_accum_offset") {
6308 if (!isGFX90A())
6309 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6310 AccumOffset = ExprVal;
6311 } else if (ID == ".amdhsa_named_barrier_count") {
6312 if (!isGFX1250Plus())
6313 return Error(IDRange.Start, "directive requires gfx1250+", IDRange);
6314 NamedBarCnt = ExprVal;
6315 } else if (ID == ".amdhsa_reserve_vcc") {
6316 if (EvaluatableExpr && !isUInt<1>(Val))
6317 return OutOfRangeError(ValRange);
6318 ReserveVCC = ExprVal;
6319 } else if (ID == ".amdhsa_reserve_flat_scratch") {
6320 if (IVersion.Major < 7)
6321 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
6323 return Error(IDRange.Start,
6324 "directive is not supported with architected flat scratch",
6325 IDRange);
6326 if (EvaluatableExpr && !isUInt<1>(Val))
6327 return OutOfRangeError(ValRange);
6328 ReserveFlatScr = ExprVal;
6329 } else if (ID == ".amdhsa_reserve_xnack_mask") {
6330 if (IVersion.Major < 8)
6331 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
6332 if (!isUInt<1>(Val))
6333 return OutOfRangeError(ValRange);
6334 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6335 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
6336 IDRange);
6337 } else if (ID == ".amdhsa_float_round_mode_32") {
6339 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6340 ValRange);
6341 } else if (ID == ".amdhsa_float_round_mode_16_64") {
6343 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6344 ValRange);
6345 } else if (ID == ".amdhsa_float_denorm_mode_32") {
6347 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6348 ValRange);
6349 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
6351 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6352 ValRange);
6353 } else if (ID == ".amdhsa_dx10_clamp") {
6354 if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
6355 return Error(IDRange.Start, "directive unsupported on gfx1170+",
6356 IDRange);
6358 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6359 ValRange);
6360 } else if (ID == ".amdhsa_ieee_mode") {
6361 if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
6362 return Error(IDRange.Start, "directive unsupported on gfx1170+",
6363 IDRange);
6365 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6366 ValRange);
6367 } else if (ID == ".amdhsa_fp16_overflow") {
6368 if (IVersion.Major < 9)
6369 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
6371 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6372 ValRange);
6373 } else if (ID == ".amdhsa_tg_split") {
6374 if (!isGFX90A())
6375 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6376 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
6377 ExprVal, ValRange);
6378 } else if (ID == ".amdhsa_workgroup_processor_mode") {
6379 if (!supportsWGP(getSTI()))
6380 return Error(IDRange.Start,
6381 "directive unsupported on " + getSTI().getCPU(), IDRange);
6383 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6384 ValRange);
6385 } else if (ID == ".amdhsa_memory_ordered") {
6386 if (IVersion.Major < 10)
6387 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6389 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6390 ValRange);
6391 } else if (ID == ".amdhsa_forward_progress") {
6392 if (IVersion.Major < 10)
6393 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6395 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6396 ValRange);
6397 } else if (ID == ".amdhsa_shared_vgpr_count") {
6398 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6399 if (IVersion.Major < 10 || IVersion.Major >= 12)
6400 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
6401 IDRange);
6402 SharedVGPRCount = Val;
6404 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6405 ValRange);
6406 } else if (ID == ".amdhsa_inst_pref_size") {
6407 if (IVersion.Major < 11)
6408 return Error(IDRange.Start, "directive requires gfx11+", IDRange);
6409 if (IVersion.Major == 11) {
6411 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6412 ValRange);
6413 } else {
6415 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6416 ValRange);
6417 }
6418 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
6421 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6422 ExprVal, ValRange);
6423 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
6425 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6426 ExprVal, ValRange);
6427 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
6430 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6431 ExprVal, ValRange);
6432 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
6434 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6435 ExprVal, ValRange);
6436 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
6438 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6439 ExprVal, ValRange);
6440 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
6442 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6443 ExprVal, ValRange);
6444 } else if (ID == ".amdhsa_exception_int_div_zero") {
6446 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6447 ExprVal, ValRange);
6448 } else if (ID == ".amdhsa_round_robin_scheduling") {
6449 if (IVersion.Major < 12)
6450 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
6452 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6453 ValRange);
6454 } else {
6455 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
6456 }
6457
6458#undef PARSE_BITS_ENTRY
6459 }
6460
// Both next-free register directives are mandatory.
6461 if (!Seen.contains(".amdhsa_next_free_vgpr"))
6462 return TokError(".amdhsa_next_free_vgpr directive is required");
6463
6464 if (!Seen.contains(".amdhsa_next_free_sgpr"))
6465 return TokError(".amdhsa_next_free_sgpr directive is required");
6466
// An explicit .amdhsa_user_sgpr_count takes precedence over the count implied
// by the enabled user SGPR directives.
6467 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6468 if (UserSGPRCount > getMaxNumUserSGPRs())
6469 return TokError("too many user SGPRs enabled, found " +
6470 Twine(UserSGPRCount) + ", but only " +
6471 Twine(getMaxNumUserSGPRs()) + " are supported.");
6472
6473 // Consider the case where the total number of UserSGPRs with trailing
6474 // allocated preload SGPRs, is greater than the number of explicitly
6475 // referenced SGPRs.
6476 if (PreloadLength) {
6477 MCContext &Ctx = getContext();
6478 NextFreeSGPR = AMDGPUMCExpr::createMax(
6479 {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx);
6480 }
6481
6482 const MCExpr *VGPRBlocks;
6483 const MCExpr *SGPRBlocks;
6484 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6485 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6486 EnableWavefrontSize32, NextFreeVGPR,
6487 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6488 SGPRBlocks))
6489 return true;
6490
6491 int64_t EvaluatedVGPRBlocks;
6492 bool VGPRBlocksEvaluatable =
6493 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6494 if (VGPRBlocksEvaluatable &&
6496 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
6497 return OutOfRangeError(VGPRRange);
6498 }
6500 KD.compute_pgm_rsrc1, VGPRBlocks,
6501 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6502 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
6503
6504 int64_t EvaluatedSGPRBlocks;
6505 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6507 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
6508 return OutOfRangeError(SGPRRange);
6510 KD.compute_pgm_rsrc1, SGPRBlocks,
6511 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6512 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
6513
6514 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6515 return TokError("amdgpu_user_sgpr_count smaller than implied by "
6516 "enabled user SGPRs");
6517
6518 if (isGFX1250Plus()) {
6521 MCConstantExpr::create(UserSGPRCount, getContext()),
6522 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6523 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());
6524 } else {
6527 MCConstantExpr::create(UserSGPRCount, getContext()),
6528 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6529 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());
6530 }
6531
// The kernarg size must be a known constant here because the preload
// length/offset check below compares against it.
6532 int64_t IVal = 0;
6533 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
6534 return TokError("Kernarg size should be resolvable");
6535 uint64_t kernarg_size = IVal;
6536 if (PreloadLength && kernarg_size &&
6537 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6538 return TokError("Kernarg preload length + offset is larger than the "
6539 "kernarg segment size");
6540
6541 if (isGFX90A()) {
6542 if (!Seen.contains(".amdhsa_accum_offset"))
6543 return TokError(".amdhsa_accum_offset directive is required");
6544 int64_t EvaluatedAccum;
6545 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
// NOTE(review): EvaluatedAccum has no initializer and is copied here even
// when AccumEvaluatable is false; the copy is only used under
// AccumEvaluatable below. Confirm evaluateAsAbsolute always writes its
// out-parameter, or initialize the variable.
6546 uint64_t UEvaluatedAccum = EvaluatedAccum;
6547 if (AccumEvaluatable &&
6548 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6549 return TokError("accum_offset should be in range [4..256] in "
6550 "increments of 4");
6551
6552 int64_t EvaluatedNumVGPR;
6553 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
6554 AccumEvaluatable &&
6555 UEvaluatedAccum >
6556 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
6557 return TokError("accum_offset exceeds total VGPR allocation");
6558 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
6560 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
6563 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6564 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6565 getContext());
6566 }
6567
6568 if (isGFX1250Plus())
6570 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6571 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
6572 getContext());
6573
6574 if (IVersion.Major >= 10 && IVersion.Major < 12) {
6575 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
6576 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6577 return TokError("shared_vgpr_count directive not valid on "
6578 "wavefront size 32");
6579 }
6580
6581 if (VGPRBlocksEvaluatable &&
6582 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
6583 63)) {
6584 return TokError("shared_vgpr_count*2 + "
6585 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
6586 "exceed 63\n");
6587 }
6588 }
6589
6590 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
6591 NextFreeVGPR, NextFreeSGPR,
6592 ReserveVCC, ReserveFlatScr);
6593 return false;
6594}
6595
6596bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6597 uint32_t Version;
6598 if (ParseAsAbsoluteExpression(Version))
6599 return true;
6600
6601 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
6602 return false;
6603}
6604
6605bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6606 AMDGPUMCKernelCodeT &C) {
6607 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6608 // assembly for backwards compatibility.
6609 if (ID == "max_scratch_backing_memory_byte_size") {
6610 Parser.eatToEndOfStatement();
6611 return false;
6612 }
6613
6614 SmallString<40> ErrStr;
6615 raw_svector_ostream Err(ErrStr);
6616 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
6617 return TokError(Err.str());
6618 }
6619 Lex();
6620
6621 if (ID == "enable_wavefront_size32") {
6622 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6623 if (!isGFX10Plus())
6624 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
6625 if (!isWave32())
6626 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
6627 } else {
6628 if (!isWave64())
6629 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
6630 }
6631 }
6632
6633 if (ID == "wavefront_size") {
6634 if (C.wavefront_size == 5) {
6635 if (!isGFX10Plus())
6636 return TokError("wavefront_size=5 is only allowed on GFX10+");
6637 if (!isWave32())
6638 return TokError("wavefront_size=5 requires +WavefrontSize32");
6639 } else if (C.wavefront_size == 6) {
6640 if (!isWave64())
6641 return TokError("wavefront_size=6 requires +WavefrontSize64");
6642 }
6643 }
6644
6645 return false;
6646}
6647
6648bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6649 AMDGPUMCKernelCodeT KernelCode;
6650 KernelCode.initDefault(getSTI(), getContext());
6651
6652 while (true) {
6653 // Lex EndOfStatement. This is in a while loop, because lexing a comment
6654 // will set the current token to EndOfStatement.
6655 while(trySkipToken(AsmToken::EndOfStatement));
6656
6657 StringRef ID;
6658 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
6659 return true;
6660
6661 if (ID == ".end_amd_kernel_code_t")
6662 break;
6663
6664 if (ParseAMDKernelCodeTValue(ID, KernelCode))
6665 return true;
6666 }
6667
6668 KernelCode.validate(&getSTI(), getContext());
6669 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6670
6671 return false;
6672}
6673
/// Parse a symbol name, emit its symbol type through the target streamer, and
/// reinitialize KernelScope.
///
/// NOTE(review): listing line 6680 (the remaining argument(s) of
/// EmitAMDGPUSymbolType) is missing from this view; confirm against the full
/// file.
///
/// \returns true if no symbol name could be parsed, false otherwise.
6674bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6675 StringRef KernelName;
6676 if (!parseId(KernelName, "expected symbol name"))
6677 return true;
6678
6679 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6681
6682 KernelScope.initialize(getContext());
6683 return false;
6684}
6685
6686bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6687 if (!getSTI().getTargetTriple().isAMDGCN()) {
6688 return Error(getLoc(),
6689 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6690 "architectures");
6691 }
6692
6693 StringRef TargetIDDirective = getLexer().getTok().getStringContents();
6694
6695 std::optional<AMDGPU::TargetID> MaybeParsed =
6696 AMDGPU::TargetID::parseTargetIDString(TargetIDDirective);
6697 if (!MaybeParsed)
6698 return Error(getParser().getTok().getLoc(), "malformed target id");
6699
6700 const AMDGPU::TargetID &ParsedTargetID = *MaybeParsed;
6701 const std::optional<AMDGPU::TargetID> &CurrentTargetID =
6702 getTargetStreamer().getTargetID();
6703
6704 if (*CurrentTargetID != ParsedTargetID) {
6705 return Error(getParser().getTok().getLoc(),
6706 Twine(".amd_amdgpu_isa directive's target id ") +
6707 Twine(ParsedTargetID.toString()) +
6708 Twine(" does not match the specified target id ") +
6709 Twine(CurrentTargetID->toString()));
6710 }
6711
6712 getTargetStreamer().EmitISAVersion();
6713 Lex();
6714
6715 return false;
6716}
6717
6718bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6719 assert(isHsaAbi(getSTI()));
6720
6721 std::string HSAMetadataString;
6722 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6723 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6724 return true;
6725
6726 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6727 return Error(getLoc(), "invalid HSA metadata");
6728
6729 return false;
6730}
6731
6732/// Common code to parse out a block of text (typically YAML) between start and
6733/// end directives.
6734bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6735 const char *AssemblerDirectiveEnd,
6736 std::string &CollectString) {
6737
6738 raw_string_ostream CollectStream(CollectString);
6739
6740 getLexer().setSkipSpace(false);
6741
6742 bool FoundEnd = false;
6743 while (!isToken(AsmToken::Eof)) {
6744 while (isToken(AsmToken::Space)) {
6745 CollectStream << getTokenStr();
6746 Lex();
6747 }
6748
6749 if (trySkipId(AssemblerDirectiveEnd)) {
6750 FoundEnd = true;
6751 break;
6752 }
6753
6754 CollectStream << Parser.parseStringToEndOfStatement()
6755 << getContext().getAsmInfo().getSeparatorString();
6756
6757 Parser.eatToEndOfStatement();
6758 }
6759
6760 getLexer().setSkipSpace(true);
6761
6762 if (isToken(AsmToken::Eof) && !FoundEnd) {
6763 return TokError(Twine("expected directive ") +
6764 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6765 }
6766
6767 return false;
6768}
6769
6770/// Parse the assembler directive for new MsgPack-format PAL metadata.
///
/// Collects the text up to the end directive with ParseToEndDirective and
/// loads it into the target streamer's PAL metadata object via
/// setFromString.
///
/// NOTE(review): listing line 6774 (the remaining arguments of the
/// ParseToEndDirective call) is missing from this view; confirm against the
/// full file.
///
/// \returns true if collecting the text failed or setFromString rejected it,
/// false otherwise.
6771bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6772 std::string String;
6773 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6775 return true;
6776
6777 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6778 if (!PALMetadata->setFromString(String))
6779 return Error(getLoc(), "invalid PAL metadata");
6780 return false;
6781}
6782
6783/// Parse the assembler directive for old linear-format PAL metadata.
///
/// The operand list is a comma-separated sequence of key, value pairs; each
/// pair is recorded with setRegister on the target streamer's PAL metadata
/// object, which is first switched to legacy mode with setLegacy. The
/// directive is rejected unless the target OS is AMDPAL.
///
/// NOTE(review): listing lines 6797, 6801 and 6805 (the tails of the three
/// TokError messages) are missing from this view; confirm against the full
/// file.
///
/// \returns true if an error was reported, false otherwise.
6784bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6785 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6786 return Error(getLoc(),
6787 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6788 "not available on non-amdpal OSes")).str());
6789 }
6790
6791 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6792 PALMetadata->setLegacy();
// Each iteration consumes one "key, value" pair; the loop stops when the
// pair is not followed by another comma.
6793 for (;;) {
6794 uint32_t Key, Value;
6795 if (ParseAsAbsoluteExpression(Key)) {
6796 return TokError(Twine("invalid value in ") +
6798 }
6799 if (!trySkipToken(AsmToken::Comma)) {
6800 return TokError(Twine("expected an even number of values in ") +
6802 }
6803 if (ParseAsAbsoluteExpression(Value)) {
6804 return TokError(Twine("invalid value in ") +
6806 }
6807 PALMetadata->setRegister(Key, Value);
6808 if (!trySkipToken(AsmToken::Comma))
6809 break;
6810 }
6811 return false;
6812}
6813
6814/// ParseDirectiveAMDGPULDS
6815/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
6816bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6817 if (getParser().checkForValidSection())
6818 return true;
6819
6820 StringRef Name;
6821 SMLoc NameLoc = getLoc();
6822 if (getParser().parseIdentifier(Name))
6823 return TokError("expected identifier in directive");
6824
6825 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6826 if (getParser().parseComma())
6827 return true;
6828
6829 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(getSTI());
6830
6831 int64_t Size;
6832 SMLoc SizeLoc = getLoc();
6833 if (getParser().parseAbsoluteExpression(Size))
6834 return true;
6835 if (Size < 0)
6836 return Error(SizeLoc, "size must be non-negative");
6837 if (Size > LocalMemorySize)
6838 return Error(SizeLoc, "size is too large");
6839
6840 int64_t Alignment = 4;
6841 if (trySkipToken(AsmToken::Comma)) {
6842 SMLoc AlignLoc = getLoc();
6843 if (getParser().parseAbsoluteExpression(Alignment))
6844 return true;
6845 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6846 return Error(AlignLoc, "alignment must be a power of two");
6847
6848 // Alignment larger than the size of LDS is possible in theory, as long
6849 // as the linker manages to place to symbol at address 0, but we do want
6850 // to make sure the alignment fits nicely into a 32-bit integer.
6851 if (Alignment >= 1u << 31)
6852 return Error(AlignLoc, "alignment is too large");
6853 }
6854
6855 if (parseEOL())
6856 return true;
6857
6858 Symbol->redefineIfPossible();
6859 if (!Symbol->isUndefined())
6860 return Error(NameLoc, "invalid symbol redefinition");
6861
6862 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6863 return false;
6864}
6865
6866bool AMDGPUAsmParser::ParseDirectiveAMDGPUInfo() {
6867 if (getParser().checkForValidSection())
6868 return true;
6869
6870 StringRef FuncName;
6871 if (getParser().parseIdentifier(FuncName))
6872 return TokError("expected symbol name after .amdgpu_info");
6873
6874 MCSymbol *FuncSym = getContext().getOrCreateSymbol(FuncName);
6875 AMDGPU::InfoSectionData ParsedInfoData;
6876 AMDGPU::FuncInfo FI;
6877 FI.Sym = FuncSym;
6878 bool HasScalarAttrs = false;
6879
6880 while (true) {
6881 while (trySkipToken(AsmToken::EndOfStatement))
6882 ;
6883
6884 StringRef ID;
6885 SMLoc IDLoc = getLoc();
6886 if (!parseId(ID, "expected directive or .end_amdgpu_info"))
6887 return true;
6888
6889 if (ID == ".end_amdgpu_info")
6890 break;
6891
6892 // Every per-entry directive shares the `.amdgpu_` namespace prefix; strip
6893 // it once and dispatch on the distinguishing suffix below. The unstripped
6894 // ID is preserved for diagnostics.
6895 StringRef Dir = ID;
6896 if (!Dir.consume_front(".amdgpu_"))
6897 return Error(IDLoc, "unknown .amdgpu_info directive '" + ID + "'");
6898
6899 if (Dir == "flags") {
6900 int64_t Val;
6901 if (getParser().parseAbsoluteExpression(Val))
6902 return true;
6903 auto Flags = static_cast<AMDGPU::FuncInfoFlags>(Val);
6904 FI.UsesVCC = !!(Flags & AMDGPU::FuncInfoFlags::FUNC_USES_VCC);
6905 FI.UsesFlatScratch =
6906 !!(Flags & AMDGPU::FuncInfoFlags::FUNC_USES_FLAT_SCRATCH);
6907 FI.HasDynStack = !!(Flags & AMDGPU::FuncInfoFlags::FUNC_HAS_DYN_STACK);
6908 HasScalarAttrs = true;
6909 } else if (Dir == "num_sgpr") {
6910 int64_t Val;
6911 if (getParser().parseAbsoluteExpression(Val))
6912 return true;
6913 FI.NumSGPR = static_cast<uint32_t>(Val);
6914 HasScalarAttrs = true;
6915 } else if (Dir == "num_vgpr") {
6916 int64_t Val;
6917 if (getParser().parseAbsoluteExpression(Val))
6918 return true;
6919 FI.NumArchVGPR = static_cast<uint32_t>(Val);
6920 HasScalarAttrs = true;
6921 } else if (Dir == "num_agpr") {
6922 int64_t Val;
6923 if (getParser().parseAbsoluteExpression(Val))
6924 return true;
6925 FI.NumAccVGPR = static_cast<uint32_t>(Val);
6926 HasScalarAttrs = true;
6927 } else if (Dir == "private_segment_size") {
6928 int64_t Val;
6929 if (getParser().parseAbsoluteExpression(Val))
6930 return true;
6931 FI.PrivateSegmentSize = static_cast<uint32_t>(Val);
6932 HasScalarAttrs = true;
6933 } else if (Dir == "use") {
6934 StringRef ResName;
6935 if (getParser().parseIdentifier(ResName))
6936 return TokError("expected resource symbol for .amdgpu_use");
6937 ParsedInfoData.Uses.push_back(
6938 {FuncSym, getContext().getOrCreateSymbol(ResName)});
6939 } else if (Dir == "call") {
6940 StringRef DstName;
6941 if (getParser().parseIdentifier(DstName))
6942 return TokError("expected callee symbol for .amdgpu_call");
6943 ParsedInfoData.Calls.push_back(
6944 {FuncSym, getContext().getOrCreateSymbol(DstName)});
6945 } else if (Dir == "indirect_call") {
6946 std::string TypeId;
6947 if (getParser().parseEscapedString(TypeId))
6948 return TokError("expected type ID string for .amdgpu_indirect_call");
6949 ParsedInfoData.IndirectCalls.push_back({FuncSym, std::move(TypeId)});
6950 } else if (Dir == "typeid") {
6951 std::string TypeId;
6952 if (getParser().parseEscapedString(TypeId))
6953 return TokError("expected type ID string for .amdgpu_typeid");
6954 ParsedInfoData.TypeIds.push_back({FuncSym, std::move(TypeId)});
6955 } else {
6956 return Error(IDLoc, "unknown .amdgpu_info directive '" + ID + "'");
6957 }
6958 }
6959
6960 if (HasScalarAttrs)
6961 ParsedInfoData.Funcs.push_back(std::move(FI));
6962
6963 AMDGPU::InfoSectionData &Data = InfoData ? *InfoData : InfoData.emplace();
6964 for (AMDGPU::FuncInfo &Func : ParsedInfoData.Funcs)
6965 Data.Funcs.push_back(std::move(Func));
6966 for (std::pair<MCSymbol *, MCSymbol *> &Use : ParsedInfoData.Uses)
6967 Data.Uses.push_back(Use);
6968 for (std::pair<MCSymbol *, MCSymbol *> &Call : ParsedInfoData.Calls)
6969 Data.Calls.push_back(Call);
6970 for (std::pair<MCSymbol *, std::string> &IndirectCall :
6971 ParsedInfoData.IndirectCalls)
6972 Data.IndirectCalls.push_back(std::move(IndirectCall));
6973 for (std::pair<MCSymbol *, std::string> &TypeId : ParsedInfoData.TypeIds)
6974 Data.TypeIds.push_back(std::move(TypeId));
6975
6976 return false;
6977}
6978
6979void AMDGPUAsmParser::onEndOfFile() {
6980 if (InfoData)
6981 getTargetStreamer().emitAMDGPUInfo(*InfoData);
6982}
6983
6984bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6985 StringRef IDVal = DirectiveID.getString();
6986
6987 if (isHsaAbi(getSTI())) {
6988 if (IDVal == ".amdhsa_kernel")
6989 return ParseDirectiveAMDHSAKernel();
6990
6991 if (IDVal == ".amdhsa_code_object_version")
6992 return ParseDirectiveAMDHSACodeObjectVersion();
6993
6994 // TODO: Restructure/combine with PAL metadata directive.
6996 return ParseDirectiveHSAMetadata();
6997 } else {
6998 if (IDVal == ".amd_kernel_code_t")
6999 return ParseDirectiveAMDKernelCodeT();
7000
7001 if (IDVal == ".amdgpu_hsa_kernel")
7002 return ParseDirectiveAMDGPUHsaKernel();
7003
7004 if (IDVal == ".amd_amdgpu_isa")
7005 return ParseDirectiveISAVersion();
7006
7008 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
7009 Twine(" directive is "
7010 "not available on non-amdhsa OSes"))
7011 .str());
7012 }
7013 }
7014
7015 if (IDVal == ".amdgcn_target")
7016 return ParseDirectiveAMDGCNTarget();
7017
7018 if (IDVal == ".amdgpu_lds")
7019 return ParseDirectiveAMDGPULDS();
7020
7021 if (IDVal == ".amdgpu_info")
7022 return ParseDirectiveAMDGPUInfo();
7023
7024 if (IDVal == PALMD::AssemblerDirectiveBegin)
7025 return ParseDirectivePALMetadataBegin();
7026
7027 if (IDVal == PALMD::AssemblerDirective)
7028 return ParseDirectivePALMetadata();
7029
7030 return true;
7031}
7032
7033bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
7034 MCRegister Reg) {
7035 if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
7036 return isGFX9Plus();
7037
7038 // GFX10+ has 2 more SGPRs 104 and 105.
7039 if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
7040 return hasSGPR104_SGPR105();
7041
7042 switch (Reg.id()) {
7043 case SRC_SHARED_BASE_LO:
7044 case SRC_SHARED_BASE:
7045 case SRC_SHARED_LIMIT_LO:
7046 case SRC_SHARED_LIMIT:
7047 case SRC_PRIVATE_BASE_LO:
7048 case SRC_PRIVATE_BASE:
7049 case SRC_PRIVATE_LIMIT_LO:
7050 case SRC_PRIVATE_LIMIT:
7051 return isGFX9Plus();
7052 case SRC_FLAT_SCRATCH_BASE_LO:
7053 case SRC_FLAT_SCRATCH_BASE_HI:
7054 return hasGloballyAddressableScratch();
7055 case SRC_POPS_EXITING_WAVE_ID:
7056 return isGFX9Plus() && !isGFX11Plus();
7057 case TBA:
7058 case TBA_LO:
7059 case TBA_HI:
7060 case TMA:
7061 case TMA_LO:
7062 case TMA_HI:
7063 return !isGFX9Plus();
7064 case XNACK_MASK:
7065 case XNACK_MASK_LO:
7066 case XNACK_MASK_HI:
7067 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
7068 case SGPR_NULL:
7069 return isGFX10Plus();
7070 case SRC_EXECZ:
7071 case SRC_VCCZ:
7072 return !isGFX11Plus();
7073 default:
7074 break;
7075 }
7076
7077 if (isCI())
7078 return true;
7079
7080 if (isSI() || isGFX10Plus()) {
7081 // No flat_scr on SI.
7082 // On GFX10Plus flat scratch is not a valid register operand and can only be
7083 // accessed with s_setreg/s_getreg.
7084 switch (Reg.id()) {
7085 case FLAT_SCR:
7086 case FLAT_SCR_LO:
7087 case FLAT_SCR_HI:
7088 return false;
7089 default:
7090 return true;
7091 }
7092 }
7093
7094 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
7095 // SI/CI have.
7096 if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
7097 return hasSGPR102_SGPR103();
7098
7099 return true;
7100}
7101
7102ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
7103 StringRef Mnemonic,
7104 OperandMode Mode) {
7105 ParseStatus Res = parseVOPD(Operands);
7106 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
7107 return Res;
7108
7109 // Try to parse with a custom parser
7110 Res = MatchOperandParserImpl(Operands, Mnemonic);
7111
7112 // If we successfully parsed the operand or if there as an error parsing,
7113 // we are done.
7114 //
7115 // If we are parsing after we reach EndOfStatement then this means we
7116 // are appending default values to the Operands list. This is only done
7117 // by custom parser, so we shouldn't continue on to the generic parsing.
7118 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
7119 return Res;
7120
7121 SMLoc RBraceLoc;
7122 SMLoc LBraceLoc = getLoc();
7123 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
7124 unsigned Prefix = Operands.size();
7125
7126 for (;;) {
7127 auto Loc = getLoc();
7128 Res = parseReg(Operands);
7129 if (Res.isNoMatch())
7130 Error(Loc, "expected a register");
7131 if (!Res.isSuccess())
7132 return ParseStatus::Failure;
7133
7134 RBraceLoc = getLoc();
7135 if (trySkipToken(AsmToken::RBrac))
7136 break;
7137
7138 if (!skipToken(AsmToken::Comma,
7139 "expected a comma or a closing square bracket"))
7140 return ParseStatus::Failure;
7141 }
7142
7143 if (Operands.size() - Prefix > 1) {
7144 Operands.insert(Operands.begin() + Prefix,
7145 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
7146 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
7147 }
7148
7149 return ParseStatus::Success;
7150 }
7151
7152 return parseRegOrImm(Operands);
7153}
7154
7155StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
7156 // Clear any forced encodings from the previous instruction.
7157 setForcedEncodingSize(0);
7158 setForcedDPP(false);
7159 setForcedSDWA(false);
7160
7161 if (Name.consume_back("_e64_dpp")) {
7162 setForcedDPP(true);
7163 setForcedEncodingSize(64);
7164 return Name;
7165 }
7166 if (Name.consume_back("_e64")) {
7167 setForcedEncodingSize(64);
7168 return Name;
7169 }
7170 if (Name.consume_back("_e32")) {
7171 setForcedEncodingSize(32);
7172 return Name;
7173 }
7174 if (Name.consume_back("_dpp")) {
7175 setForcedDPP(true);
7176 return Name;
7177 }
7178 if (Name.consume_back("_sdwa")) {
7179 setForcedSDWA(true);
7180 return Name;
7181 }
7182 return Name;
7183}
7184
// Forward declaration of the mnemonic alias helper used by parseInstruction()
// below. It takes the mnemonic by reference, so it may rewrite it in place.
// NOTE(review): the definition is not visible in this part of the file.
7185static void applyMnemonicAliases(StringRef &Mnemonic,
7186 const FeatureBitset &Features,
7187 unsigned VariantID);
7188
7189bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
7190 StringRef Name, SMLoc NameLoc,
7191 OperandVector &Operands) {
7192 // Add the instruction mnemonic
7193 Name = parseMnemonicSuffix(Name);
7194
7195 // If the target architecture uses MnemonicAlias, call it here to parse
7196 // operands correctly.
7197 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
7198
7199 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
7200
7201 bool IsMIMG = Name.starts_with("image_");
7202
7203 while (!trySkipToken(AsmToken::EndOfStatement)) {
7204 OperandMode Mode = OperandMode_Default;
7205 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
7206 Mode = OperandMode_NSA;
7207 ParseStatus Res = parseOperand(Operands, Name, Mode);
7208
7209 if (!Res.isSuccess()) {
7210 checkUnsupportedInstruction(Name, NameLoc);
7211 if (!Parser.hasPendingError()) {
7212 // FIXME: use real operand location rather than the current location.
7213 StringRef Msg = Res.isFailure() ? "failed parsing operand."
7214 : "not a valid operand.";
7215 Error(getLoc(), Msg);
7216 }
7217 while (!trySkipToken(AsmToken::EndOfStatement)) {
7218 lex();
7219 }
7220 return true;
7221 }
7222
7223 // Eat the comma or space if there is one.
7224 trySkipToken(AsmToken::Comma);
7225 }
7226
7227 return false;
7228}
7229
7230//===----------------------------------------------------------------------===//
7231// Utility functions
7232//===----------------------------------------------------------------------===//
7233
7234ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
7235 OperandVector &Operands) {
7236 SMLoc S = getLoc();
7237 if (!trySkipId(Name))
7238 return ParseStatus::NoMatch;
7239
7240 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
7241 return ParseStatus::Success;
7242}
7243
7244ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
7245 int64_t &IntVal) {
7246
7247 if (!trySkipId(Prefix, AsmToken::Colon))
7248 return ParseStatus::NoMatch;
7249
7251}
7252
7253ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
7254 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7255 std::function<bool(int64_t &)> ConvertResult) {
7256 SMLoc S = getLoc();
7257 int64_t Value = 0;
7258
7259 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
7260 if (!Res.isSuccess())
7261 return Res;
7262
7263 if (ConvertResult && !ConvertResult(Value)) {
7264 Error(S, "invalid " + StringRef(Prefix) + " value.");
7265 }
7266
7267 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
7268 return ParseStatus::Success;
7269}
7270
7271ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
7272 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7273 bool (*ConvertResult)(int64_t &)) {
7274 SMLoc S = getLoc();
7275 if (!trySkipId(Prefix, AsmToken::Colon))
7276 return ParseStatus::NoMatch;
7277
7278 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
7279 return ParseStatus::Failure;
7280
7281 unsigned Val = 0;
7282 const unsigned MaxSize = 4;
7283
7284 // FIXME: How to verify the number of elements matches the number of src
7285 // operands?
7286 for (int I = 0; ; ++I) {
7287 int64_t Op;
7288 SMLoc Loc = getLoc();
7289 if (!parseExpr(Op))
7290 return ParseStatus::Failure;
7291
7292 if (Op != 0 && Op != 1)
7293 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
7294
7295 Val |= (Op << I);
7296
7297 if (trySkipToken(AsmToken::RBrac))
7298 break;
7299
7300 if (I + 1 == MaxSize)
7301 return Error(getLoc(), "expected a closing square bracket");
7302
7303 if (!skipToken(AsmToken::Comma, "expected a comma"))
7304 return ParseStatus::Failure;
7305 }
7306
7307 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
7308 return ParseStatus::Success;
7309}
7310
7311ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
7312 OperandVector &Operands,
7313 AMDGPUOperand::ImmTy ImmTy,
7314 bool IgnoreNegative) {
7315 int64_t Bit;
7316 SMLoc S = getLoc();
7317
7318 if (trySkipId(Name)) {
7319 Bit = 1;
7320 } else if (trySkipId("no", Name)) {
7321 if (IgnoreNegative)
7322 return ParseStatus::Success;
7323 Bit = 0;
7324 } else {
7325 return ParseStatus::NoMatch;
7326 }
7327
7328 if (Name == "r128" && !hasMIMG_R128())
7329 return Error(S, "r128 modifier is not supported on this GPU");
7330 if (Name == "a16" && !hasA16())
7331 return Error(S, "a16 modifier is not supported on this GPU");
7332
7333 if (Bit == 0 && Name == "gds") {
7334 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7335 if (Mnemo.starts_with("ds_gws"))
7336 return Error(S, "nogds is not allowed");
7337 }
7338
7339 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
7340 ImmTy = AMDGPUOperand::ImmTyR128A16;
7341
7342 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
7343 return ParseStatus::Success;
7344}
7345
7346unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7347 bool &Disabling) const {
7348 Disabling = Id.consume_front("no");
7349
7350 if (isGFX940() && !Mnemo.starts_with("s_")) {
7351 return StringSwitch<unsigned>(Id)
7352 .Case("nt", AMDGPU::CPol::NT)
7353 .Case("sc0", AMDGPU::CPol::SC0)
7354 .Case("sc1", AMDGPU::CPol::SC1)
7355 .Default(0);
7356 }
7357
7358 return StringSwitch<unsigned>(Id)
7359 .Case("dlc", AMDGPU::CPol::DLC)
7360 .Case("glc", AMDGPU::CPol::GLC)
7361 .Case("scc", AMDGPU::CPol::SCC)
7362 .Case("slc", AMDGPU::CPol::SLC)
7363 .Default(0);
7364}
7365
7366ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
7367 if (isGFX12Plus()) {
7368 SMLoc StringLoc = getLoc();
7369
7370 int64_t CPolVal = 0;
7371 ParseStatus ResTH = ParseStatus::NoMatch;
7372 ParseStatus ResScope = ParseStatus::NoMatch;
7373 ParseStatus ResNV = ParseStatus::NoMatch;
7374 ParseStatus ResScal = ParseStatus::NoMatch;
7375
7376 for (;;) {
7377 if (ResTH.isNoMatch()) {
7378 int64_t TH;
7379 ResTH = parseTH(Operands, TH);
7380 if (ResTH.isFailure())
7381 return ResTH;
7382 if (ResTH.isSuccess()) {
7383 CPolVal |= TH;
7384 continue;
7385 }
7386 }
7387
7388 if (ResScope.isNoMatch()) {
7389 int64_t Scope;
7390 ResScope = parseScope(Operands, Scope);
7391 if (ResScope.isFailure())
7392 return ResScope;
7393 if (ResScope.isSuccess()) {
7394 CPolVal |= Scope;
7395 continue;
7396 }
7397 }
7398
7399 // NV bit exists on GFX12+, but does something starting from GFX1250.
7400 // Allow parsing on all GFX12 and fail on validation for better
7401 // diagnostics.
7402 if (ResNV.isNoMatch()) {
7403 if (trySkipId("nv")) {
7404 ResNV = ParseStatus::Success;
7405 CPolVal |= CPol::NV;
7406 continue;
7407 } else if (trySkipId("no", "nv")) {
7408 ResNV = ParseStatus::Success;
7409 continue;
7410 }
7411 }
7412
7413 if (ResScal.isNoMatch()) {
7414 if (trySkipId("scale_offset")) {
7415 ResScal = ParseStatus::Success;
7416 CPolVal |= CPol::SCAL;
7417 continue;
7418 } else if (trySkipId("no", "scale_offset")) {
7419 ResScal = ParseStatus::Success;
7420 continue;
7421 }
7422 }
7423
7424 break;
7425 }
7426
7427 if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
7428 ResScal.isNoMatch())
7429 return ParseStatus::NoMatch;
7430
7431 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
7432 AMDGPUOperand::ImmTyCPol));
7433 return ParseStatus::Success;
7434 }
7435
7436 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7437 SMLoc OpLoc = getLoc();
7438 unsigned Enabled = 0, Seen = 0;
7439 for (;;) {
7440 SMLoc S = getLoc();
7441 bool Disabling;
7442 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
7443 if (!CPol)
7444 break;
7445
7446 lex();
7447
7448 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
7449 return Error(S, "dlc modifier is not supported on this GPU");
7450
7451 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
7452 return Error(S, "scc modifier is not supported on this GPU");
7453
7454 if (Seen & CPol)
7455 return Error(S, "duplicate cache policy modifier");
7456
7457 if (!Disabling)
7458 Enabled |= CPol;
7459
7460 Seen |= CPol;
7461 }
7462
7463 if (!Seen)
7464 return ParseStatus::NoMatch;
7465
7466 Operands.push_back(
7467 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
7468 return ParseStatus::Success;
7469}
7470
7471ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
7472 int64_t &Scope) {
7473 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
7475
7476 ParseStatus Res = parseStringOrIntWithPrefix(
7477 Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
7478 Scope);
7479
7480 if (Res.isSuccess())
7481 Scope = Scopes[Scope];
7482
7483 return Res;
7484}
7485
7486ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
7487 TH = AMDGPU::CPol::TH_RT; // default
7488
7489 StringRef Value;
7490 SMLoc StringLoc;
7491 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
7492 if (!Res.isSuccess())
7493 return Res;
7494
7495 if (Value == "TH_DEFAULT")
7497 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
7498 Value == "TH_LOAD_NT_WB") {
7499 return Error(StringLoc, "invalid th value");
7500 } else if (Value.consume_front("TH_ATOMIC_")) {
7502 } else if (Value.consume_front("TH_LOAD_")) {
7504 } else if (Value.consume_front("TH_STORE_")) {
7506 } else {
7507 return Error(StringLoc, "invalid th value");
7508 }
7509
7510 if (Value == "BYPASS")
7512
7513 if (TH != 0) {
7515 TH |= StringSwitch<int64_t>(Value)
7516 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7517 .Case("RT", AMDGPU::CPol::TH_RT)
7518 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7519 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
7520 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
7522 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
7523 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
7525 .Default(0xffffffff);
7526 else
7527 TH |= StringSwitch<int64_t>(Value)
7528 .Case("RT", AMDGPU::CPol::TH_RT)
7529 .Case("NT", AMDGPU::CPol::TH_NT)
7530 .Case("HT", AMDGPU::CPol::TH_HT)
7531 .Case("LU", AMDGPU::CPol::TH_LU)
7532 .Case("WB", AMDGPU::CPol::TH_WB)
7533 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
7534 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
7535 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
7536 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
7537 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
7538 .Default(0xffffffff);
7539 }
7540
7541 if (TH == 0xffffffff)
7542 return Error(StringLoc, "invalid th value");
7543
7544 return ParseStatus::Success;
7545}
7546
7547static void
7549 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7550 AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
7551 std::optional<unsigned> InsertAt = std::nullopt) {
7552 auto i = OptionalIdx.find(ImmT);
7553 if (i != OptionalIdx.end()) {
7554 unsigned Idx = i->second;
7555 const AMDGPUOperand &Op =
7556 static_cast<const AMDGPUOperand &>(*Operands[Idx]);
7557 if (InsertAt)
7558 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
7559 else
7560 Op.addImmOperands(Inst, 1);
7561 } else {
7562 if (InsertAt.has_value())
7563 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
7564 else
7566 }
7567}
7568
7569ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7570 StringRef &Value,
7571 SMLoc &StringLoc) {
7572 if (!trySkipId(Prefix, AsmToken::Colon))
7573 return ParseStatus::NoMatch;
7574
7575 StringLoc = getLoc();
7576 return parseId(Value, "expected an identifier") ? ParseStatus::Success
7578}
7579
7580ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7581 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7582 int64_t &IntVal) {
7583 if (!trySkipId(Name, AsmToken::Colon))
7584 return ParseStatus::NoMatch;
7585
7586 SMLoc StringLoc = getLoc();
7587
7588 StringRef Value;
7589 if (isToken(AsmToken::Identifier)) {
7590 Value = getTokenStr();
7591 lex();
7592
7593 for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
7594 if (Value == Ids[IntVal])
7595 break;
7596 } else if (!parseExpr(IntVal))
7597 return ParseStatus::Failure;
7598
7599 if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
7600 return Error(StringLoc, "invalid " + Twine(Name) + " value");
7601
7602 return ParseStatus::Success;
7603}
7604
7605ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7606 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7607 AMDGPUOperand::ImmTy Type) {
7608 SMLoc S = getLoc();
7609 int64_t IntVal;
7610
7611 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7612 if (Res.isSuccess())
7613 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
7614
7615 return Res;
7616}
7617
7618//===----------------------------------------------------------------------===//
7619// MTBUF format
7620//===----------------------------------------------------------------------===//
7621
7622bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
7623 int64_t MaxVal,
7624 int64_t &Fmt) {
7625 int64_t Val;
7626 SMLoc Loc = getLoc();
7627
7628 auto Res = parseIntWithPrefix(Pref, Val);
7629 if (Res.isFailure())
7630 return false;
7631 if (Res.isNoMatch())
7632 return true;
7633
7634 if (Val < 0 || Val > MaxVal) {
7635 Error(Loc, Twine("out of range ", StringRef(Pref)));
7636 return false;
7637 }
7638
7639 Fmt = Val;
7640 return true;
7641}
7642
7643ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
7644 AMDGPUOperand::ImmTy ImmTy) {
7645 const char *Pref = "index_key";
7646 int64_t ImmVal = 0;
7647 SMLoc Loc = getLoc();
7648 auto Res = parseIntWithPrefix(Pref, ImmVal);
7649 if (!Res.isSuccess())
7650 return Res;
7651
7652 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7653 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7654 (ImmVal < 0 || ImmVal > 1))
7655 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7656
7657 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7658 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7659
7660 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
7661 return ParseStatus::Success;
7662}
7663
/// Parse an `index_key` modifier as an ImmTyIndexKey8bit operand; see
/// tryParseIndexKey() for the accepted range.
7664ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
7665 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
7666}
7667
/// Parse an `index_key` modifier as an ImmTyIndexKey16bit operand; see
/// tryParseIndexKey() for the accepted range.
7668ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
7669 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
7670}
7671
/// Parse an `index_key` modifier as an ImmTyIndexKey32bit operand; see
/// tryParseIndexKey() for the accepted range.
7672ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
7673 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
7674}
7675
/// Parse a `<Name>:<value>` matrix format modifier, where the value is one of
/// the names in WMMAMods::ModMatrixFmt or an index into that list, and append
/// it as an immediate operand of type \p Type.
7676ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(OperandVector &Operands,
7677 StringRef Name,
7678 AMDGPUOperand::ImmTy Type) {
7679 return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixFmt,
7680 Type);
7681}
7682
/// Parse the `matrix_a_fmt` modifier into an ImmTyMatrixAFMT operand.
7683ParseStatus AMDGPUAsmParser::parseMatrixAFMT(OperandVector &Operands) {
7684 return tryParseMatrixFMT(Operands, "matrix_a_fmt",
7685 AMDGPUOperand::ImmTyMatrixAFMT);
7686}
7687
/// Parse the `matrix_b_fmt` modifier into an ImmTyMatrixBFMT operand.
7688ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
7689 return tryParseMatrixFMT(Operands, "matrix_b_fmt",
7690 AMDGPUOperand::ImmTyMatrixBFMT);
7691}
7692
/// Parse a `<Name>:<value>` matrix scale modifier, where the value is one of
/// the names in WMMAMods::ModMatrixScale or an index into that list, and
/// append it as an immediate operand of type \p Type.
7693ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
7694 StringRef Name,
7695 AMDGPUOperand::ImmTy Type) {
7696 return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixScale,
7697 Type);
7698}
7699
/// Parse the `matrix_a_scale` modifier into an ImmTyMatrixAScale operand.
7700ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
7701 return tryParseMatrixScale(Operands, "matrix_a_scale",
7702 AMDGPUOperand::ImmTyMatrixAScale);
7703}
7704
/// Parse the `matrix_b_scale` modifier into an ImmTyMatrixBScale operand.
7705ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
7706 return tryParseMatrixScale(Operands, "matrix_b_scale",
7707 AMDGPUOperand::ImmTyMatrixBScale);
7708}
7709
/// Parse a `<Name>:<value>` matrix scale format modifier, where the value is
/// one of the names in WMMAMods::ModMatrixScaleFmt or an index into that list,
/// and append it as an immediate operand of type \p Type.
7710ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
7711 StringRef Name,
7712 AMDGPUOperand::ImmTy Type) {
7713 return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixScaleFmt,
7714 Type);
7715}
7716
/// Parse the `matrix_a_scale_fmt` modifier into an ImmTyMatrixAScaleFmt
/// operand.
7717ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
7718 return tryParseMatrixScaleFmt(Operands, "matrix_a_scale_fmt",
7719 AMDGPUOperand::ImmTyMatrixAScaleFmt);
7720}
7721
/// Parse the `matrix_b_scale_fmt` modifier into an ImmTyMatrixBScaleFmt
/// operand.
7722ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
7723 return tryParseMatrixScaleFmt(Operands, "matrix_b_scale_fmt",
7724 AMDGPUOperand::ImmTyMatrixBScaleFmt);
7725}
7726
// Parses the legacy `dfmt:<n>` / `nfmt:<n>` pair into one combined value.
// Returns NoMatch when neither modifier is present, Failure when either one
// fails to parse or is out of range, and Success with \p Format set to
// encodeDfmtNfmt(dfmt, nfmt) otherwise. A modifier that was not written takes
// DFMT_DEFAULT / NFMT_DEFAULT.
7727// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
7728// values to live in a joint format operand in the MCInst encoding.
7729ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
7730 using namespace llvm::AMDGPU::MTBUFFormat;
7731
// The *_UNDEF values double as "not seen yet" markers: tryParseFmt() leaves
// its output untouched when the prefix is absent.
7732 int64_t Dfmt = DFMT_UNDEF;
7733 int64_t Nfmt = NFMT_UNDEF;
7734
7735 // dfmt and nfmt can appear in either order, and each is optional.
// Two passes are enough: each pass tries whichever of the two is still
// missing, so "nfmt, dfmt" is picked up on the second pass.
7736 for (int I = 0; I < 2; ++I) {
7737 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
7738 return ParseStatus::Failure;
7739
7740 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
7741 return ParseStatus::Failure;
7742
7743 // Skip optional comma between dfmt/nfmt
7744 // but guard against 2 commas following each other.
// The comma is only consumed while exactly one of the two has been seen.
7745 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7746 !peekToken().is(AsmToken::Comma)) {
7747 trySkipToken(AsmToken::Comma);
7748 }
7749 }
7750
7751 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7752 return ParseStatus::NoMatch;
7753
// Fill in the default for whichever half was omitted.
7754 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7755 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7756
7757 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7758 return ParseStatus::Success;
7759}
7760
7761ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
7762 using namespace llvm::AMDGPU::MTBUFFormat;
7763
7764 int64_t Fmt = UFMT_UNDEF;
7765
7766 if (!tryParseFmt("format", UFMT_MAX, Fmt))
7767 return ParseStatus::Failure;
7768
7769 if (Fmt == UFMT_UNDEF)
7770 return ParseStatus::NoMatch;
7771
7772 Format = Fmt;
7773 return ParseStatus::Success;
7774}
7775
7776bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7777 int64_t &Nfmt,
7778 StringRef FormatStr,
7779 SMLoc Loc) {
7780 using namespace llvm::AMDGPU::MTBUFFormat;
7781 int64_t Format;
7782
7783 Format = getDfmt(FormatStr);
7784 if (Format != DFMT_UNDEF) {
7785 Dfmt = Format;
7786 return true;
7787 }
7788
7789 Format = getNfmt(FormatStr, getSTI());
7790 if (Format != NFMT_UNDEF) {
7791 Nfmt = Format;
7792 return true;
7793 }
7794
7795 Error(Loc, "unsupported format");
7796 return false;
7797}
7798
7799ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7800 SMLoc FormatLoc,
7801 int64_t &Format) {
7802 using namespace llvm::AMDGPU::MTBUFFormat;
7803
7804 int64_t Dfmt = DFMT_UNDEF;
7805 int64_t Nfmt = NFMT_UNDEF;
7806 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7807 return ParseStatus::Failure;
7808
7809 if (trySkipToken(AsmToken::Comma)) {
7810 StringRef Str;
7811 SMLoc Loc = getLoc();
7812 if (!parseId(Str, "expected a format string") ||
7813 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7814 return ParseStatus::Failure;
7815 if (Dfmt == DFMT_UNDEF)
7816 return Error(Loc, "duplicate numeric format");
7817 if (Nfmt == NFMT_UNDEF)
7818 return Error(Loc, "duplicate data format");
7819 }
7820
7821 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7822 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7823
7824 if (isGFX10Plus()) {
7825 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
7826 if (Ufmt == UFMT_UNDEF)
7827 return Error(FormatLoc, "unsupported format");
7828 Format = Ufmt;
7829 } else {
7830 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7831 }
7832
7833 return ParseStatus::Success;
7834}
7835
7836ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7837 SMLoc Loc,
7838 int64_t &Format) {
7839 using namespace llvm::AMDGPU::MTBUFFormat;
7840
7841 auto Id = getUnifiedFormat(FormatStr, getSTI());
7842 if (Id == UFMT_UNDEF)
7843 return ParseStatus::NoMatch;
7844
7845 if (!isGFX10Plus())
7846 return Error(Loc, "unified format is not supported on this GPU");
7847
7848 Format = Id;
7849 return ParseStatus::Success;
7850}
7851
7852ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7853 using namespace llvm::AMDGPU::MTBUFFormat;
7854 SMLoc Loc = getLoc();
7855
7856 if (!parseExpr(Format))
7857 return ParseStatus::Failure;
7858 if (!isValidFormatEncoding(Format, getSTI()))
7859 return Error(Loc, "out of range format");
7860
7861 return ParseStatus::Success;
7862}
7863
7864ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
7865 using namespace llvm::AMDGPU::MTBUFFormat;
7866
7867 if (!trySkipId("format", AsmToken::Colon))
7868 return ParseStatus::NoMatch;
7869
7870 if (trySkipToken(AsmToken::LBrac)) {
7871 StringRef FormatStr;
7872 SMLoc Loc = getLoc();
7873 if (!parseId(FormatStr, "expected a format string"))
7874 return ParseStatus::Failure;
7875
7876 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
7877 if (Res.isNoMatch())
7878 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
7879 if (!Res.isSuccess())
7880 return Res;
7881
7882 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7883 return ParseStatus::Failure;
7884
7885 return ParseStatus::Success;
7886 }
7887
7888 return parseNumericFormat(Format);
7889}
7890
7891ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
7892 using namespace llvm::AMDGPU::MTBUFFormat;
7893
7894 int64_t Format = getDefaultFormatEncoding(getSTI());
7895 ParseStatus Res;
7896 SMLoc Loc = getLoc();
7897
7898 // Parse legacy format syntax.
7899 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
7900 if (Res.isFailure())
7901 return Res;
7902
7903 bool FormatFound = Res.isSuccess();
7904
7905 Operands.push_back(
7906 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7907
7908 if (FormatFound)
7909 trySkipToken(AsmToken::Comma);
7910
7911 if (isToken(AsmToken::EndOfStatement)) {
7912 // We are expecting an soffset operand,
7913 // but let matcher handle the error.
7914 return ParseStatus::Success;
7915 }
7916
7917 // Parse soffset.
7918 Res = parseRegOrImm(Operands);
7919 if (!Res.isSuccess())
7920 return Res;
7921
7922 trySkipToken(AsmToken::Comma);
7923
7924 if (!FormatFound) {
7925 Res = parseSymbolicOrNumericFormat(Format);
7926 if (Res.isFailure())
7927 return Res;
7928 if (Res.isSuccess()) {
7929 auto Size = Operands.size();
7930 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
7931 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7932 Op.setImm(Format);
7933 }
7934 return ParseStatus::Success;
7935 }
7936
7937 if (isId("format") && peekToken().is(AsmToken::Colon))
7938 return Error(getLoc(), "duplicate format");
7939 return ParseStatus::Success;
7940}
7941
7942ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7943 ParseStatus Res =
7944 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7945 if (Res.isNoMatch()) {
7946 Res = parseIntWithPrefix("inst_offset", Operands,
7947 AMDGPUOperand::ImmTyInstOffset);
7948 }
7949 return Res;
7950}
7951
7952ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7953 ParseStatus Res =
7954 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7955 if (Res.isNoMatch())
7956 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7957 return Res;
7958}
7959
7960ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7961 ParseStatus Res =
7962 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7963 if (Res.isNoMatch()) {
7964 Res =
7965 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7966 }
7967 return Res;
7968}
7969
7970//===----------------------------------------------------------------------===//
7971// Exp
7972//===----------------------------------------------------------------------===//
7973
7974void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7975 OptionalImmIndexMap OptionalIdx;
7976
7977 unsigned OperandIdx[4];
7978 unsigned EnMask = 0;
7979 int SrcIdx = 0;
7980
7981 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7982 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7983
7984 // Add the register arguments
7985 if (Op.isReg()) {
7986 assert(SrcIdx < 4);
7987 OperandIdx[SrcIdx] = Inst.size();
7988 Op.addRegOperands(Inst, 1);
7989 ++SrcIdx;
7990 continue;
7991 }
7992
7993 if (Op.isOff()) {
7994 assert(SrcIdx < 4);
7995 OperandIdx[SrcIdx] = Inst.size();
7996 Inst.addOperand(MCOperand::createReg(MCRegister()));
7997 ++SrcIdx;
7998 continue;
7999 }
8000
8001 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
8002 Op.addImmOperands(Inst, 1);
8003 continue;
8004 }
8005
8006 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
8007 continue;
8008
8009 // Handle optional arguments
8010 OptionalIdx[Op.getImmTy()] = i;
8011 }
8012
8013 assert(SrcIdx == 4);
8014
8015 bool Compr = false;
8016 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
8017 Compr = true;
8018 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
8019 Inst.getOperand(OperandIdx[2]).setReg(MCRegister());
8020 Inst.getOperand(OperandIdx[3]).setReg(MCRegister());
8021 }
8022
8023 for (auto i = 0; i < SrcIdx; ++i) {
8024 if (Inst.getOperand(OperandIdx[i]).getReg()) {
8025 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
8026 }
8027 }
8028
8029 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
8030 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
8031
8032 Inst.addOperand(MCOperand::createImm(EnMask));
8033}
8034
8035//===----------------------------------------------------------------------===//
8036// s_waitcnt
8037//===----------------------------------------------------------------------===//
8038
8039static bool
8041 const AMDGPU::IsaVersion ISA,
8042 int64_t &IntVal,
8043 int64_t CntVal,
8044 bool Saturate,
8045 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
8046 unsigned (*decode)(const IsaVersion &Version, unsigned))
8047{
8048 bool Failed = false;
8049
8050 IntVal = encode(ISA, IntVal, CntVal);
8051 if (CntVal != decode(ISA, IntVal)) {
8052 if (Saturate) {
8053 IntVal = encode(ISA, IntVal, -1);
8054 } else {
8055 Failed = true;
8056 }
8057 }
8058 return Failed;
8059}
8060
8061bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
8062
8063 SMLoc CntLoc = getLoc();
8064 StringRef CntName = getTokenStr();
8065
8066 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
8067 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
8068 return false;
8069
8070 int64_t CntVal;
8071 SMLoc ValLoc = getLoc();
8072 if (!parseExpr(CntVal))
8073 return false;
8074
8075 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
8076
8077 bool Failed = true;
8078 bool Sat = CntName.ends_with("_sat");
8079
8080 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
8081 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
8082 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
8083 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
8084 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
8085 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
8086 } else {
8087 Error(CntLoc, "invalid counter name " + CntName);
8088 return false;
8089 }
8090
8091 if (Failed) {
8092 Error(ValLoc, "too large value for " + CntName);
8093 return false;
8094 }
8095
8096 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8097 return false;
8098
8099 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
8100 if (isToken(AsmToken::EndOfStatement)) {
8101 Error(getLoc(), "expected a counter name");
8102 return false;
8103 }
8104 }
8105
8106 return true;
8107}
8108
8109ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
8110 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
8111 int64_t Waitcnt = getWaitcntBitMask(ISA);
8112 SMLoc S = getLoc();
8113
8114 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8115 while (!isToken(AsmToken::EndOfStatement)) {
8116 if (!parseCnt(Waitcnt))
8117 return ParseStatus::Failure;
8118 }
8119 } else {
8120 if (!parseExpr(Waitcnt))
8121 return ParseStatus::Failure;
8122 }
8123
8124 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
8125 return ParseStatus::Success;
8126}
8127
8128bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
8129 SMLoc FieldLoc = getLoc();
8130 StringRef FieldName = getTokenStr();
8131 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
8132 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
8133 return false;
8134
8135 SMLoc ValueLoc = getLoc();
8136 StringRef ValueName = getTokenStr();
8137 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
8138 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
8139 return false;
8140
8141 unsigned Shift;
8142 if (FieldName == "instid0") {
8143 Shift = 0;
8144 } else if (FieldName == "instskip") {
8145 Shift = 4;
8146 } else if (FieldName == "instid1") {
8147 Shift = 7;
8148 } else {
8149 Error(FieldLoc, "invalid field name " + FieldName);
8150 return false;
8151 }
8152
8153 int Value;
8154 if (Shift == 4) {
8155 // Parse values for instskip.
8156 Value = StringSwitch<int>(ValueName)
8157 .Case("SAME", 0)
8158 .Case("NEXT", 1)
8159 .Case("SKIP_1", 2)
8160 .Case("SKIP_2", 3)
8161 .Case("SKIP_3", 4)
8162 .Case("SKIP_4", 5)
8163 .Default(-1);
8164 } else {
8165 // Parse values for instid0 and instid1.
8166 Value = StringSwitch<int>(ValueName)
8167 .Case("NO_DEP", 0)
8168 .Case("VALU_DEP_1", 1)
8169 .Case("VALU_DEP_2", 2)
8170 .Case("VALU_DEP_3", 3)
8171 .Case("VALU_DEP_4", 4)
8172 .Case("TRANS32_DEP_1", 5)
8173 .Case("TRANS32_DEP_2", 6)
8174 .Case("TRANS32_DEP_3", 7)
8175 .Case("FMA_ACCUM_CYCLE_1", 8)
8176 .Case("SALU_CYCLE_1", 9)
8177 .Case("SALU_CYCLE_2", 10)
8178 .Case("SALU_CYCLE_3", 11)
8179 .Default(-1);
8180 }
8181 if (Value < 0) {
8182 Error(ValueLoc, "invalid value name " + ValueName);
8183 return false;
8184 }
8185
8186 Delay |= Value << Shift;
8187 return true;
8188}
8189
8190ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
8191 int64_t Delay = 0;
8192 SMLoc S = getLoc();
8193
8194 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8195 do {
8196 if (!parseDelay(Delay))
8197 return ParseStatus::Failure;
8198 } while (trySkipToken(AsmToken::Pipe));
8199 } else {
8200 if (!parseExpr(Delay))
8201 return ParseStatus::Failure;
8202 }
8203
8204 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
8205 return ParseStatus::Success;
8206}
8207
8208bool
8209AMDGPUOperand::isSWaitCnt() const {
8210 return isImm();
8211}
8212
8213bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
8214
8215//===----------------------------------------------------------------------===//
8216// DepCtr
8217//===----------------------------------------------------------------------===//
8218
8219void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
8220 StringRef DepCtrName) {
8221 switch (ErrorId) {
8222 case OPR_ID_UNKNOWN:
8223 Error(Loc, Twine("invalid counter name ", DepCtrName));
8224 return;
8225 case OPR_ID_UNSUPPORTED:
8226 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
8227 return;
8228 case OPR_ID_DUPLICATE:
8229 Error(Loc, Twine("duplicate counter name ", DepCtrName));
8230 return;
8231 case OPR_VAL_INVALID:
8232 Error(Loc, Twine("invalid value for ", DepCtrName));
8233 return;
8234 default:
8235 assert(false);
8236 }
8237}
8238
8239bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
8240
8241 using namespace llvm::AMDGPU::DepCtr;
8242
8243 SMLoc DepCtrLoc = getLoc();
8244 StringRef DepCtrName = getTokenStr();
8245
8246 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
8247 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
8248 return false;
8249
8250 int64_t ExprVal;
8251 if (!parseExpr(ExprVal))
8252 return false;
8253
8254 unsigned PrevOprMask = UsedOprMask;
8255 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
8256
8257 if (CntVal < 0) {
8258 depCtrError(DepCtrLoc, CntVal, DepCtrName);
8259 return false;
8260 }
8261
8262 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8263 return false;
8264
8265 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
8266 if (isToken(AsmToken::EndOfStatement)) {
8267 Error(getLoc(), "expected a counter name");
8268 return false;
8269 }
8270 }
8271
8272 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
8273 DepCtr = (DepCtr & ~CntValMask) | CntVal;
8274 return true;
8275}
8276
8277ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
8278 using namespace llvm::AMDGPU::DepCtr;
8279
8280 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
8281 SMLoc Loc = getLoc();
8282
8283 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8284 unsigned UsedOprMask = 0;
8285 while (!isToken(AsmToken::EndOfStatement)) {
8286 if (!parseDepCtr(DepCtr, UsedOprMask))
8287 return ParseStatus::Failure;
8288 }
8289 } else {
8290 if (!parseExpr(DepCtr))
8291 return ParseStatus::Failure;
8292 }
8293
8294 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
8295 return ParseStatus::Success;
8296}
8297
8298bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
8299
8300//===----------------------------------------------------------------------===//
8301// hwreg
8302//===----------------------------------------------------------------------===//
8303
8304ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
8305 OperandInfoTy &Offset,
8306 OperandInfoTy &Width) {
8307 using namespace llvm::AMDGPU::Hwreg;
8308
8309 if (!trySkipId("hwreg", AsmToken::LParen))
8310 return ParseStatus::NoMatch;
8311
8312 // The register may be specified by name or using a numeric code
8313 HwReg.Loc = getLoc();
8314 if (isToken(AsmToken::Identifier) &&
8315 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8316 HwReg.IsSymbolic = true;
8317 lex(); // skip register name
8318 } else if (!parseExpr(HwReg.Val, "a register name")) {
8319 return ParseStatus::Failure;
8320 }
8321
8322 if (trySkipToken(AsmToken::RParen))
8323 return ParseStatus::Success;
8324
8325 // parse optional params
8326 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
8327 return ParseStatus::Failure;
8328
8329 Offset.Loc = getLoc();
8330 if (!parseExpr(Offset.Val))
8331 return ParseStatus::Failure;
8332
8333 if (!skipToken(AsmToken::Comma, "expected a comma"))
8334 return ParseStatus::Failure;
8335
8336 Width.Loc = getLoc();
8337 if (!parseExpr(Width.Val) ||
8338 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8339 return ParseStatus::Failure;
8340
8341 return ParseStatus::Success;
8342}
8343
8344ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
8345 using namespace llvm::AMDGPU::Hwreg;
8346
8347 int64_t ImmVal = 0;
8348 SMLoc Loc = getLoc();
8349
8350 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
8351 HwregId::Default);
8352 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
8353 HwregOffset::Default);
8354 struct : StructuredOpField {
8355 using StructuredOpField::StructuredOpField;
8356 bool validate(AMDGPUAsmParser &Parser) const override {
8357 if (!isUIntN(Width, Val - 1))
8358 return Error(Parser, "only values from 1 to 32 are legal");
8359 return true;
8360 }
8361 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
8362 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
8363
8364 if (Res.isNoMatch())
8365 Res = parseHwregFunc(HwReg, Offset, Width);
8366
8367 if (Res.isSuccess()) {
8368 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
8369 return ParseStatus::Failure;
8370 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
8371 }
8372
8373 if (Res.isNoMatch() &&
8374 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
8376
8377 if (!Res.isSuccess())
8378 return ParseStatus::Failure;
8379
8380 if (!isUInt<16>(ImmVal))
8381 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8382 Operands.push_back(
8383 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
8384 return ParseStatus::Success;
8385}
8386
8387bool AMDGPUOperand::isHwreg() const {
8388 return isImmTy(ImmTyHwreg);
8389}
8390
8391//===----------------------------------------------------------------------===//
8392// sendmsg
8393//===----------------------------------------------------------------------===//
8394
8395bool
8396AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8397 OperandInfoTy &Op,
8398 OperandInfoTy &Stream) {
8399 using namespace llvm::AMDGPU::SendMsg;
8400
8401 Msg.Loc = getLoc();
8402 if (isToken(AsmToken::Identifier) &&
8403 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8404 Msg.IsSymbolic = true;
8405 lex(); // skip message name
8406 } else if (!parseExpr(Msg.Val, "a message name")) {
8407 return false;
8408 }
8409
8410 if (trySkipToken(AsmToken::Comma)) {
8411 Op.IsDefined = true;
8412 Op.Loc = getLoc();
8413 if (isToken(AsmToken::Identifier) &&
8414 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8416 lex(); // skip operation name
8417 } else if (!parseExpr(Op.Val, "an operation name")) {
8418 return false;
8419 }
8420
8421 if (trySkipToken(AsmToken::Comma)) {
8422 Stream.IsDefined = true;
8423 Stream.Loc = getLoc();
8424 if (!parseExpr(Stream.Val))
8425 return false;
8426 }
8427 }
8428
8429 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
8430}
8431
8432bool
8433AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
8434 const OperandInfoTy &Op,
8435 const OperandInfoTy &Stream) {
8436 using namespace llvm::AMDGPU::SendMsg;
8437
8438 // Validation strictness depends on whether message is specified
8439 // in a symbolic or in a numeric form. In the latter case
8440 // only encoding possibility is checked.
8441 bool Strict = Msg.IsSymbolic;
8442
8443 if (Strict) {
8444 if (Msg.Val == OPR_ID_UNSUPPORTED) {
8445 Error(Msg.Loc, "specified message id is not supported on this GPU");
8446 return false;
8447 }
8448 } else {
8449 if (!isValidMsgId(Msg.Val, getSTI())) {
8450 Error(Msg.Loc, "invalid message id");
8451 return false;
8452 }
8453 }
8454 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
8455 if (Op.IsDefined) {
8456 Error(Op.Loc, "message does not support operations");
8457 } else {
8458 Error(Msg.Loc, "missing message operation");
8459 }
8460 return false;
8461 }
8462 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
8463 if (Op.Val == OPR_ID_UNSUPPORTED)
8464 Error(Op.Loc, "specified operation id is not supported on this GPU");
8465 else
8466 Error(Op.Loc, "invalid operation id");
8467 return false;
8468 }
8469 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
8470 Stream.IsDefined) {
8471 Error(Stream.Loc, "message operation does not support streams");
8472 return false;
8473 }
8474 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
8475 Error(Stream.Loc, "invalid message stream id");
8476 return false;
8477 }
8478 return true;
8479}
8480
8481ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
8482 using namespace llvm::AMDGPU::SendMsg;
8483
8484 int64_t ImmVal = 0;
8485 SMLoc Loc = getLoc();
8486
8487 if (trySkipId("sendmsg", AsmToken::LParen)) {
8488 OperandInfoTy Msg(OPR_ID_UNKNOWN);
8489 OperandInfoTy Op(OP_NONE_);
8490 OperandInfoTy Stream(STREAM_ID_NONE_);
8491 if (parseSendMsgBody(Msg, Op, Stream) &&
8492 validateSendMsg(Msg, Op, Stream)) {
8493 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
8494 } else {
8495 return ParseStatus::Failure;
8496 }
8497 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
8498 if (ImmVal < 0 || !isUInt<16>(ImmVal))
8499 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8500 } else {
8501 return ParseStatus::Failure;
8502 }
8503
8504 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
8505 return ParseStatus::Success;
8506}
8507
8508bool AMDGPUOperand::isSendMsg() const {
8509 return isImmTy(ImmTySendMsg);
8510}
8511
8512ParseStatus AMDGPUAsmParser::parseWaitEvent(OperandVector &Operands) {
8513 using namespace llvm::AMDGPU::WaitEvent;
8514
8515 SMLoc Loc = getLoc();
8516 int64_t ImmVal = 0;
8517
8518 StructuredOpField DontWaitExportReady("dont_wait_export_ready", "bit value",
8519 1, 0);
8520 StructuredOpField ExportReady("export_ready", "bit value", 1, 0);
8521
8522 StructuredOpField *TargetBitfield =
8523 isGFX11() ? &DontWaitExportReady : &ExportReady;
8524
8525 ParseStatus Res = parseStructuredOpFields({TargetBitfield});
8526 if (Res.isNoMatch() && parseExpr(ImmVal, "structured immediate"))
8528 else if (Res.isSuccess()) {
8529 if (!validateStructuredOpFields({TargetBitfield}))
8530 return ParseStatus::Failure;
8531 ImmVal = TargetBitfield->Val;
8532 }
8533
8534 if (!Res.isSuccess())
8535 return ParseStatus::Failure;
8536
8537 if (!isUInt<16>(ImmVal))
8538 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8539
8540 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc,
8541 AMDGPUOperand::ImmTyWaitEvent));
8542 return ParseStatus::Success;
8543}
8544
8545bool AMDGPUOperand::isWaitEvent() const { return isImmTy(ImmTyWaitEvent); }
8546
8547//===----------------------------------------------------------------------===//
8548// v_interp
8549//===----------------------------------------------------------------------===//
8550
8551ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
8552 StringRef Str;
8553 SMLoc S = getLoc();
8554
8555 if (!parseId(Str))
8556 return ParseStatus::NoMatch;
8557
8558 int Slot = StringSwitch<int>(Str)
8559 .Case("p10", 0)
8560 .Case("p20", 1)
8561 .Case("p0", 2)
8562 .Default(-1);
8563
8564 if (Slot == -1)
8565 return Error(S, "invalid interpolation slot");
8566
8567 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
8568 AMDGPUOperand::ImmTyInterpSlot));
8569 return ParseStatus::Success;
8570}
8571
8572ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
8573 StringRef Str;
8574 SMLoc S = getLoc();
8575
8576 if (!parseId(Str))
8577 return ParseStatus::NoMatch;
8578
8579 if (!Str.starts_with("attr"))
8580 return Error(S, "invalid interpolation attribute");
8581
8582 StringRef Chan = Str.take_back(2);
8583 int AttrChan = StringSwitch<int>(Chan)
8584 .Case(".x", 0)
8585 .Case(".y", 1)
8586 .Case(".z", 2)
8587 .Case(".w", 3)
8588 .Default(-1);
8589 if (AttrChan == -1)
8590 return Error(S, "invalid or missing interpolation attribute channel");
8591
8592 Str = Str.drop_back(2).drop_front(4);
8593
8594 uint8_t Attr;
8595 if (Str.getAsInteger(10, Attr))
8596 return Error(S, "invalid or missing interpolation attribute number");
8597
8598 if (Attr > 32)
8599 return Error(S, "out of bounds interpolation attribute number");
8600
8601 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
8602
8603 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
8604 AMDGPUOperand::ImmTyInterpAttr));
8605 Operands.push_back(AMDGPUOperand::CreateImm(
8606 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8607 return ParseStatus::Success;
8608}
8609
8610//===----------------------------------------------------------------------===//
8611// exp
8612//===----------------------------------------------------------------------===//
8613
8614ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
8615 using namespace llvm::AMDGPU::Exp;
8616
8617 StringRef Str;
8618 SMLoc S = getLoc();
8619
8620 if (!parseId(Str))
8621 return ParseStatus::NoMatch;
8622
8623 unsigned Id = getTgtId(Str);
8624 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
8625 return Error(S, (Id == ET_INVALID)
8626 ? "invalid exp target"
8627 : "exp target is not supported on this GPU");
8628
8629 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
8630 AMDGPUOperand::ImmTyExpTgt));
8631 return ParseStatus::Success;
8632}
8633
8634//===----------------------------------------------------------------------===//
8635// parser helpers
8636//===----------------------------------------------------------------------===//
8637
8638bool
8639AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
8640 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
8641}
8642
8643bool
8644AMDGPUAsmParser::isId(const StringRef Id) const {
8645 return isId(getToken(), Id);
8646}
8647
8648bool
8649AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
8650 return getTokenKind() == Kind;
8651}
8652
8653StringRef AMDGPUAsmParser::getId() const {
8654 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
8655}
8656
8657bool
8658AMDGPUAsmParser::trySkipId(const StringRef Id) {
8659 if (isId(Id)) {
8660 lex();
8661 return true;
8662 }
8663 return false;
8664}
8665
8666bool
8667AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
8668 if (isToken(AsmToken::Identifier)) {
8669 StringRef Tok = getTokenStr();
8670 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
8671 lex();
8672 return true;
8673 }
8674 }
8675 return false;
8676}
8677
8678bool
8679AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
8680 if (isId(Id) && peekToken().is(Kind)) {
8681 lex();
8682 lex();
8683 return true;
8684 }
8685 return false;
8686}
8687
8688bool
8689AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
8690 if (isToken(Kind)) {
8691 lex();
8692 return true;
8693 }
8694 return false;
8695}
8696
8697bool
8698AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
8699 const StringRef ErrMsg) {
8700 if (!trySkipToken(Kind)) {
8701 Error(getLoc(), ErrMsg);
8702 return false;
8703 }
8704 return true;
8705}
8706
8707bool
8708AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
8709 SMLoc S = getLoc();
8710
8711 const MCExpr *Expr;
8712 if (Parser.parseExpression(Expr))
8713 return false;
8714
8715 if (Expr->evaluateAsAbsolute(Imm))
8716 return true;
8717
8718 if (Expected.empty()) {
8719 Error(S, "expected absolute expression");
8720 } else {
8721 Error(S, Twine("expected ", Expected) +
8722 Twine(" or an absolute expression"));
8723 }
8724 return false;
8725}
8726
8727bool
8728AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
8729 SMLoc S = getLoc();
8730
8731 const MCExpr *Expr;
8732 if (Parser.parseExpression(Expr))
8733 return false;
8734
8735 int64_t IntVal;
8736 if (Expr->evaluateAsAbsolute(IntVal)) {
8737 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
8738 } else {
8739 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8740 }
8741 return true;
8742}
8743
8744bool
8745AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
8746 if (isToken(AsmToken::String)) {
8747 Val = getToken().getStringContents();
8748 lex();
8749 return true;
8750 }
8751 Error(getLoc(), ErrMsg);
8752 return false;
8753}
8754
8755bool
8756AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
8757 if (isToken(AsmToken::Identifier)) {
8758 Val = getTokenStr();
8759 lex();
8760 return true;
8761 }
8762 if (!ErrMsg.empty())
8763 Error(getLoc(), ErrMsg);
8764 return false;
8765}
8766
8767AsmToken
8768AMDGPUAsmParser::getToken() const {
8769 return Parser.getTok();
8770}
8771
8772AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
8773 return isToken(AsmToken::EndOfStatement)
8774 ? getToken()
8775 : getLexer().peekTok(ShouldSkipSpace);
8776}
8777
8778void
8779AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
8780 auto TokCount = getLexer().peekTokens(Tokens);
8781
8782 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
8783 Tokens[Idx] = AsmToken(AsmToken::Error, "");
8784}
8785
8787AMDGPUAsmParser::getTokenKind() const {
8788 return getLexer().getKind();
8789}
8790
8791SMLoc
8792AMDGPUAsmParser::getLoc() const {
8793 return getToken().getLoc();
8794}
8795
8796StringRef
8797AMDGPUAsmParser::getTokenStr() const {
8798 return getToken().getString();
8799}
8800
8801void
8802AMDGPUAsmParser::lex() {
8803 Parser.Lex();
8804}
8805
8806const AMDGPUOperand &
8807AMDGPUAsmParser::findMCOperand(const OperandVector &Operands,
8808 int MCOpIdx) const {
8809 for (const auto &Op : Operands) {
8810 const AMDGPUOperand &TargetOp = static_cast<AMDGPUOperand &>(*Op);
8811 if (TargetOp.getMCOpIdx() == MCOpIdx)
8812 return TargetOp;
8813 }
8814 llvm_unreachable("no such MC operand!");
8815}
8816
8817SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
8818 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8819}
8820
8821// Returns one of the given locations that comes later in the source.
8822SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
8823 return a.getPointer() < b.getPointer() ? b : a;
8824}
8825
8826SMLoc AMDGPUAsmParser::getOperandLoc(const OperandVector &Operands,
8827 int MCOpIdx) const {
8828 return findMCOperand(Operands, MCOpIdx).getStartLoc();
8829}
8830
8831SMLoc
8832AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
8833 const OperandVector &Operands) const {
8834 for (unsigned i = Operands.size() - 1; i > 0; --i) {
8835 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8836 if (Test(Op))
8837 return Op.getStartLoc();
8838 }
8839 return getInstLoc(Operands);
8840}
8841
8842SMLoc
8843AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8844 const OperandVector &Operands) const {
8845 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
8846 return getOperandLoc(Test, Operands);
8847}
8848
8849ParseStatus
8850AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
8851 if (!trySkipToken(AsmToken::LCurly))
8852 return ParseStatus::NoMatch;
8853
8854 bool First = true;
8855 while (!trySkipToken(AsmToken::RCurly)) {
8856 if (!First &&
8857 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
8858 return ParseStatus::Failure;
8859
8860 StringRef Id = getTokenStr();
8861 SMLoc IdLoc = getLoc();
8862 if (!skipToken(AsmToken::Identifier, "field name expected") ||
8863 !skipToken(AsmToken::Colon, "colon expected"))
8864 return ParseStatus::Failure;
8865
8866 const auto *I =
8867 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
8868 if (I == Fields.end())
8869 return Error(IdLoc, "unknown field");
8870 if ((*I)->IsDefined)
8871 return Error(IdLoc, "duplicate field");
8872
8873 // TODO: Support symbolic values.
8874 (*I)->Loc = getLoc();
8875 if (!parseExpr((*I)->Val))
8876 return ParseStatus::Failure;
8877 (*I)->IsDefined = true;
8878
8879 First = false;
8880 }
8881 return ParseStatus::Success;
8882}
8883
8884bool AMDGPUAsmParser::validateStructuredOpFields(
8886 return all_of(Fields, [this](const StructuredOpField *F) {
8887 return F->validate(*this);
8888 });
8889}
8890
8891//===----------------------------------------------------------------------===//
8892// swizzle
8893//===----------------------------------------------------------------------===//
8894
8896static unsigned
8897encodeBitmaskPerm(const unsigned AndMask,
8898 const unsigned OrMask,
8899 const unsigned XorMask) {
8900 using namespace llvm::AMDGPU::Swizzle;
8901
8902 return BITMASK_PERM_ENC |
8903 (AndMask << BITMASK_AND_SHIFT) |
8904 (OrMask << BITMASK_OR_SHIFT) |
8905 (XorMask << BITMASK_XOR_SHIFT);
8906}
8907
8908bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8909 const unsigned MaxVal,
8910 const Twine &ErrMsg, SMLoc &Loc) {
8911 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8912 return false;
8913 }
8914 Loc = getLoc();
8915 if (!parseExpr(Op)) {
8916 return false;
8917 }
8918 if (Op < MinVal || Op > MaxVal) {
8919 Error(Loc, ErrMsg);
8920 return false;
8921 }
8922
8923 return true;
8924}
8925
8926bool
8927AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8928 const unsigned MinVal,
8929 const unsigned MaxVal,
8930 const StringRef ErrMsg) {
8931 SMLoc Loc;
8932 for (unsigned i = 0; i < OpNum; ++i) {
8933 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
8934 return false;
8935 }
8936
8937 return true;
8938}
8939
8940bool
8941AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8942 using namespace llvm::AMDGPU::Swizzle;
8943
8944 int64_t Lane[LANE_NUM];
8945 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8946 "expected a 2-bit lane id")) {
8948 for (unsigned I = 0; I < LANE_NUM; ++I) {
8949 Imm |= Lane[I] << (LANE_SHIFT * I);
8950 }
8951 return true;
8952 }
8953 return false;
8954}
8955
8956bool
8957AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
8958 using namespace llvm::AMDGPU::Swizzle;
8959
8960 SMLoc Loc;
8961 int64_t GroupSize;
8962 int64_t LaneIdx;
8963
8964 if (!parseSwizzleOperand(GroupSize,
8965 2, 32,
8966 "group size must be in the interval [2,32]",
8967 Loc)) {
8968 return false;
8969 }
8970 if (!isPowerOf2_64(GroupSize)) {
8971 Error(Loc, "group size must be a power of two");
8972 return false;
8973 }
8974 if (parseSwizzleOperand(LaneIdx,
8975 0, GroupSize - 1,
8976 "lane id must be in the interval [0,group size - 1]",
8977 Loc)) {
8978 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8979 return true;
8980 }
8981 return false;
8982}
8983
8984bool
8985AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8986 using namespace llvm::AMDGPU::Swizzle;
8987
8988 SMLoc Loc;
8989 int64_t GroupSize;
8990
8991 if (!parseSwizzleOperand(GroupSize,
8992 2, 32,
8993 "group size must be in the interval [2,32]",
8994 Loc)) {
8995 return false;
8996 }
8997 if (!isPowerOf2_64(GroupSize)) {
8998 Error(Loc, "group size must be a power of two");
8999 return false;
9000 }
9001
9002 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
9003 return true;
9004}
9005
9006bool
9007AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
9008 using namespace llvm::AMDGPU::Swizzle;
9009
9010 SMLoc Loc;
9011 int64_t GroupSize;
9012
9013 if (!parseSwizzleOperand(GroupSize,
9014 1, 16,
9015 "group size must be in the interval [1,16]",
9016 Loc)) {
9017 return false;
9018 }
9019 if (!isPowerOf2_64(GroupSize)) {
9020 Error(Loc, "group size must be a power of two");
9021 return false;
9022 }
9023
9024 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
9025 return true;
9026}
9027
9028bool
9029AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
9030 using namespace llvm::AMDGPU::Swizzle;
9031
9032 if (!skipToken(AsmToken::Comma, "expected a comma")) {
9033 return false;
9034 }
9035
9036 StringRef Ctl;
9037 SMLoc StrLoc = getLoc();
9038 if (!parseString(Ctl)) {
9039 return false;
9040 }
9041 if (Ctl.size() != BITMASK_WIDTH) {
9042 Error(StrLoc, "expected a 5-character mask");
9043 return false;
9044 }
9045
9046 unsigned AndMask = 0;
9047 unsigned OrMask = 0;
9048 unsigned XorMask = 0;
9049
9050 for (size_t i = 0; i < Ctl.size(); ++i) {
9051 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
9052 switch(Ctl[i]) {
9053 default:
9054 Error(StrLoc, "invalid mask");
9055 return false;
9056 case '0':
9057 break;
9058 case '1':
9059 OrMask |= Mask;
9060 break;
9061 case 'p':
9062 AndMask |= Mask;
9063 break;
9064 case 'i':
9065 AndMask |= Mask;
9066 XorMask |= Mask;
9067 break;
9068 }
9069 }
9070
9071 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
9072 return true;
9073}
9074
9075bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
9076 using namespace llvm::AMDGPU::Swizzle;
9077
9078 if (!AMDGPU::isGFX9Plus(getSTI())) {
9079 Error(getLoc(), "FFT mode swizzle not supported on this GPU");
9080 return false;
9081 }
9082
9083 int64_t Swizzle;
9084 SMLoc Loc;
9085 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
9086 "FFT swizzle must be in the interval [0," +
9087 Twine(FFT_SWIZZLE_MAX) + Twine(']'),
9088 Loc))
9089 return false;
9090
9091 Imm = FFT_MODE_ENC | Swizzle;
9092 return true;
9093}
9094
9095bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
9096 using namespace llvm::AMDGPU::Swizzle;
9097
9098 if (!AMDGPU::isGFX9Plus(getSTI())) {
9099 Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
9100 return false;
9101 }
9102
9103 SMLoc Loc;
9104 int64_t Direction;
9105
9106 if (!parseSwizzleOperand(Direction, 0, 1,
9107 "direction must be 0 (left) or 1 (right)", Loc))
9108 return false;
9109
9110 int64_t RotateSize;
9111 if (!parseSwizzleOperand(
9112 RotateSize, 0, ROTATE_MAX_SIZE,
9113 "number of threads to rotate must be in the interval [0," +
9114 Twine(ROTATE_MAX_SIZE) + Twine(']'),
9115 Loc))
9116 return false;
9117
9119 (RotateSize << ROTATE_SIZE_SHIFT);
9120 return true;
9121}
9122
9123bool
9124AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
9125
9126 SMLoc OffsetLoc = getLoc();
9127
9128 if (!parseExpr(Imm, "a swizzle macro")) {
9129 return false;
9130 }
9131 if (!isUInt<16>(Imm)) {
9132 Error(OffsetLoc, "expected a 16-bit offset");
9133 return false;
9134 }
9135 return true;
9136}
9137
9138bool
9139AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
9140 using namespace llvm::AMDGPU::Swizzle;
9141
9142 if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
9143
9144 SMLoc ModeLoc = getLoc();
9145 bool Ok = false;
9146
9147 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
9148 Ok = parseSwizzleQuadPerm(Imm);
9149 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
9150 Ok = parseSwizzleBitmaskPerm(Imm);
9151 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
9152 Ok = parseSwizzleBroadcast(Imm);
9153 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
9154 Ok = parseSwizzleSwap(Imm);
9155 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
9156 Ok = parseSwizzleReverse(Imm);
9157 } else if (trySkipId(IdSymbolic[ID_FFT])) {
9158 Ok = parseSwizzleFFT(Imm);
9159 } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
9160 Ok = parseSwizzleRotate(Imm);
9161 } else {
9162 Error(ModeLoc, "expected a swizzle mode");
9163 }
9164
9165 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
9166 }
9167
9168 return false;
9169}
9170
9171ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
9172 SMLoc S = getLoc();
9173 int64_t Imm = 0;
9174
9175 if (trySkipId("offset")) {
9176
9177 bool Ok = false;
9178 if (skipToken(AsmToken::Colon, "expected a colon")) {
9179 if (trySkipId("swizzle")) {
9180 Ok = parseSwizzleMacro(Imm);
9181 } else {
9182 Ok = parseSwizzleOffset(Imm);
9183 }
9184 }
9185
9186 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
9187
9189 }
9190 return ParseStatus::NoMatch;
9191}
9192
9193bool
9194AMDGPUOperand::isSwizzle() const {
9195 return isImmTy(ImmTySwizzle);
9196}
9197
9198//===----------------------------------------------------------------------===//
9199// VGPR Index Mode
9200//===----------------------------------------------------------------------===//
9201
9202int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
9203
9204 using namespace llvm::AMDGPU::VGPRIndexMode;
9205
9206 if (trySkipToken(AsmToken::RParen)) {
9207 return OFF;
9208 }
9209
9210 int64_t Imm = 0;
9211
9212 while (true) {
9213 unsigned Mode = 0;
9214 SMLoc S = getLoc();
9215
9216 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
9217 if (trySkipId(IdSymbolic[ModeId])) {
9218 Mode = 1 << ModeId;
9219 break;
9220 }
9221 }
9222
9223 if (Mode == 0) {
9224 Error(S, (Imm == 0)?
9225 "expected a VGPR index mode or a closing parenthesis" :
9226 "expected a VGPR index mode");
9227 return UNDEF;
9228 }
9229
9230 if (Imm & Mode) {
9231 Error(S, "duplicate VGPR index mode");
9232 return UNDEF;
9233 }
9234 Imm |= Mode;
9235
9236 if (trySkipToken(AsmToken::RParen))
9237 break;
9238 if (!skipToken(AsmToken::Comma,
9239 "expected a comma or a closing parenthesis"))
9240 return UNDEF;
9241 }
9242
9243 return Imm;
9244}
9245
9246ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
9247
9248 using namespace llvm::AMDGPU::VGPRIndexMode;
9249
9250 int64_t Imm = 0;
9251 SMLoc S = getLoc();
9252
9253 if (trySkipId("gpr_idx", AsmToken::LParen)) {
9254 Imm = parseGPRIdxMacro();
9255 if (Imm == UNDEF)
9256 return ParseStatus::Failure;
9257 } else {
9258 if (getParser().parseAbsoluteExpression(Imm))
9259 return ParseStatus::Failure;
9260 if (Imm < 0 || !isUInt<4>(Imm))
9261 return Error(S, "invalid immediate: only 4-bit values are legal");
9262 }
9263
9264 Operands.push_back(
9265 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
9266 return ParseStatus::Success;
9267}
9268
9269bool AMDGPUOperand::isGPRIdxMode() const {
9270 return isImmTy(ImmTyGprIdxMode);
9271}
9272
9273//===----------------------------------------------------------------------===//
9274// sopp branch targets
9275//===----------------------------------------------------------------------===//
9276
9277ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
9278
9279 // Make sure we are not parsing something
9280 // that looks like a label or an expression but is not.
9281 // This will improve error messages.
9282 if (isRegister() || isModifier())
9283 return ParseStatus::NoMatch;
9284
9285 if (!parseExpr(Operands))
9286 return ParseStatus::Failure;
9287
9288 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
9289 assert(Opr.isImm() || Opr.isExpr());
9290 SMLoc Loc = Opr.getStartLoc();
9291
9292 // Currently we do not support arbitrary expressions as branch targets.
9293 // Only labels and absolute expressions are accepted.
9294 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
9295 Error(Loc, "expected an absolute expression or a label");
9296 } else if (Opr.isImm() && !Opr.isS16Imm()) {
9297 Error(Loc, "expected a 16-bit signed jump offset");
9298 }
9299
9300 return ParseStatus::Success;
9301}
9302
9303//===----------------------------------------------------------------------===//
9304// Boolean holding registers
9305//===----------------------------------------------------------------------===//
9306
9307ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
9308 return parseReg(Operands);
9309}
9310
9311//===----------------------------------------------------------------------===//
9312// mubuf
9313//===----------------------------------------------------------------------===//
9314
9315void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
9316 const OperandVector &Operands,
9317 bool IsAtomic) {
9318 OptionalImmIndexMap OptionalIdx;
9319 unsigned FirstOperandIdx = 1;
9320 bool IsAtomicReturn = false;
9321
9322 if (IsAtomic) {
9323 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
9325 }
9326
9327 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
9328 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9329
9330 // Add the register arguments
9331 if (Op.isReg()) {
9332 Op.addRegOperands(Inst, 1);
9333 // Insert a tied src for atomic return dst.
9334 // This cannot be postponed as subsequent calls to
9335 // addImmOperands rely on correct number of MC operands.
9336 if (IsAtomicReturn && i == FirstOperandIdx)
9337 Op.addRegOperands(Inst, 1);
9338 continue;
9339 }
9340
9341 // Handle the case where soffset is an immediate
9342 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9343 Op.addImmOperands(Inst, 1);
9344 continue;
9345 }
9346
9347 // Handle tokens like 'offen' which are sometimes hard-coded into the
9348 // asm string. There are no MCInst operands for these.
9349 if (Op.isToken()) {
9350 continue;
9351 }
9352 assert(Op.isImm());
9353
9354 // Handle optional arguments
9355 OptionalIdx[Op.getImmTy()] = i;
9356 }
9357
9358 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
9359 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
9360 // Parse a dummy operand as a placeholder for the SWZ operand. This enforces
9361 // agreement between MCInstrDesc.getNumOperands and MCInst.getNumOperands.
9363}
9364
9365//===----------------------------------------------------------------------===//
9366// smrd
9367//===----------------------------------------------------------------------===//
9368
9369bool AMDGPUOperand::isSMRDOffset8() const {
9370 return isImmLiteral() && isUInt<8>(getImm());
9371}
9372
9373bool AMDGPUOperand::isSMEMOffset() const {
9374 // Offset range is checked later by validator.
9375 return isImmLiteral();
9376}
9377
9378bool AMDGPUOperand::isSMRDLiteralOffset() const {
9379 // 32-bit literals are only supported on CI and we only want to use them
9380 // when the offset is > 8-bits.
9381 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
9382}
9383
9384//===----------------------------------------------------------------------===//
9385// vop3
9386//===----------------------------------------------------------------------===//
9387
/// Convert an output-modifier multiplier (1, 2 or 4) in place to its encoded
/// value (0, 1 or 2 respectively). Returns false and leaves \p Mul untouched
/// for any other value.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
  case 2:
  case 4:
    Mul >>= 1;
    return true;
  default:
    return false;
  }
}
9395
/// Convert an output-modifier divisor in place to its encoded value:
/// 1 becomes 0 and 2 becomes 3. Returns false and leaves \p Div untouched for
/// any other value.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1:
    Div = 0;
    return true;
  case 2:
    Div = 3;
    return true;
  default:
    return false;
  }
}
9409
9410// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
9411// This is intentional and ensures compatibility with sp3.
9412// See bug 35397 for details.
9413bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9414 if (BoundCtrl == 0 || BoundCtrl == 1) {
9415 if (!isGFX11Plus())
9416 BoundCtrl = 1;
9417 return true;
9418 }
9419 return false;
9420}
9421
9422void AMDGPUAsmParser::onBeginOfFile() {
9423 if (!getParser().getStreamer().getTargetStreamer() ||
9424 getSTI().getTargetTriple().getArch() == Triple::r600)
9425 return;
9426
9427 if (!getTargetStreamer().getTargetID())
9428 getTargetStreamer().initializeTargetID(getSTI(),
9429 getSTI().getFeatureString());
9430
9431 if (isHsaAbi(getSTI()))
9432 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9433}
9434
9435/// Parse AMDGPU specific expressions.
9436///
9437/// expr ::= or(expr, ...) |
9438/// max(expr, ...) |
9439/// min(expr, ...)
9440///
9441bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
9442 using AGVK = AMDGPUMCExpr::VariantKind;
9443
9444 if (isToken(AsmToken::Identifier)) {
9445 StringRef TokenId = getTokenStr();
9446 AGVK VK = StringSwitch<AGVK>(TokenId)
9447 .Case("max", AGVK::AGVK_Max)
9448 .Case("min", AGVK::AGVK_Min)
9449 .Case("or", AGVK::AGVK_Or)
9450 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
9451 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9452 .Case("alignto", AGVK::AGVK_AlignTo)
9453 .Case("occupancy", AGVK::AGVK_Occupancy)
9454 .Case("instprefsize", AGVK::AGVK_InstPrefSize)
9455 .Default(AGVK::AGVK_None);
9456
9457 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
9459 uint64_t CommaCount = 0;
9460 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
9461 lex(); // Eat '('
9462 while (true) {
9463 if (trySkipToken(AsmToken::RParen)) {
9464 if (Exprs.empty()) {
9465 Error(getToken().getLoc(),
9466 "empty " + Twine(TokenId) + " expression");
9467 return true;
9468 }
9469 if (CommaCount + 1 != Exprs.size()) {
9470 Error(getToken().getLoc(),
9471 "mismatch of commas in " + Twine(TokenId) + " expression");
9472 return true;
9473 }
9474 if (unsigned Expected = AMDGPUMCExpr::getNumExpectedArgs(VK);
9475 Expected && Exprs.size() != Expected) {
9476 Error(getToken().getLoc(), Twine(TokenId) + " expression expects " +
9477 Twine(Expected) + " operands");
9478 return true;
9479 }
9480 Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
9481 return false;
9482 }
9483 const MCExpr *Expr;
9484 if (getParser().parseExpression(Expr, EndLoc))
9485 return true;
9486 Exprs.push_back(Expr);
9487 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
9488 if (LastTokenWasComma)
9489 CommaCount++;
9490 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
9491 Error(getToken().getLoc(),
9492 "unexpected token in " + Twine(TokenId) + " expression");
9493 return true;
9494 }
9495 }
9496 }
9497 }
9498 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
9499}
9500
9501ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
9502 StringRef Name = getTokenStr();
9503 if (Name == "mul") {
9504 return parseIntWithPrefix("mul", Operands,
9505 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
9506 }
9507
9508 if (Name == "div") {
9509 return parseIntWithPrefix("div", Operands,
9510 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
9511 }
9512
9513 return ParseStatus::NoMatch;
9514}
9515
9516// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
9517// the number of src operands present, then copies that bit into src0_modifiers.
9518static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
9519 int Opc = Inst.getOpcode();
9520 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9521 if (OpSelIdx == -1)
9522 return;
9523
9524 int SrcNum;
9525 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9526 AMDGPU::OpName::src2};
9527 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
9528 ++SrcNum)
9529 ;
9530 assert(SrcNum > 0);
9531
9532 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9533
9534 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
9535 if (DstIdx == -1)
9536 return;
9537
9538 const MCOperand &DstOp = Inst.getOperand(DstIdx);
9539 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
9540 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9541 if (DstOp.isReg() &&
9542 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
9543 if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
9544 ModVal |= SISrcMods::DST_OP_SEL;
9545 } else {
9546 if ((OpSel & (1 << SrcNum)) != 0)
9547 ModVal |= SISrcMods::DST_OP_SEL;
9548 }
9549 Inst.getOperand(ModIdx).setImm(ModVal);
9550}
9551
9552void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
9553 const OperandVector &Operands) {
9554 cvtVOP3P(Inst, Operands);
9555 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9556}
9557
9558void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
9559 OptionalImmIndexMap &OptionalIdx) {
9560 cvtVOP3P(Inst, Operands, OptionalIdx);
9561 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9562}
9563
9564static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
9565 return
9566 // 1. This operand is input modifiers
9567 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
9568 // 2. This is not last operand
9569 && Desc.NumOperands > (OpNum + 1)
9570 // 3. Next operand is register class
9571 && Desc.operands()[OpNum + 1].RegClass != -1
9572 // 4. Next register is not tied to any other operand
9573 && Desc.getOperandConstraint(OpNum + 1,
9575}
9576
9577void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst, unsigned OpSel) {
9578 unsigned Opc = Inst.getOpcode();
9579 constexpr AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9580 AMDGPU::OpName::src2};
9581 constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9582 AMDGPU::OpName::src1_modifiers,
9583 AMDGPU::OpName::src2_modifiers};
9584 for (int J = 0; J < 3; ++J) {
9585 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9586 if (OpIdx == -1)
9587 // Some instructions, e.g. v_interp_p2_f16 in GFX9, have src0, src2, but
9588 // no src1. So continue instead of break.
9589 continue;
9590
9591 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9592 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9593
9594 if ((OpSel & (1 << J)) != 0)
9595 ModVal |= SISrcMods::OP_SEL_0;
9596 // op_sel[3] is encoded in src0_modifiers.
9597 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)
9598 ModVal |= SISrcMods::DST_OP_SEL;
9599
9600 Inst.getOperand(ModIdx).setImm(ModVal);
9601 }
9602}
9603
9604void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
9605{
9606 OptionalImmIndexMap OptionalIdx;
9607 unsigned Opc = Inst.getOpcode();
9608
9609 unsigned I = 1;
9610 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9611 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9612 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9613 }
9614
9615 for (unsigned E = Operands.size(); I != E; ++I) {
9616 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9618 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9619 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
9620 Op.isInterpAttrChan()) {
9621 Inst.addOperand(MCOperand::createImm(Op.getImm()));
9622 } else if (Op.isImmModifier()) {
9623 OptionalIdx[Op.getImmTy()] = I;
9624 } else {
9625 llvm_unreachable("unhandled operand type");
9626 }
9627 }
9628
9629 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
9630 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9631 AMDGPUOperand::ImmTyHigh);
9632
9633 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9634 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9635 AMDGPUOperand::ImmTyClamp);
9636
9637 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9638 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9639 AMDGPUOperand::ImmTyOModSI);
9640
9641 // Some v_interp instructions use op_sel[3] for dst.
9642 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9643 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9644 AMDGPUOperand::ImmTyOpSel);
9645 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9646 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9647
9648 cvtOpSelHelper(Inst, OpSel);
9649 }
9650}
9651
9652void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
9653{
9654 OptionalImmIndexMap OptionalIdx;
9655 unsigned Opc = Inst.getOpcode();
9656
9657 unsigned I = 1;
9658 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9659 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9660 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9661 }
9662
9663 for (unsigned E = Operands.size(); I != E; ++I) {
9664 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9666 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9667 } else if (Op.isImmModifier()) {
9668 OptionalIdx[Op.getImmTy()] = I;
9669 } else {
9670 llvm_unreachable("unhandled operand type");
9671 }
9672 }
9673
9674 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
9675
9676 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9677 if (OpSelIdx != -1)
9678 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9679
9680 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
9681
9682 if (OpSelIdx == -1)
9683 return;
9684
9685 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9686 cvtOpSelHelper(Inst, OpSel);
9687}
9688
9689void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9690 const OperandVector &Operands) {
9691 OptionalImmIndexMap OptionalIdx;
9692 unsigned Opc = Inst.getOpcode();
9693 unsigned I = 1;
9694 int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
9695
9696 const MCInstrDesc &Desc = MII.get(Opc);
9697
9698 for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
9699 static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);
9700
9701 for (unsigned E = Operands.size(); I != E; ++I) {
9702 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
9703 int NumOperands = Inst.getNumOperands();
9704 // The order of operands in MCInst and parsed operands are different.
9705 // Adding dummy cbsz and blgp operands at corresponding MCInst operand
9706 // indices for parsing scale values correctly.
9707 if (NumOperands == CbszOpIdx) {
9710 }
9711 if (isRegOrImmWithInputMods(Desc, NumOperands)) {
9712 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9713 } else if (Op.isImmModifier()) {
9714 OptionalIdx[Op.getImmTy()] = I;
9715 } else {
9716 Op.addRegOrImmOperands(Inst, 1);
9717 }
9718 }
9719
9720 // Insert CBSZ and BLGP operands for F8F6F4 variants
9721 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9722 if (CbszIdx != OptionalIdx.end()) {
9723 int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
9724 Inst.getOperand(CbszOpIdx).setImm(CbszVal);
9725 }
9726
9727 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
9728 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9729 if (BlgpIdx != OptionalIdx.end()) {
9730 int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
9731 Inst.getOperand(BlgpOpIdx).setImm(BlgpVal);
9732 }
9733
9734 // Add dummy src_modifiers
9737
9738 // Handle op_sel fields
9739
9740 unsigned OpSel = 0;
9741 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9742 if (OpselIdx != OptionalIdx.end()) {
9743 OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
9744 .getImm();
9745 }
9746
9747 unsigned OpSelHi = 0;
9748 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9749 if (OpselHiIdx != OptionalIdx.end()) {
9750 OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
9751 .getImm();
9752 }
9753 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9754 AMDGPU::OpName::src1_modifiers};
9755
9756 for (unsigned J = 0; J < 2; ++J) {
9757 unsigned ModVal = 0;
9758 if (OpSel & (1 << J))
9759 ModVal |= SISrcMods::OP_SEL_0;
9760 if (OpSelHi & (1 << J))
9761 ModVal |= SISrcMods::OP_SEL_1;
9762
9763 const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9764 Inst.getOperand(ModIdx).setImm(ModVal);
9765 }
9766}
9767
9768void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
9769 OptionalImmIndexMap &OptionalIdx) {
9770 unsigned Opc = Inst.getOpcode();
9771
9772 unsigned I = 1;
9773 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9774 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9775 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9776 }
9777
9778 for (unsigned E = Operands.size(); I != E; ++I) {
9779 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9781 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9782 } else if (Op.isImmModifier()) {
9783 OptionalIdx[Op.getImmTy()] = I;
9784 } else {
9785 Op.addRegOrImmOperands(Inst, 1);
9786 }
9787 }
9788
9789 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::scale_sel))
9790 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9791 AMDGPUOperand::ImmTyScaleSel);
9792
9793 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9794 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9795 AMDGPUOperand::ImmTyClamp);
9796
9797 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
9798 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
9799 Inst.addOperand(Inst.getOperand(0));
9800 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9801 AMDGPUOperand::ImmTyByteSel);
9802 }
9803
9804 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9805 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9806 AMDGPUOperand::ImmTyOModSI);
9807
9808 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
9809 // it has src2 register operand that is tied to dst operand
9810 // we don't allow modifiers for this operand in assembler so src2_modifiers
9811 // should be 0.
9812 if (isMAC(Opc)) {
9813 auto *it = Inst.begin();
9814 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
9815 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
9816 ++it;
9817 // Copy the operand to ensure it's not invalidated when Inst grows.
9818 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
9819 }
9820}
9821
9822void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
9823 OptionalImmIndexMap OptionalIdx;
9824 cvtVOP3(Inst, Operands, OptionalIdx);
9825}
9826
9827void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
9828 OptionalImmIndexMap &OptIdx) {
9829 const int Opc = Inst.getOpcode();
9830 const MCInstrDesc &Desc = MII.get(Opc);
9831
9832 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
9833
9834 if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
9835 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
9836 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9837 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9838 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx11 ||
9839 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx11 ||
9840 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9841 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12 ||
9842 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx13 ||
9843 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx13) {
9844 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
9845 Inst.addOperand(Inst.getOperand(0));
9846 }
9847
9848 // Append vdst_in only if a previous converter (cvtVOP3DPP for DPP variants,
9849 // cvtVOP3 for byte_sel variants) hasn't already placed it. Use the position
9850 // of the named operand to detect that, the same way cvtVOP3DPP does
9851 // internally.
9852 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
9853 if (VdstInIdx != -1 && VdstInIdx == static_cast<int>(Inst.getNumOperands()))
9854 Inst.addOperand(Inst.getOperand(0));
9855
9856 int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
9857 if (BitOp3Idx != -1) {
9858 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9859 }
9860
9861 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
9862 // instruction, and then figure out where to actually put the modifiers
9863
9864 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9865 if (OpSelIdx != -1) {
9866 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
9867 }
9868
9869 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
9870 if (OpSelHiIdx != -1) {
9871 int DefaultVal = IsPacked ? -1 : 0;
9872 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
9873 DefaultVal);
9874 }
9875
9876 int MatrixAFMTIdx =
9877 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
9878 if (MatrixAFMTIdx != -1) {
9879 addOptionalImmOperand(Inst, Operands, OptIdx,
9880 AMDGPUOperand::ImmTyMatrixAFMT, 0);
9881 }
9882
9883 int MatrixBFMTIdx =
9884 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
9885 if (MatrixBFMTIdx != -1) {
9886 addOptionalImmOperand(Inst, Operands, OptIdx,
9887 AMDGPUOperand::ImmTyMatrixBFMT, 0);
9888 }
9889
9890 int MatrixAScaleIdx =
9891 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale);
9892 if (MatrixAScaleIdx != -1) {
9893 addOptionalImmOperand(Inst, Operands, OptIdx,
9894 AMDGPUOperand::ImmTyMatrixAScale, 0);
9895 }
9896
9897 int MatrixBScaleIdx =
9898 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale);
9899 if (MatrixBScaleIdx != -1) {
9900 addOptionalImmOperand(Inst, Operands, OptIdx,
9901 AMDGPUOperand::ImmTyMatrixBScale, 0);
9902 }
9903
9904 int MatrixAScaleFmtIdx =
9905 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
9906 if (MatrixAScaleFmtIdx != -1) {
9907 addOptionalImmOperand(Inst, Operands, OptIdx,
9908 AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
9909 }
9910
9911 int MatrixBScaleFmtIdx =
9912 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
9913 if (MatrixBScaleFmtIdx != -1) {
9914 addOptionalImmOperand(Inst, Operands, OptIdx,
9915 AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
9916 }
9917
9918 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
9919 addOptionalImmOperand(Inst, Operands, OptIdx,
9920 AMDGPUOperand::ImmTyMatrixAReuse, 0);
9921
9922 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
9923 addOptionalImmOperand(Inst, Operands, OptIdx,
9924 AMDGPUOperand::ImmTyMatrixBReuse, 0);
9925
9926 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
9927 if (NegLoIdx != -1)
9928 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
9929
9930 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
9931 if (NegHiIdx != -1)
9932 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
9933
9934 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9935 AMDGPU::OpName::src2};
9936 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9937 AMDGPU::OpName::src1_modifiers,
9938 AMDGPU::OpName::src2_modifiers};
9939
9940 unsigned OpSel = 0;
9941 unsigned OpSelHi = 0;
9942 unsigned NegLo = 0;
9943 unsigned NegHi = 0;
9944
9945 if (OpSelIdx != -1)
9946 OpSel = Inst.getOperand(OpSelIdx).getImm();
9947
9948 if (OpSelHiIdx != -1)
9949 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
9950
9951 if (NegLoIdx != -1)
9952 NegLo = Inst.getOperand(NegLoIdx).getImm();
9953
9954 if (NegHiIdx != -1)
9955 NegHi = Inst.getOperand(NegHiIdx).getImm();
9956
9957 for (int J = 0; J < 3; ++J) {
9958 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9959 if (OpIdx == -1)
9960 break;
9961
9962 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9963
9964 if (ModIdx == -1)
9965 continue;
9966
9967 uint32_t ModVal = 0;
9968
9969 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
9970 if (SrcOp.isReg() && getMRI()
9971 ->getRegClass(AMDGPU::VGPR_16RegClassID)
9972 .contains(SrcOp.getReg())) {
9973 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
9974 if (VGPRSuffixIsHi)
9975 ModVal |= SISrcMods::OP_SEL_0;
9976 } else {
9977 if ((OpSel & (1 << J)) != 0)
9978 ModVal |= SISrcMods::OP_SEL_0;
9979 }
9980
9981 if ((OpSelHi & (1 << J)) != 0)
9982 ModVal |= SISrcMods::OP_SEL_1;
9983
9984 if ((NegLo & (1 << J)) != 0)
9985 ModVal |= SISrcMods::NEG;
9986
9987 if ((NegHi & (1 << J)) != 0)
9988 ModVal |= SISrcMods::NEG_HI;
9989
9990 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
9991 }
9992}
9993
9994void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
9995 OptionalImmIndexMap OptIdx;
9996 cvtVOP3(Inst, Operands, OptIdx);
9997 cvtVOP3P(Inst, Operands, OptIdx);
9998}
9999
10001 unsigned i, unsigned Opc,
10002 AMDGPU::OpName OpName) {
10003 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
10004 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
10005 else
10006 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
10007}
10008
10009void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
10010 unsigned Opc = Inst.getOpcode();
10011
10012 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
10013 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
10014 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
10015 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
10016 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
10017
10018 OptionalImmIndexMap OptIdx;
10019 for (unsigned i = 5; i < Operands.size(); ++i) {
10020 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
10021 OptIdx[Op.getImmTy()] = i;
10022 }
10023
10024 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
10025 addOptionalImmOperand(Inst, Operands, OptIdx,
10026 AMDGPUOperand::ImmTyIndexKey8bit);
10027
10028 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
10029 addOptionalImmOperand(Inst, Operands, OptIdx,
10030 AMDGPUOperand::ImmTyIndexKey16bit);
10031
10032 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
10033 addOptionalImmOperand(Inst, Operands, OptIdx,
10034 AMDGPUOperand::ImmTyIndexKey32bit);
10035
10036 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
10037 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
10038
10039 cvtVOP3P(Inst, Operands, OptIdx);
10040}
10041
10042//===----------------------------------------------------------------------===//
10043// VOPD
10044//===----------------------------------------------------------------------===//
10045
10046ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
10047 if (!hasVOPD(getSTI()))
10048 return ParseStatus::NoMatch;
10049
10050 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
10051 SMLoc S = getLoc();
10052 lex();
10053 lex();
10054 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
10055 SMLoc OpYLoc = getLoc();
10056 StringRef OpYName;
10057 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
10058 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
10059 return ParseStatus::Success;
10060 }
10061 return Error(OpYLoc, "expected a VOPDY instruction after ::");
10062 }
10063 return ParseStatus::NoMatch;
10064}
10065
10066// Create VOPD MCInst operands using parsed assembler operands.
10067void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
10068 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10069
10070 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
10071 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
10073 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10074 return;
10075 }
10076 if (Op.isReg()) {
10077 Op.addRegOperands(Inst, 1);
10078 return;
10079 }
10080 if (Op.isImm()) {
10081 Op.addImmOperands(Inst, 1);
10082 return;
10083 }
10084 llvm_unreachable("Unhandled operand type in cvtVOPD");
10085 };
10086
10087 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
10088
10089 // MCInst operands are ordered as follows:
10090 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
10091
10092 for (auto CompIdx : VOPD::COMPONENTS) {
10093 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
10094 }
10095
10096 for (auto CompIdx : VOPD::COMPONENTS) {
10097 const auto &CInfo = InstInfo[CompIdx];
10098 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
10099 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
10100 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
10101 if (CInfo.hasSrc2Acc())
10102 addOp(CInfo.getIndexOfDstInParsedOperands());
10103 }
10104
10105 int BitOp3Idx =
10106 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
10107 if (BitOp3Idx != -1) {
10108 OptionalImmIndexMap OptIdx;
10109 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
10110 if (Op.isImm())
10111 OptIdx[Op.getImmTy()] = Operands.size() - 1;
10112
10113 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
10114 }
10115}
10116
10117//===----------------------------------------------------------------------===//
10118// dpp
10119//===----------------------------------------------------------------------===//
10120
10121bool AMDGPUOperand::isDPP8() const {
10122 return isImmTy(ImmTyDPP8);
10123}
10124
10125bool AMDGPUOperand::isDPPCtrl() const {
10126 using namespace AMDGPU::DPP;
10127
10128 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
10129 if (result) {
10130 int64_t Imm = getImm();
10131 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
10132 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
10133 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
10134 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
10135 (Imm == DppCtrl::WAVE_SHL1) ||
10136 (Imm == DppCtrl::WAVE_ROL1) ||
10137 (Imm == DppCtrl::WAVE_SHR1) ||
10138 (Imm == DppCtrl::WAVE_ROR1) ||
10139 (Imm == DppCtrl::ROW_MIRROR) ||
10140 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
10141 (Imm == DppCtrl::BCAST15) ||
10142 (Imm == DppCtrl::BCAST31) ||
10143 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
10144 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
10145 }
10146 return false;
10147}
10148
10149//===----------------------------------------------------------------------===//
10150// mAI
10151//===----------------------------------------------------------------------===//
10152
10153bool AMDGPUOperand::isBLGP() const {
10154 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
10155}
10156
10157bool AMDGPUOperand::isS16Imm() const {
10158 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
10159}
10160
10161bool AMDGPUOperand::isU16Imm() const {
10162 return isImmLiteral() && isUInt<16>(getImm());
10163}
10164
10165//===----------------------------------------------------------------------===//
10166// dim
10167//===----------------------------------------------------------------------===//
10168
10169bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
10170 // We want to allow "dim:1D" etc.,
10171 // but the initial 1 is tokenized as an integer.
10172 std::string Token;
10173 if (isToken(AsmToken::Integer)) {
10174 SMLoc Loc = getToken().getEndLoc();
10175 Token = std::string(getTokenStr());
10176 lex();
10177 if (getLoc() != Loc)
10178 return false;
10179 }
10180
10181 StringRef Suffix;
10182 if (!parseId(Suffix))
10183 return false;
10184 Token += Suffix;
10185
10186 StringRef DimId = Token;
10187 DimId.consume_front("SQ_RSRC_IMG_");
10188
10189 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
10190 if (!DimInfo)
10191 return false;
10192
10193 Encoding = DimInfo->Encoding;
10194 return true;
10195}
10196
10197ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
10198 if (!isGFX10Plus())
10199 return ParseStatus::NoMatch;
10200
10201 SMLoc S = getLoc();
10202
10203 if (!trySkipId("dim", AsmToken::Colon))
10204 return ParseStatus::NoMatch;
10205
10206 unsigned Encoding;
10207 SMLoc Loc = getLoc();
10208 if (!parseDimId(Encoding))
10209 return Error(Loc, "invalid dim value");
10210
10211 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
10212 AMDGPUOperand::ImmTyDim));
10213 return ParseStatus::Success;
10214}
10215
10216//===----------------------------------------------------------------------===//
10217// dpp
10218//===----------------------------------------------------------------------===//
10219
10220ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
10221 SMLoc S = getLoc();
10222
10223 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
10224 return ParseStatus::NoMatch;
10225
10226 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
10227
10228 int64_t Sels[8];
10229
10230 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
10231 return ParseStatus::Failure;
10232
10233 for (size_t i = 0; i < 8; ++i) {
10234 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
10235 return ParseStatus::Failure;
10236
10237 SMLoc Loc = getLoc();
10238 if (getParser().parseAbsoluteExpression(Sels[i]))
10239 return ParseStatus::Failure;
10240 if (0 > Sels[i] || 7 < Sels[i])
10241 return Error(Loc, "expected a 3-bit value");
10242 }
10243
10244 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
10245 return ParseStatus::Failure;
10246
10247 unsigned DPP8 = 0;
10248 for (size_t i = 0; i < 8; ++i)
10249 DPP8 |= (Sels[i] << (i * 3));
10250
10251 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
10252 return ParseStatus::Success;
10253}
10254
10255bool
10256AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
10257 const OperandVector &Operands) {
10258 if (Ctrl == "row_newbcast")
10259 return isGFX90A();
10260
10261 if (Ctrl == "row_share" ||
10262 Ctrl == "row_xmask")
10263 return isGFX10Plus();
10264
10265 if (Ctrl == "wave_shl" ||
10266 Ctrl == "wave_shr" ||
10267 Ctrl == "wave_rol" ||
10268 Ctrl == "wave_ror" ||
10269 Ctrl == "row_bcast")
10270 return isVI() || isGFX9();
10271
10272 return Ctrl == "row_mirror" ||
10273 Ctrl == "row_half_mirror" ||
10274 Ctrl == "quad_perm" ||
10275 Ctrl == "row_shl" ||
10276 Ctrl == "row_shr" ||
10277 Ctrl == "row_ror";
10278}
10279
10280int64_t
10281AMDGPUAsmParser::parseDPPCtrlPerm() {
10282 // quad_perm:[%d,%d,%d,%d]
10283
10284 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
10285 return -1;
10286
10287 int64_t Val = 0;
10288 for (int i = 0; i < 4; ++i) {
10289 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
10290 return -1;
10291
10292 int64_t Temp;
10293 SMLoc Loc = getLoc();
10294 if (getParser().parseAbsoluteExpression(Temp))
10295 return -1;
10296 if (Temp < 0 || Temp > 3) {
10297 Error(Loc, "expected a 2-bit value");
10298 return -1;
10299 }
10300
10301 Val += (Temp << i * 2);
10302 }
10303
10304 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
10305 return -1;
10306
10307 return Val;
10308}
10309
10310int64_t
10311AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
10312 using namespace AMDGPU::DPP;
10313
10314 // sel:%d
10315
10316 int64_t Val;
10317 SMLoc Loc = getLoc();
10318
10319 if (getParser().parseAbsoluteExpression(Val))
10320 return -1;
10321
10322 struct DppCtrlCheck {
10323 int64_t Ctrl;
10324 int Lo;
10325 int Hi;
10326 };
10327
10328 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
10329 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
10330 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
10331 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
10332 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
10333 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
10334 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
10335 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
10336 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
10337 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
10338 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
10339 .Default({-1, 0, 0});
10340
10341 bool Valid;
10342 if (Check.Ctrl == -1) {
10343 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
10344 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
10345 } else {
10346 Valid = Check.Lo <= Val && Val <= Check.Hi;
10347 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
10348 }
10349
10350 if (!Valid) {
10351 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
10352 return -1;
10353 }
10354
10355 return Val;
10356}
10357
10358ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
10359 using namespace AMDGPU::DPP;
10360
10361 if (!isToken(AsmToken::Identifier) ||
10362 !isSupportedDPPCtrl(getTokenStr(), Operands))
10363 return ParseStatus::NoMatch;
10364
10365 SMLoc S = getLoc();
10366 int64_t Val = -1;
10367 StringRef Ctrl;
10368
10369 parseId(Ctrl);
10370
10371 if (Ctrl == "row_mirror") {
10372 Val = DppCtrl::ROW_MIRROR;
10373 } else if (Ctrl == "row_half_mirror") {
10374 Val = DppCtrl::ROW_HALF_MIRROR;
10375 } else {
10376 if (skipToken(AsmToken::Colon, "expected a colon")) {
10377 if (Ctrl == "quad_perm") {
10378 Val = parseDPPCtrlPerm();
10379 } else {
10380 Val = parseDPPCtrlSel(Ctrl);
10381 }
10382 }
10383 }
10384
10385 if (Val == -1)
10386 return ParseStatus::Failure;
10387
10388 Operands.push_back(
10389 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
10390 return ParseStatus::Success;
10391}
10392
/// Convert the parsed operands of a VOP3 DPP / DPP8 instruction into MCInst
/// operands.
///
/// Defs are added first. The remaining parsed operands are then walked in
/// order, inserting tied/dummy operands whenever the running MCInst operand
/// count reaches the index of 'old', 'src2_modifiers' or 'vdst_in'. Immediates
/// that do not map to a register-class operand are collected in OptionalIdx and
/// appended afterwards (clamp, byte_sel, omod, op_sel handling, then the DPP
/// controls). \p IsDPP8 selects the dpp8 tail (dpp8 selector + FI) instead of
/// dpp_ctrl / row_mask / bank_mask / bound_ctrl / fi.
10393void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
10394 bool IsDPP8) {
10395 OptionalImmIndexMap OptionalIdx;
10396 unsigned Opc = Inst.getOpcode();
10397 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10398
10399 // MAC instructions are special because they have 'old'
10400 // operand which is not tied to dst (but assumed to be).
10401 // They also have dummy unused src2_modifiers.
10402 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
10403 int Src2ModIdx =
10404 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
10405 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
10406 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
10407
// Parsed operand 0 is skipped; defs start at parsed operand 1.
10408 unsigned I = 1;
10409 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10410 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10411 }
10412
// Fi holds the value of a parsed fi operand; it is only consumed on the DPP8
// path at the end of the function.
10413 int Fi = 0;
10414 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
10415 bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
10416 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx13 ||
10417 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
10418 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx13 ||
10419 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
10420 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx13 ||
10421 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
10422 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx13;
10423
10424 for (unsigned E = Operands.size(); I != E; ++I) {
10425
10426 if (IsMAC) {
10427 int NumOperands = Inst.getNumOperands();
10428 if (OldIdx == NumOperands) {
10429 // Handle old operand
10430 constexpr int DST_IDX = 0;
10431 Inst.addOperand(Inst.getOperand(DST_IDX));
10432 } else if (Src2ModIdx == NumOperands) {
10433 // Add unused dummy src2_modifiers
// NOTE(review): listing line 10434 is absent here, so the statement that adds
// the dummy operand is not visible - restore it from the upstream file.
10435 }
10436 }
10437
// vdst_in, when it is the next MCInst operand, is a copy of operand 0.
10438 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
10439 Inst.addOperand(Inst.getOperand(0));
10440 }
10441
10442 if (IsVOP3CvtSrDpp) {
10443 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
// NOTE(review): listing line 10444 is absent here; presumably it adds the
// src2_modifiers operand before the empty register below - confirm upstream.
10445 Inst.addOperand(MCOperand::createReg(MCRegister()));
10446 }
10447 }
10448
// NOTE(review): listing line 10450 (the second argument of this call) is
// absent; the result is used as a tied-to index below.
10449 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10451 if (TiedTo != -1) {
10452 assert((unsigned)TiedTo < Inst.getNumOperands());
10453 // handle tied old or src2 for MAC instructions
10454 Inst.addOperand(Inst.getOperand(TiedTo));
10455 }
10456 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10457 // Add the register arguments
10458 if (IsDPP8 && Op.isDppFI()) {
10459 Fi = Op.getImm();
10460 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10461 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10462 } else if (Op.isReg()) {
10463 Op.addRegOperands(Inst, 1);
10464 } else if (Op.isImm() &&
10465 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
10466 Op.addImmOperands(Inst, 1);
10467 } else if (Op.isImm()) {
10468 OptionalIdx[Op.getImmTy()] = I;
10469 } else {
10470 llvm_unreachable("unhandled operand type");
10471 }
10472 }
10473
// From here on, optional immediates are appended in a fixed order.
10474 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
10475 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10476 AMDGPUOperand::ImmTyClamp);
10477
10478 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
10479 if (VdstInIdx == static_cast<int>(Inst.getNumOperands()))
10480 Inst.addOperand(Inst.getOperand(0));
10481 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10482 AMDGPUOperand::ImmTyByteSel);
10483 }
10484
10485 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10486 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
10487
10488 if (Desc.TSFlags & SIInstrFlags::VOP3P)
10489 cvtVOP3P(Inst, Operands, OptionalIdx);
10490 else if (Desc.TSFlags & SIInstrFlags::VOP3)
10491 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
10492 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
10493 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
10494 }
10495
10496 if (IsDPP8) {
10497 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
10498 using namespace llvm::AMDGPU::DPP;
10499 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10500 } else {
// Defaults used when the control is not spelled out: dpp_ctrl 0xe4,
// row_mask 0xf, bank_mask 0xf.
10501 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
10502 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10503 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10504 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10505
10506 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
10507 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10508 AMDGPUOperand::ImmTyDppFI);
10509 }
10510}
10511
10512void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
10513 OptionalImmIndexMap OptionalIdx;
10514
10515 unsigned I = 1;
10516 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10517 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10518 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10519 }
10520
10521 int Fi = 0;
10522 for (unsigned E = Operands.size(); I != E; ++I) {
10523 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10525 if (TiedTo != -1) {
10526 assert((unsigned)TiedTo < Inst.getNumOperands());
10527 // handle tied old or src2 for MAC instructions
10528 Inst.addOperand(Inst.getOperand(TiedTo));
10529 }
10530 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10531 // Add the register arguments
10532 if (Op.isReg() && validateVccOperand(Op.getReg())) {
10533 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
10534 // Skip it.
10535 continue;
10536 }
10537
10538 if (IsDPP8) {
10539 if (Op.isDPP8()) {
10540 Op.addImmOperands(Inst, 1);
10541 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10542 Op.addRegWithFPInputModsOperands(Inst, 2);
10543 } else if (Op.isDppFI()) {
10544 Fi = Op.getImm();
10545 } else if (Op.isReg()) {
10546 Op.addRegOperands(Inst, 1);
10547 } else {
10548 llvm_unreachable("Invalid operand type");
10549 }
10550 } else {
10552 Op.addRegWithFPInputModsOperands(Inst, 2);
10553 } else if (Op.isReg()) {
10554 Op.addRegOperands(Inst, 1);
10555 } else if (Op.isDPPCtrl()) {
10556 Op.addImmOperands(Inst, 1);
10557 } else if (Op.isImm()) {
10558 // Handle optional arguments
10559 OptionalIdx[Op.getImmTy()] = I;
10560 } else {
10561 llvm_unreachable("Invalid operand type");
10562 }
10563 }
10564 }
10565
10566 if (IsDPP8) {
10567 using namespace llvm::AMDGPU::DPP;
10568 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10569 } else {
10570 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10571 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10572 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10573 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
10574 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10575 AMDGPUOperand::ImmTyDppFI);
10576 }
10577 }
10578}
10579
10580//===----------------------------------------------------------------------===//
10581// sdwa
10582//===----------------------------------------------------------------------===//
10583
10584ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
10585 StringRef Prefix,
10586 AMDGPUOperand::ImmTy Type) {
10587 return parseStringOrIntWithPrefix(
10588 Operands, Prefix,
10589 {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
10590 Type);
10591}
10592
10593ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
10594 return parseStringOrIntWithPrefix(
10595 Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
10596 AMDGPUOperand::ImmTySDWADstUnused);
10597}
10598
10599void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
10600 cvtSDWA(Inst, Operands, SDWAInstType::VOP1);
10601}
10602
10603void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
10604 cvtSDWA(Inst, Operands, SDWAInstType::VOP2);
10605}
10606
10607void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
10608 cvtSDWA(Inst, Operands, SDWAInstType::VOP2, true, true);
10609}
10610
10611void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
10612 cvtSDWA(Inst, Operands, SDWAInstType::VOP2, false, true);
10613}
10614
10615void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
10616 cvtSDWA(Inst, Operands, SDWAInstType::VOPC, isVI());
10617}
10618
10619void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
10620 SDWAInstType BasicInstType, bool SkipDstVcc,
10621 bool SkipSrcVcc) {
10622 using namespace llvm::AMDGPU::SDWA;
10623
10624 OptionalImmIndexMap OptionalIdx;
10625 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
10626 bool SkippedVcc = false;
10627
10628 unsigned I = 1;
10629 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10630 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10631 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10632 }
10633
10634 for (unsigned E = Operands.size(); I != E; ++I) {
10635 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10636 if (SkipVcc && !SkippedVcc && Op.isReg() &&
10637 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
10638 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
10639 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
10640 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
10641 // Skip VCC only if we didn't skip it on previous iteration.
10642 // Note that src0 and src1 occupy 2 slots each because of modifiers.
10643 if (BasicInstType == SDWAInstType::VOP2 &&
10644 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
10645 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
10646 SkippedVcc = true;
10647 continue;
10648 }
10649 if (BasicInstType == SDWAInstType::VOPC && Inst.getNumOperands() == 0) {
10650 SkippedVcc = true;
10651 continue;
10652 }
10653 }
10655 Op.addRegOrImmWithInputModsOperands(Inst, 2);
10656 } else if (Op.isImm()) {
10657 // Handle optional arguments
10658 OptionalIdx[Op.getImmTy()] = I;
10659 } else {
10660 llvm_unreachable("Invalid operand type");
10661 }
10662 SkippedVcc = false;
10663 }
10664
10665 const unsigned Opc = Inst.getOpcode();
10666 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10667 Opc != AMDGPU::V_NOP_sdwa_vi) {
10668 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
10669 switch (BasicInstType) {
10670 case SDWAInstType::VOP1:
10671 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
10672 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10673 AMDGPUOperand::ImmTyClamp, 0);
10674
10675 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10676 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10677 AMDGPUOperand::ImmTyOModSI, 0);
10678
10679 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
10680 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10681 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10682
10683 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
10684 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10685 AMDGPUOperand::ImmTySDWADstUnused,
10686 DstUnused::UNUSED_PRESERVE);
10687
10688 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10689 break;
10690
10691 case SDWAInstType::VOP2:
10692 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10693 AMDGPUOperand::ImmTyClamp, 0);
10694
10695 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
10696 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
10697
10698 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10699 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
10700 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10701 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10702 break;
10703
10704 case SDWAInstType::VOPC:
10705 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
10706 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10707 AMDGPUOperand::ImmTyClamp, 0);
10708 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10709 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10710 break;
10711 }
10712 }
10713
10714 // special case v_mac_{f16, f32}:
10715 // it has src2 register operand that is tied to dst operand
10716 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10717 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10718 auto *it = Inst.begin();
10719 std::advance(
10720 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
10721 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
10722 }
10723}
10724
10725/// Force static initialization.
10726extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
10731
10732#define GET_MATCHER_IMPLEMENTATION
10733#define GET_MNEMONIC_SPELL_CHECKER
10734#define GET_MNEMONIC_CHECKER
10735#include "AMDGPUGenAsmMatcher.inc"
10736
10737ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
10738 unsigned MCK) {
10739 switch (MCK) {
10740 case MCK_addr64:
10741 return parseTokenOp("addr64", Operands);
10742 case MCK_done:
10743 return parseNamedBit("done", Operands, AMDGPUOperand::ImmTyDone, true);
10744 case MCK_idxen:
10745 return parseTokenOp("idxen", Operands);
10746 case MCK_lds:
10747 return parseNamedBit("lds", Operands, AMDGPUOperand::ImmTyLDS,
10748 /*IgnoreNegative=*/true);
10749 case MCK_offen:
10750 return parseTokenOp("offen", Operands);
10751 case MCK_off:
10752 return parseTokenOp("off", Operands);
10753 case MCK_row_95_en:
10754 return parseNamedBit("row_en", Operands, AMDGPUOperand::ImmTyRowEn, true);
10755 case MCK_gds:
10756 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
10757 case MCK_tfe:
10758 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
10759 }
10760 return tryCustomParseOperand(Operands, MCK);
10761}
10762
10763// This function should be defined after auto-generated include so that we have
10764// MatchClassKind enum defined
10765unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
10766 unsigned Kind) {
10767 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
10768 // But MatchInstructionImpl() expects to meet token and fails to validate
10769 // operand. This method checks if we are given immediate operand but expect to
10770 // get corresponding token.
10771 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
10772 switch (Kind) {
10773 case MCK_addr64:
10774 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10775 case MCK_gds:
10776 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10777 case MCK_lds:
10778 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10779 case MCK_idxen:
10780 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10781 case MCK_offen:
10782 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10783 case MCK_tfe:
10784 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10785 case MCK_done:
10786 return Operand.isDone() ? Match_Success : Match_InvalidOperand;
10787 case MCK_row_95_en:
10788 return Operand.isRowEn() ? Match_Success : Match_InvalidOperand;
10789 case MCK_SSrc_b32:
10790 // When operands have expression values, they will return true for isToken,
10791 // because it is not possible to distinguish between a token and an
10792 // expression at parse time. MatchInstructionImpl() will always try to
10793 // match an operand as a token, when isToken returns true, and when the
10794 // name of the expression is not a valid token, the match will fail,
10795 // so we need to handle it here.
10796 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10797 case MCK_SSrc_f32:
10798 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10799 case MCK_SOPPBrTarget:
10800 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10801 case MCK_VReg32OrOff:
10802 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10803 case MCK_InterpSlot:
10804 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10805 case MCK_InterpAttr:
10806 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10807 case MCK_InterpAttrChan:
10808 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10809 case MCK_SReg_64:
10810 case MCK_SReg_64_XEXEC:
10811 // Null is defined as a 32-bit register but
10812 // it should also be enabled with 64-bit operands or larger.
10813 // The following code enables it for SReg_64 and larger operands
10814 // used as source and destination. Remaining source
10815 // operands are handled in isInlinableImm.
10816 case MCK_SReg_96:
10817 case MCK_SReg_128:
10818 case MCK_SReg_256:
10819 case MCK_SReg_512:
10820 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10821 default:
10822 return Match_InvalidOperand;
10823 }
10824}
10825
10826//===----------------------------------------------------------------------===//
10827// endpgm
10828//===----------------------------------------------------------------------===//
10829
10830ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
10831 SMLoc S = getLoc();
10832 int64_t Imm = 0;
10833
10834 if (!parseExpr(Imm)) {
10835 // The operand is optional, if not present default to 0
10836 Imm = 0;
10837 }
10838
10839 if (!isUInt<16>(Imm))
10840 return Error(S, "expected a 16-bit value");
10841
10842 Operands.push_back(
10843 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
10844 return ParseStatus::Success;
10845}
10846
10847bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
10848
10849//===----------------------------------------------------------------------===//
10850// Split Barrier
10851//===----------------------------------------------------------------------===//
10852
10853bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
#define Success
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
SmallVector< int16_t, MAX_SRC_OPERANDS_NUM > OperandIndices
static bool checkWriteLane(const MCInst &Inst)
static bool getRegNum(StringRef Str, unsigned &Num)
static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, unsigned i, unsigned Opc, AMDGPU::OpName OpName)
static constexpr RegInfo RegularRegisters[]
static const RegInfo * getRegularRegInfo(StringRef Str)
static ArrayRef< unsigned > getAllVariants()
static OperandIndices getSrcOperandIndices(unsigned Opcode, bool AddMandatoryLiterals=false)
static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name, const MCRegisterInfo *MRI)
static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
static const fltSemantics * getFltSemantics(unsigned Size)
static bool isRegularReg(RegisterKind Kind)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser()
Force static initialization.
static bool ConvertOmodMul(int64_t &Mul)
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi)
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT)
constexpr uint64_t MIMGFlags
static bool AMDGPUCheckMnemonic(StringRef Mnemonic, const FeatureBitset &AvailableFeatures, unsigned VariantID)
static void applyMnemonicAliases(StringRef &Mnemonic, const FeatureBitset &Features, unsigned VariantID)
constexpr unsigned MAX_SRC_OPERANDS_NUM
#define EXPR_RESOLVE_OR_ERROR(RESOLVED)
static bool ConvertOmodDiv(int64_t &Div)
static bool IsRevOpcode(const unsigned Opcode)
static bool encodeCnt(const AMDGPU::IsaVersion ISA, int64_t &IntVal, int64_t CntVal, bool Saturate, unsigned(*encode)(const IsaVersion &Version, unsigned, unsigned), unsigned(*decode)(const IsaVersion &Version, unsigned))
static MCRegister getSpecialRegForName(StringRef RegName)
static void addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands, AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx, AMDGPUOperand::ImmTy ImmT, int64_t Default=0, std::optional< unsigned > InsertAt=std::nullopt)
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI)
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum)
static const fltSemantics * getOpFltSemantics(uint8_t OperandType)
static bool isInvalidVOPDY(const OperandVector &Operands, uint64_t InvalidOprIdx)
static std::string AMDGPUMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID=0)
static LLVM_READNONE unsigned encodeBitmaskPerm(const unsigned AndMask, const unsigned OrMask, const unsigned XorMask)
static bool isSafeTruncation(int64_t Val, unsigned Size)
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Provides AMDGPU specific target descriptions.
AMDGPU metadata definitions and in-memory representations.
Enums shared between the AMDGPU backend (LLVM) and the ELF linker (LLD) for the .amdgpu....
AMDHSA kernel descriptor definitions.
static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value, raw_ostream &Err)
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file declares a class to represent arbitrary precision floating point values and provide a varie...
#define X(NUM, ENUM, NAME)
Definition ELF.h:856
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_READNONE
Definition Compiler.h:317
#define LLVM_ABI
Definition Compiler.h:215
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
@ Default
#define Check(C,...)
static llvm::Expected< InlineInfo > decode(GsymDataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr)
Decode an InlineInfo in Data at the specified offset.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
Loop::LoopBounds::Direction Direction
Definition LoopInfo.cpp:253
static bool hasFeature(StringRef Feature, const FeatureBitset &FeatureBits, ArrayRef< SubtargetFeatureKV > ProcFeatures)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(PassOpts->AAPipeline)
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:484
This file implements the SmallBitVector class.
StringSet - A set-like wrapper for the StringMap.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
BinaryOperator * Mul
static const char * getRegisterName(MCRegister Reg)
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static unsigned getNumExpectedArgs(VariantKind Kind)
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
static std::optional< TargetID > parseTargetIDString(StringRef TargetIDDirective)
std::string toString() const
static const fltSemantics & IEEEsingle()
Definition APFloat.h:297
static const fltSemantics & BFloat()
Definition APFloat.h:296
static const fltSemantics & IEEEdouble()
Definition APFloat.h:298
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:345
static const fltSemantics & IEEEhalf()
Definition APFloat.h:295
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:361
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5901
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:130
size_t size() const
Get the array size.
Definition ArrayRef.h:141
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition MCAsmMacro.h:103
bool is(TokenKind K) const
Definition MCAsmMacro.h:75
Register getReg() const
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
void printExpr(raw_ostream &, const MCExpr &) const
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:342
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:352
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:427
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Look up the symbol with the specified Name.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getNumOperands() const
Definition MCInst.h:212
SMLoc getLoc() const
Definition MCInst.h:208
void setLoc(SMLoc loc)
Definition MCInst.h:207
unsigned getOpcode() const
Definition MCInst.h:202
iterator insert(iterator I, const MCOperand &Op)
Definition MCInst.h:232
void addOperand(const MCOperand Op)
Definition MCInst.h:215
iterator begin()
Definition MCInst.h:227
size_t size() const
Definition MCInst.h:226
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
int16_t getOpRegClassID(const MCOperandInfo &OpInfo, unsigned HwModeId) const
Return the ID of the register class to use for OpInfo, for the active HwMode HwModeId.
Definition MCInstrInfo.h:80
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
void setImm(int64_t Val)
Definition MCInst.h:89
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
bool isImm() const
Definition MCInst.h:66
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
const MCExpr * getExpr() const
Definition MCInst.h:118
bool isExpr() const
Definition MCInst.h:69
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
bool regsOverlap(MCRegister RegA, MCRegister RegB) const
Returns true if the two registers are equal or alias each other.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Idx" for physical register Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
constexpr bool isValid() const
Definition MCRegister.h:84
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
Generic base class for all target subtargets.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
void setRedefinable(bool Value)
Mark this symbol as redefinable.
Definition MCSymbol.h:210
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
MCTargetAsmParser - Generic interface to target specific assembly parsers.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
constexpr unsigned id() const
Definition Register.h:100
Represents a location in source code.
Definition SMLoc.h:22
static SMLoc getFromPointer(const char *Ptr)
Definition SMLoc.h:35
constexpr const char * getPointer() const
Definition SMLoc.h:33
constexpr bool isValid() const
Definition SMLoc.h:28
SMLoc Start
Definition SMLoc.h:49
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:888
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
Definition StringRef.h:691
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:597
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:635
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:138
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition StringRef.h:270
bool consume_front(char Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:661
bool contains(StringRef key) const
Check if the set contains the given key.
Definition StringSet.h:60
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition StringSet.h:39
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:212
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
static constexpr CustomOperand Operands[]
unsigned getAddressableNumSGPRs(const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo &STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo &STI)
unsigned getLocalMemorySize(const MCSubtargetInfo &STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
constexpr const char *const ModMatrixFmt[]
constexpr const char *const ModMatrixScaleFmt[]
constexpr const char *const ModMatrixScale[]
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isInlineValue(MCRegister Reg)
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg a scalar register?
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI; otherwise return Reg.
FuncInfoFlags
Per-function flags packed into INFO_FLAGS entries.
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY bool isLitExpr(const MCExpr *Expr)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX13(const MCSubtargetInfo &STI)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool hasMAIInsts(const MCSubtargetInfo &STI)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool isGFX13Plus(const MCSubtargetInfo &STI)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
LLVM_READONLY int64_t getLitValue(const MCExpr *Expr)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
AMDGPU::TargetID TargetID
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
bool isValidWMMAScaleFmtCombination(unsigned AFmt, unsigned AScale, unsigned BFmt, unsigned BScale)
@ OPERAND_REG_IMM_V2FP64
Definition SIDefines.h:433
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:451
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:419
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:426
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:442
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:439
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:444
@ OPERAND_REG_IMM_V2INT64
Definition SIDefines.h:429
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:428
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:423
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:418
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:425
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:424
@ OPERAND_REG_IMM_V2FP16_SPLAT
Definition SIDefines.h:427
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:438
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:436
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:430
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:422
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:445
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:456
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:457
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:431
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:421
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:441
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:437
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:443
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:432
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:458
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:440
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:420
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:448
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
bool isGFX1250Plus(const MCSubtargetInfo &STI)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1438
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ OPERAND_IMMEDIATE
Definition MCInstrDesc.h:61
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
constexpr bool hasIntClamp(const T &...O)
Definition SIDefines.h:321
@ Valid
The data is already valid.
Context & getContext() const
Definition BasicBlock.h:99
bool isNull(StringRef S)
Definition YAMLTraits.h:571
This is an optimization pass for GlobalISel generic memory operations.
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
Definition Error.h:1129
@ Offset
Definition DWP.cpp:573
StringMapEntry< Value * > ValueName
Definition Value.h:56
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
Definition Alignment.h:206
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
static bool isMem(const MachineInstr &MI, unsigned Op)
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
LLVM_ABI void PrintError(const Twine &Msg)
Definition Error.cpp:104
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:362
Op::Description Desc
Target & getTheR600Target()
The target for R600 GPUs.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:156
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
Definition InstrProf.h:143
SmallVectorImpl< std::unique_ptr< MCParsedAsmOperand > > OperandVector
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
Definition DWP.h:31
@ Default
The result value is uniform if and only if all operands are uniform.
Definition Uniformity.h:20
#define N
RegisterKind Kind
StringLiteral Name
void initDefault(const MCSubtargetInfo &STI, MCContext &Ctx, bool InitMCExpr=true)
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
SmallVector< std::pair< MCSymbol *, std::string >, 4 > IndirectCalls
SmallVector< std::pair< MCSymbol *, MCSymbol * >, 8 > Calls
SmallVector< FuncInfo, 8 > Funcs
SmallVector< std::pair< MCSymbol *, std::string >, 4 > TypeIds
SmallVector< std::pair< MCSymbol *, MCSymbol * >, 4 > Uses
Instruction set architecture version.
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...