LLVM 22.0.0git
AMDGPUAsmParser.cpp
Go to the documentation of this file.
1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
16#include "SIDefines.h"
17#include "SIInstrInfo.h"
22#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/StringSet.h"
25#include "llvm/ADT/Twine.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/MC/MCInstrDesc.h"
38#include "llvm/MC/MCSymbol.h"
46#include <optional>
47
48using namespace llvm;
49using namespace llvm::AMDGPU;
50using namespace llvm::amdhsa;
51
52namespace {
53
54class AMDGPUAsmParser;
55
56enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
57
58//===----------------------------------------------------------------------===//
59// Operand
60//===----------------------------------------------------------------------===//
61
62class AMDGPUOperand : public MCParsedAsmOperand {
63 enum KindTy {
64 Token,
65 Immediate,
66 Register,
67 Expression
68 } Kind;
69
70 SMLoc StartLoc, EndLoc;
71 const AMDGPUAsmParser *AsmParser;
72
73public:
74 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
75 : Kind(Kind_), AsmParser(AsmParser_) {}
76
77 using Ptr = std::unique_ptr<AMDGPUOperand>;
78
79 struct Modifiers {
80 bool Abs = false;
81 bool Neg = false;
82 bool Sext = false;
83 LitModifier Lit = LitModifier::None;
84
85 bool hasFPModifiers() const { return Abs || Neg; }
86 bool hasIntModifiers() const { return Sext; }
87 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
88
89 int64_t getFPModifiersOperand() const {
90 int64_t Operand = 0;
91 Operand |= Abs ? SISrcMods::ABS : 0u;
92 Operand |= Neg ? SISrcMods::NEG : 0u;
93 return Operand;
94 }
95
96 int64_t getIntModifiersOperand() const {
97 int64_t Operand = 0;
98 Operand |= Sext ? SISrcMods::SEXT : 0u;
99 return Operand;
100 }
101
102 int64_t getModifiersOperand() const {
103 assert(!(hasFPModifiers() && hasIntModifiers())
104 && "fp and int modifiers should not be used simultaneously");
105 if (hasFPModifiers())
106 return getFPModifiersOperand();
107 if (hasIntModifiers())
108 return getIntModifiersOperand();
109 return 0;
110 }
111
112 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
113 };
114
  // Classifies a parsed immediate operand. ImmTyNone is a plain literal
  // value; every other enumerator tags a named instruction modifier or
  // field (offsets, clamp/omod, DPP and SDWA controls, MIMG flags, wait
  // counts, WMMA matrix attributes, ...) so the operand can be validated
  // and routed to the right MCInst slot.
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTySMEMOffsetMod,
    ImmTyCPol,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClamp,
    ImmTyOModSI,
    ImmTySDWADstSel,
    ImmTySDWASrc0Sel,
    ImmTySDWASrc1Sel,
    ImmTySDWADstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyInterpAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyIndexKey8bit,
    ImmTyIndexKey16bit,
    ImmTyIndexKey32bit,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFI,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
    ImmTyWaitVAVDst,
    ImmTyWaitVMVSrc,
    ImmTyBitOp3,
    ImmTyMatrixAFMT,
    ImmTyMatrixBFMT,
    ImmTyMatrixAScale,
    ImmTyMatrixBScale,
    ImmTyMatrixAScaleFmt,
    ImmTyMatrixBScaleFmt,
    ImmTyMatrixAReuse,
    ImmTyMatrixBReuse,
    ImmTyScaleSel,
    ImmTyByteSel,
  };
189
190private:
191 struct TokOp {
192 const char *Data;
193 unsigned Length;
194 };
195
196 struct ImmOp {
197 int64_t Val;
198 ImmTy Type;
199 bool IsFPImm;
200 Modifiers Mods;
201 };
202
203 struct RegOp {
204 MCRegister RegNo;
205 Modifiers Mods;
206 };
207
208 union {
209 TokOp Tok;
210 ImmOp Imm;
211 RegOp Reg;
212 const MCExpr *Expr;
213 };
214
215 // The index of the associated MCInst operand.
216 mutable int MCOpIdx = -1;
217
218public:
219 bool isToken() const override { return Kind == Token; }
220
221 bool isSymbolRefExpr() const {
222 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
223 }
224
225 bool isImm() const override {
226 return Kind == Immediate;
227 }
228
229 bool isInlinableImm(MVT type) const;
230 bool isLiteralImm(MVT type) const;
231
232 bool isRegKind() const {
233 return Kind == Register;
234 }
235
236 bool isReg() const override {
237 return isRegKind() && !hasModifiers();
238 }
239
240 bool isRegOrInline(unsigned RCID, MVT type) const {
241 return isRegClass(RCID) || isInlinableImm(type);
242 }
243
244 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
245 return isRegOrInline(RCID, type) || isLiteralImm(type);
246 }
247
248 bool isRegOrImmWithInt16InputMods() const {
249 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
250 }
251
252 template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
254 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
255 }
256
257 bool isRegOrImmWithInt32InputMods() const {
258 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
259 }
260
261 bool isRegOrInlineImmWithInt16InputMods() const {
262 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
263 }
264
265 template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
266 return isRegOrInline(
267 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
268 }
269
270 bool isRegOrInlineImmWithInt32InputMods() const {
271 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
272 }
273
274 bool isRegOrImmWithInt64InputMods() const {
275 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
276 }
277
278 bool isRegOrImmWithFP16InputMods() const {
279 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
280 }
281
282 template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
284 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
285 }
286
287 bool isRegOrImmWithFP32InputMods() const {
288 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
289 }
290
291 bool isRegOrImmWithFP64InputMods() const {
292 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
293 }
294
295 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
296 return isRegOrInline(
297 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
298 }
299
300 bool isRegOrInlineImmWithFP32InputMods() const {
301 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
302 }
303
304 bool isRegOrInlineImmWithFP64InputMods() const {
305 return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
306 }
307
308 bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }
309
310 bool isVRegWithFP32InputMods() const {
311 return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
312 }
313
314 bool isVRegWithFP64InputMods() const {
315 return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
316 }
317
318 bool isPackedFP16InputMods() const {
319 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
320 }
321
322 bool isPackedVGPRFP32InputMods() const {
323 return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
324 }
325
326 bool isVReg() const {
327 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
328 isRegClass(AMDGPU::VReg_64RegClassID) ||
329 isRegClass(AMDGPU::VReg_96RegClassID) ||
330 isRegClass(AMDGPU::VReg_128RegClassID) ||
331 isRegClass(AMDGPU::VReg_160RegClassID) ||
332 isRegClass(AMDGPU::VReg_192RegClassID) ||
333 isRegClass(AMDGPU::VReg_256RegClassID) ||
334 isRegClass(AMDGPU::VReg_512RegClassID) ||
335 isRegClass(AMDGPU::VReg_1024RegClassID);
336 }
337
338 bool isVReg32() const {
339 return isRegClass(AMDGPU::VGPR_32RegClassID);
340 }
341
342 bool isVReg32OrOff() const {
343 return isOff() || isVReg32();
344 }
345
346 bool isNull() const {
347 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
348 }
349
350 bool isAV_LdSt_32_Align2_RegOp() const {
351 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
352 isRegClass(AMDGPU::AGPR_32RegClassID);
353 }
354
355 bool isVRegWithInputMods() const;
356 template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
357 template <bool IsFake16> bool isT16VRegWithInputMods() const;
358
359 bool isSDWAOperand(MVT type) const;
360 bool isSDWAFP16Operand() const;
361 bool isSDWAFP32Operand() const;
362 bool isSDWAInt16Operand() const;
363 bool isSDWAInt32Operand() const;
364
365 bool isImmTy(ImmTy ImmT) const {
366 return isImm() && Imm.Type == ImmT;
367 }
368
369 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
370
371 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
372
373 bool isImmModifier() const {
374 return isImm() && Imm.Type != ImmTyNone;
375 }
376
377 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
378 bool isDim() const { return isImmTy(ImmTyDim); }
379 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
380 bool isOff() const { return isImmTy(ImmTyOff); }
381 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
382 bool isOffen() const { return isImmTy(ImmTyOffen); }
383 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
384 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
385 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
386 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
387 bool isGDS() const { return isImmTy(ImmTyGDS); }
388 bool isLDS() const { return isImmTy(ImmTyLDS); }
389 bool isCPol() const { return isImmTy(ImmTyCPol); }
390 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
391 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
392 bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
393 bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
394 bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
395 bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
396 bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
397 bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
398 bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
399 bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
400 bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
401 bool isTFE() const { return isImmTy(ImmTyTFE); }
402 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
403 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
404 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
405 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
406 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
407 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
408 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
409 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
410 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
411 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
412 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
413 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
414 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
415 bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
416
417 bool isRegOrImm() const {
418 return isReg() || isImm();
419 }
420
421 bool isRegClass(unsigned RCID) const;
422
423 bool isInlineValue() const;
424
425 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
426 return isRegOrInline(RCID, type) && !hasModifiers();
427 }
428
429 bool isSCSrcB16() const {
430 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
431 }
432
433 bool isSCSrcV2B16() const {
434 return isSCSrcB16();
435 }
436
437 bool isSCSrc_b32() const {
438 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
439 }
440
441 bool isSCSrc_b64() const {
442 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
443 }
444
445 bool isBoolReg() const;
446
447 bool isSCSrcF16() const {
448 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
449 }
450
451 bool isSCSrcV2F16() const {
452 return isSCSrcF16();
453 }
454
455 bool isSCSrcF32() const {
456 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
457 }
458
459 bool isSCSrcF64() const {
460 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
461 }
462
463 bool isSSrc_b32() const {
464 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
465 }
466
467 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
468
469 bool isSSrcV2B16() const {
470 llvm_unreachable("cannot happen");
471 return isSSrc_b16();
472 }
473
  // 64-bit scalar source: SGPR-or-inline constant, a 64-bit literal, or —
  // only on subtargets with the 64-bit-literals feature — an arbitrary
  // expression (resolved later to a 64-bit literal).
  bool isSSrc_b64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
           (((const MCTargetAsmParser *)AsmParser)
                ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
            isExpr());
  }
482
483 bool isSSrc_f32() const {
484 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
485 }
486
487 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
488
489 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
490
491 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
492
493 bool isSSrcV2F16() const {
494 llvm_unreachable("cannot happen");
495 return isSSrc_f16();
496 }
497
498 bool isSSrcV2FP32() const {
499 llvm_unreachable("cannot happen");
500 return isSSrc_f32();
501 }
502
503 bool isSCSrcV2FP32() const {
504 llvm_unreachable("cannot happen");
505 return isSCSrcF32();
506 }
507
508 bool isSSrcV2INT32() const {
509 llvm_unreachable("cannot happen");
510 return isSSrc_b32();
511 }
512
513 bool isSCSrcV2INT32() const {
514 llvm_unreachable("cannot happen");
515 return isSCSrc_b32();
516 }
517
518 bool isSSrcOrLds_b32() const {
519 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
520 isLiteralImm(MVT::i32) || isExpr();
521 }
522
523 bool isVCSrc_b32() const {
524 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
525 }
526
527 bool isVCSrc_b32_Lo256() const {
528 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
529 }
530
531 bool isVCSrc_b64_Lo256() const {
532 return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
533 }
534
535 bool isVCSrc_b64() const {
536 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
537 }
538
539 bool isVCSrcT_b16() const {
540 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
541 }
542
543 bool isVCSrcTB16_Lo128() const {
544 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
545 }
546
547 bool isVCSrcFake16B16_Lo128() const {
548 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
549 }
550
551 bool isVCSrc_b16() const {
552 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
553 }
554
555 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
556
557 bool isVCSrc_f32() const {
558 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
559 }
560
561 bool isVCSrc_f64() const {
562 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
563 }
564
565 bool isVCSrcTBF16() const {
566 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
567 }
568
569 bool isVCSrcT_f16() const {
570 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
571 }
572
573 bool isVCSrcT_bf16() const {
574 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
575 }
576
577 bool isVCSrcTBF16_Lo128() const {
578 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
579 }
580
581 bool isVCSrcTF16_Lo128() const {
582 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
583 }
584
585 bool isVCSrcFake16BF16_Lo128() const {
586 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
587 }
588
589 bool isVCSrcFake16F16_Lo128() const {
590 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
591 }
592
593 bool isVCSrc_bf16() const {
594 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
595 }
596
597 bool isVCSrc_f16() const {
598 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
599 }
600
601 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
602
603 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
604
605 bool isVSrc_b32() const {
606 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
607 }
608
609 bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }
610
611 bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
612
613 bool isVSrcT_b16_Lo128() const {
614 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
615 }
616
617 bool isVSrcFake16_b16_Lo128() const {
618 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
619 }
620
621 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
622
623 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
624
625 bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
626
627 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
628
629 bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
630
631 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
632
633 bool isVSrc_f32() const {
634 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
635 }
636
637 bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }
638
639 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
640
641 bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
642
643 bool isVSrcT_bf16_Lo128() const {
644 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
645 }
646
647 bool isVSrcT_f16_Lo128() const {
648 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
649 }
650
651 bool isVSrcFake16_bf16_Lo128() const {
652 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
653 }
654
655 bool isVSrcFake16_f16_Lo128() const {
656 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
657 }
658
659 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
660
661 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
662
663 bool isVSrc_v2bf16() const {
664 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
665 }
666
667 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
668
669 bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
670
671 bool isVISrcB32() const {
672 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
673 }
674
675 bool isVISrcB16() const {
676 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
677 }
678
679 bool isVISrcV2B16() const {
680 return isVISrcB16();
681 }
682
683 bool isVISrcF32() const {
684 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
685 }
686
687 bool isVISrcF16() const {
688 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
689 }
690
691 bool isVISrcV2F16() const {
692 return isVISrcF16() || isVISrcB32();
693 }
694
695 bool isVISrc_64_bf16() const {
696 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
697 }
698
699 bool isVISrc_64_f16() const {
700 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
701 }
702
703 bool isVISrc_64_b32() const {
704 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
705 }
706
707 bool isVISrc_64B64() const {
708 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
709 }
710
711 bool isVISrc_64_f64() const {
712 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
713 }
714
715 bool isVISrc_64V2FP32() const {
716 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
717 }
718
719 bool isVISrc_64V2INT32() const {
720 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
721 }
722
723 bool isVISrc_256_b32() const {
724 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
725 }
726
727 bool isVISrc_256_f32() const {
728 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
729 }
730
731 bool isVISrc_256B64() const {
732 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
733 }
734
735 bool isVISrc_256_f64() const {
736 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
737 }
738
739 bool isVISrc_512_f64() const {
740 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
741 }
742
743 bool isVISrc_128B16() const {
744 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
745 }
746
747 bool isVISrc_128V2B16() const {
748 return isVISrc_128B16();
749 }
750
751 bool isVISrc_128_b32() const {
752 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
753 }
754
755 bool isVISrc_128_f32() const {
756 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
757 }
758
759 bool isVISrc_256V2FP32() const {
760 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
761 }
762
763 bool isVISrc_256V2INT32() const {
764 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
765 }
766
767 bool isVISrc_512_b32() const {
768 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
769 }
770
771 bool isVISrc_512B16() const {
772 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
773 }
774
775 bool isVISrc_512V2B16() const {
776 return isVISrc_512B16();
777 }
778
779 bool isVISrc_512_f32() const {
780 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
781 }
782
783 bool isVISrc_512F16() const {
784 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
785 }
786
787 bool isVISrc_512V2F16() const {
788 return isVISrc_512F16() || isVISrc_512_b32();
789 }
790
791 bool isVISrc_1024_b32() const {
792 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
793 }
794
795 bool isVISrc_1024B16() const {
796 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
797 }
798
799 bool isVISrc_1024V2B16() const {
800 return isVISrc_1024B16();
801 }
802
803 bool isVISrc_1024_f32() const {
804 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
805 }
806
807 bool isVISrc_1024F16() const {
808 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
809 }
810
811 bool isVISrc_1024V2F16() const {
812 return isVISrc_1024F16() || isVISrc_1024_b32();
813 }
814
815 bool isAISrcB32() const {
816 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
817 }
818
819 bool isAISrcB16() const {
820 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
821 }
822
823 bool isAISrcV2B16() const {
824 return isAISrcB16();
825 }
826
827 bool isAISrcF32() const {
828 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
829 }
830
831 bool isAISrcF16() const {
832 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
833 }
834
835 bool isAISrcV2F16() const {
836 return isAISrcF16() || isAISrcB32();
837 }
838
839 bool isAISrc_64B64() const {
840 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
841 }
842
843 bool isAISrc_64_f64() const {
844 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
845 }
846
847 bool isAISrc_128_b32() const {
848 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
849 }
850
851 bool isAISrc_128B16() const {
852 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
853 }
854
855 bool isAISrc_128V2B16() const {
856 return isAISrc_128B16();
857 }
858
859 bool isAISrc_128_f32() const {
860 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
861 }
862
863 bool isAISrc_128F16() const {
864 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
865 }
866
867 bool isAISrc_128V2F16() const {
868 return isAISrc_128F16() || isAISrc_128_b32();
869 }
870
871 bool isVISrc_128_bf16() const {
872 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
873 }
874
875 bool isVISrc_128_f16() const {
876 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
877 }
878
879 bool isVISrc_128V2F16() const {
880 return isVISrc_128_f16() || isVISrc_128_b32();
881 }
882
883 bool isAISrc_256B64() const {
884 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
885 }
886
887 bool isAISrc_256_f64() const {
888 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
889 }
890
891 bool isAISrc_512_b32() const {
892 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
893 }
894
895 bool isAISrc_512B16() const {
896 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
897 }
898
899 bool isAISrc_512V2B16() const {
900 return isAISrc_512B16();
901 }
902
903 bool isAISrc_512_f32() const {
904 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
905 }
906
907 bool isAISrc_512F16() const {
908 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
909 }
910
911 bool isAISrc_512V2F16() const {
912 return isAISrc_512F16() || isAISrc_512_b32();
913 }
914
915 bool isAISrc_1024_b32() const {
916 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
917 }
918
919 bool isAISrc_1024B16() const {
920 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
921 }
922
923 bool isAISrc_1024V2B16() const {
924 return isAISrc_1024B16();
925 }
926
927 bool isAISrc_1024_f32() const {
928 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
929 }
930
931 bool isAISrc_1024F16() const {
932 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
933 }
934
935 bool isAISrc_1024V2F16() const {
936 return isAISrc_1024F16() || isAISrc_1024_b32();
937 }
938
939 bool isKImmFP32() const {
940 return isLiteralImm(MVT::f32);
941 }
942
943 bool isKImmFP16() const {
944 return isLiteralImm(MVT::f16);
945 }
946
947 bool isKImmFP64() const { return isLiteralImm(MVT::f64); }
948
949 bool isMem() const override {
950 return false;
951 }
952
953 bool isExpr() const {
954 return Kind == Expression;
955 }
956
957 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
958
959 bool isSWaitCnt() const;
960 bool isDepCtr() const;
961 bool isSDelayALU() const;
962 bool isHwreg() const;
963 bool isSendMsg() const;
964 bool isSplitBarrier() const;
965 bool isSwizzle() const;
966 bool isSMRDOffset8() const;
967 bool isSMEMOffset() const;
968 bool isSMRDLiteralOffset() const;
969 bool isDPP8() const;
970 bool isDPPCtrl() const;
971 bool isBLGP() const;
972 bool isGPRIdxMode() const;
973 bool isS16Imm() const;
974 bool isU16Imm() const;
975 bool isEndpgm() const;
976
  // Binds predicate P to this operand, returning a nullary closure that
  // evaluates P(*this). P is captured by copy; the closure holds a raw
  // `this`, so it must not outlive the operand.
  auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
    return [this, P]() { return P(*this); };
  }
980
981 StringRef getToken() const {
982 assert(isToken());
983 return StringRef(Tok.Data, Tok.Length);
984 }
985
986 int64_t getImm() const {
987 assert(isImm());
988 return Imm.Val;
989 }
990
991 void setImm(int64_t Val) {
992 assert(isImm());
993 Imm.Val = Val;
994 }
995
996 ImmTy getImmTy() const {
997 assert(isImm());
998 return Imm.Type;
999 }
1000
1001 MCRegister getReg() const override {
1002 assert(isRegKind());
1003 return Reg.RegNo;
1004 }
1005
1006 SMLoc getStartLoc() const override {
1007 return StartLoc;
1008 }
1009
1010 SMLoc getEndLoc() const override {
1011 return EndLoc;
1012 }
1013
1014 SMRange getLocRange() const {
1015 return SMRange(StartLoc, EndLoc);
1016 }
1017
1018 int getMCOpIdx() const { return MCOpIdx; }
1019
1020 Modifiers getModifiers() const {
1021 assert(isRegKind() || isImmTy(ImmTyNone));
1022 return isRegKind() ? Reg.Mods : Imm.Mods;
1023 }
1024
1025 void setModifiers(Modifiers Mods) {
1026 assert(isRegKind() || isImmTy(ImmTyNone));
1027 if (isRegKind())
1028 Reg.Mods = Mods;
1029 else
1030 Imm.Mods = Mods;
1031 }
1032
1033 bool hasModifiers() const {
1034 return getModifiers().hasModifiers();
1035 }
1036
1037 bool hasFPModifiers() const {
1038 return getModifiers().hasFPModifiers();
1039 }
1040
1041 bool hasIntModifiers() const {
1042 return getModifiers().hasIntModifiers();
1043 }
1044
1045 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1046
1047 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1048
1049 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1050
1051 void addRegOperands(MCInst &Inst, unsigned N) const;
1052
1053 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1054 if (isRegKind())
1055 addRegOperands(Inst, N);
1056 else
1057 addImmOperands(Inst, N);
1058 }
1059
1060 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1061 Modifiers Mods = getModifiers();
1062 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1063 if (isRegKind()) {
1064 addRegOperands(Inst, N);
1065 } else {
1066 addImmOperands(Inst, N, false);
1067 }
1068 }
1069
1070 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1071 assert(!hasIntModifiers());
1072 addRegOrImmWithInputModsOperands(Inst, N);
1073 }
1074
1075 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1076 assert(!hasFPModifiers());
1077 addRegOrImmWithInputModsOperands(Inst, N);
1078 }
1079
1080 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1081 Modifiers Mods = getModifiers();
1082 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1083 assert(isRegKind());
1084 addRegOperands(Inst, N);
1085 }
1086
1087 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1088 assert(!hasIntModifiers());
1089 addRegWithInputModsOperands(Inst, N);
1090 }
1091
1092 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1093 assert(!hasFPModifiers());
1094 addRegWithInputModsOperands(Inst, N);
1095 }
1096
1097 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1098 // clang-format off
1099 switch (Type) {
1100 case ImmTyNone: OS << "None"; break;
1101 case ImmTyGDS: OS << "GDS"; break;
1102 case ImmTyLDS: OS << "LDS"; break;
1103 case ImmTyOffen: OS << "Offen"; break;
1104 case ImmTyIdxen: OS << "Idxen"; break;
1105 case ImmTyAddr64: OS << "Addr64"; break;
1106 case ImmTyOffset: OS << "Offset"; break;
1107 case ImmTyInstOffset: OS << "InstOffset"; break;
1108 case ImmTyOffset0: OS << "Offset0"; break;
1109 case ImmTyOffset1: OS << "Offset1"; break;
1110 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1111 case ImmTyCPol: OS << "CPol"; break;
1112 case ImmTyIndexKey8bit: OS << "index_key"; break;
1113 case ImmTyIndexKey16bit: OS << "index_key"; break;
1114 case ImmTyIndexKey32bit: OS << "index_key"; break;
1115 case ImmTyTFE: OS << "TFE"; break;
1116 case ImmTyD16: OS << "D16"; break;
1117 case ImmTyFORMAT: OS << "FORMAT"; break;
1118 case ImmTyClamp: OS << "Clamp"; break;
1119 case ImmTyOModSI: OS << "OModSI"; break;
1120 case ImmTyDPP8: OS << "DPP8"; break;
1121 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1122 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1123 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1124 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1125 case ImmTyDppFI: OS << "DppFI"; break;
1126 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1127 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1128 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1129 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1130 case ImmTyDMask: OS << "DMask"; break;
1131 case ImmTyDim: OS << "Dim"; break;
1132 case ImmTyUNorm: OS << "UNorm"; break;
1133 case ImmTyDA: OS << "DA"; break;
1134 case ImmTyR128A16: OS << "R128A16"; break;
1135 case ImmTyA16: OS << "A16"; break;
1136 case ImmTyLWE: OS << "LWE"; break;
1137 case ImmTyOff: OS << "Off"; break;
1138 case ImmTyExpTgt: OS << "ExpTgt"; break;
1139 case ImmTyExpCompr: OS << "ExpCompr"; break;
1140 case ImmTyExpVM: OS << "ExpVM"; break;
1141 case ImmTyHwreg: OS << "Hwreg"; break;
1142 case ImmTySendMsg: OS << "SendMsg"; break;
1143 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1144 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1145 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1146 case ImmTyOpSel: OS << "OpSel"; break;
1147 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1148 case ImmTyNegLo: OS << "NegLo"; break;
1149 case ImmTyNegHi: OS << "NegHi"; break;
1150 case ImmTySwizzle: OS << "Swizzle"; break;
1151 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1152 case ImmTyHigh: OS << "High"; break;
1153 case ImmTyBLGP: OS << "BLGP"; break;
1154 case ImmTyCBSZ: OS << "CBSZ"; break;
1155 case ImmTyABID: OS << "ABID"; break;
1156 case ImmTyEndpgm: OS << "Endpgm"; break;
1157 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1158 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1159 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1160 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1161 case ImmTyBitOp3: OS << "BitOp3"; break;
1162 case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
1163 case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
1164 case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
1165 case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
1166 case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
1167 case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
1168 case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
1169 case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
1170 case ImmTyScaleSel: OS << "ScaleSel" ; break;
1171 case ImmTyByteSel: OS << "ByteSel" ; break;
1172 }
1173 // clang-format on
1174 }
1175
1176 void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
1177 switch (Kind) {
1178 case Register:
1179 OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
1180 << " mods: " << Reg.Mods << '>';
1181 break;
1182 case Immediate:
1183 OS << '<' << getImm();
1184 if (getImmTy() != ImmTyNone) {
1185 OS << " type: "; printImmTy(OS, getImmTy());
1186 }
1187 OS << " mods: " << Imm.Mods << '>';
1188 break;
1189 case Token:
1190 OS << '\'' << getToken() << '\'';
1191 break;
1192 case Expression:
1193 OS << "<expr ";
1194 MAI.printExpr(OS, *Expr);
1195 OS << '>';
1196 break;
1197 }
1198 }
1199
1200 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1201 int64_t Val, SMLoc Loc,
1202 ImmTy Type = ImmTyNone,
1203 bool IsFPImm = false) {
1204 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1205 Op->Imm.Val = Val;
1206 Op->Imm.IsFPImm = IsFPImm;
1207 Op->Imm.Type = Type;
1208 Op->Imm.Mods = Modifiers();
1209 Op->StartLoc = Loc;
1210 Op->EndLoc = Loc;
1211 return Op;
1212 }
1213
1214 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1215 StringRef Str, SMLoc Loc,
1216 bool HasExplicitEncodingSize = true) {
1217 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1218 Res->Tok.Data = Str.data();
1219 Res->Tok.Length = Str.size();
1220 Res->StartLoc = Loc;
1221 Res->EndLoc = Loc;
1222 return Res;
1223 }
1224
1225 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1226 MCRegister Reg, SMLoc S, SMLoc E) {
1227 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1228 Op->Reg.RegNo = Reg;
1229 Op->Reg.Mods = Modifiers();
1230 Op->StartLoc = S;
1231 Op->EndLoc = E;
1232 return Op;
1233 }
1234
1235 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1236 const class MCExpr *Expr, SMLoc S) {
1237 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1238 Op->Expr = Expr;
1239 Op->StartLoc = S;
1240 Op->EndLoc = S;
1241 return Op;
1242 }
1243};
1244
1245raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1246 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1247 return OS;
1248}
1249
1250//===----------------------------------------------------------------------===//
1251// AsmParser
1252//===----------------------------------------------------------------------===//
1253
1254// TODO: define GET_SUBTARGET_FEATURE_NAME
1255#define GET_REGISTER_MATCHER
1256#include "AMDGPUGenAsmMatcher.inc"
1257#undef GET_REGISTER_MATCHER
1258#undef GET_SUBTARGET_FEATURE_NAME
1259
1260// Holds info related to the current kernel, e.g. count of SGPRs used.
1261// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1262// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  // One-past the highest register index referenced so far, per register file.
  // -1 means "nothing referenced yet" (see initialize()).
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  // Record a use of SGPR index \p i and publish the running count through the
  // .kernel.sgpr_count assembler symbol.
  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  // Record a use of VGPR index \p i. The published .kernel.vgpr_count value
  // is derived from both VGPR and AGPR usage via getTotalNumVGPRs (the two
  // files share a budget on gfx90a-style targets).
  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  // Record a use of AGPR index \p i; updates .kernel.agpr_count and, because
  // the combined budget depends on AGPRs, also refreshes .kernel.vgpr_count.
  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  // Bind to the assembler context and reset all counters. Passing the freshly
  // reset -1 through usesXgprAt() forces each *_count symbol to be
  // (re)defined as 0 at the start of the kernel scope.
  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

  // Record a register use. \p DwordRegIndex is the first 32-bit register of
  // the use; \p RegWidth is its width in bits, so the highest index touched
  // is DwordRegIndex + ceil(RegWidth / 32) - 1.
  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      // TTMP/special registers do not count against kernel GPR usage.
      break;
    }
  }
};
1347
1348class AMDGPUAsmParser : public MCTargetAsmParser {
1349 MCAsmParser &Parser;
1350
1351 unsigned ForcedEncodingSize = 0;
1352 bool ForcedDPP = false;
1353 bool ForcedSDWA = false;
1354 KernelScopeInfo KernelScope;
1355 const unsigned HwMode;
1356
1357 /// @name Auto-generated Match Functions
1358 /// {
1359
1360#define GET_ASSEMBLER_HEADER
1361#include "AMDGPUGenAsmMatcher.inc"
1362
1363 /// }
1364
1365 /// Get size of register operand
1366 unsigned getRegOperandSize(const MCInstrDesc &Desc, unsigned OpNo) const {
1367 assert(OpNo < Desc.NumOperands);
1368 int16_t RCID = MII.getOpRegClassID(Desc.operands()[OpNo], HwMode);
1369 return getRegBitWidth(RCID) / 8;
1370 }
1371
1372private:
1373 void createConstantSymbol(StringRef Id, int64_t Val);
1374
1375 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1376 bool OutOfRangeError(SMRange Range);
1377 /// Calculate VGPR/SGPR blocks required for given target, reserved
1378 /// registers, and user-specified NextFreeXGPR values.
1379 ///
1380 /// \param Features [in] Target features, used for bug corrections.
1381 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1382 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1383 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1384 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1385 /// descriptor field, if valid.
1386 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1387 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1388 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1389 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1390 /// \param VGPRBlocks [out] Result VGPR block count.
1391 /// \param SGPRBlocks [out] Result SGPR block count.
1392 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1393 const MCExpr *FlatScrUsed, bool XNACKUsed,
1394 std::optional<bool> EnableWavefrontSize32,
1395 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1396 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1397 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
1398 bool ParseDirectiveAMDGCNTarget();
1399 bool ParseDirectiveAMDHSACodeObjectVersion();
1400 bool ParseDirectiveAMDHSAKernel();
1401 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1402 bool ParseDirectiveAMDKernelCodeT();
1403 // TODO: Possibly make subtargetHasRegister const.
1404 bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
1405 bool ParseDirectiveAMDGPUHsaKernel();
1406
1407 bool ParseDirectiveISAVersion();
1408 bool ParseDirectiveHSAMetadata();
1409 bool ParseDirectivePALMetadataBegin();
1410 bool ParseDirectivePALMetadata();
1411 bool ParseDirectiveAMDGPULDS();
1412
1413 /// Common code to parse out a block of text (typically YAML) between start and
1414 /// end directives.
1415 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1416 const char *AssemblerDirectiveEnd,
1417 std::string &CollectString);
1418
1419 bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
1420 RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
1421 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1422 unsigned &RegNum, unsigned &RegWidth,
1423 bool RestoreOnFailure = false);
1424 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1425 unsigned &RegNum, unsigned &RegWidth,
1426 SmallVectorImpl<AsmToken> &Tokens);
1427 MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1428 unsigned &RegWidth,
1429 SmallVectorImpl<AsmToken> &Tokens);
1430 MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1431 unsigned &RegWidth,
1432 SmallVectorImpl<AsmToken> &Tokens);
1433 MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1434 unsigned &RegWidth,
1435 SmallVectorImpl<AsmToken> &Tokens);
1436 bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
1437 MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
1438 unsigned SubReg, unsigned RegWidth, SMLoc Loc);
1439
1440 bool isRegister();
1441 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1442 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1443 void initializeGprCountSymbol(RegisterKind RegKind);
1444 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1445 unsigned RegWidth);
1446 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1447 bool IsAtomic);
1448
1449public:
1450 enum OperandMode {
1451 OperandMode_Default,
1452 OperandMode_NSA,
1453 };
1454
1455 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1456
1457 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1458 const MCInstrInfo &MII, const MCTargetOptions &Options)
1459 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser),
1460 HwMode(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {
1462
1463 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1464
1465 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1466 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1467 createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1468 createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1469 createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1470 } else {
1471 createConstantSymbol(".option.machine_version_major", ISA.Major);
1472 createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1473 createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1474 }
1475 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1476 initializeGprCountSymbol(IS_VGPR);
1477 initializeGprCountSymbol(IS_SGPR);
1478 } else
1479 KernelScope.initialize(getContext());
1480
1481 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1482 createConstantSymbol(Symbol, Code);
1483
1484 createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1485 createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1486 createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1487 }
1488
  // --- Subtarget capability / generation queries ---
  // Thin forwarders over AMDGPUBaseInfo helpers and the raw feature bits,
  // provided so parser code can query the target tersely.

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasA16() const { return AMDGPU::hasA16(getSTI()); }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean it.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX11() const {
    return AMDGPU::isGFX11(getSTI());
  }

  bool isGFX11Plus() const {
    return AMDGPU::isGFX11Plus(getSTI());
  }

  bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }

  bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }

  bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }

  bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }

  bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool has64BitLiterals() const {
    return getFeatureBits()[AMDGPU::Feature64BitLiterals];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasTrue16Insts() const {
    return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  bool hasPartialNSAEncoding() const {
    return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
  }

  bool hasGloballyAddressableScratch() const {
    return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
  }

  unsigned getNSAMaxSize(bool HasSampler = false) const {
    return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
  }

  unsigned getMaxNumUserSGPRs() const {
    return AMDGPU::getMaxNumUserSGPRs(getSTI());
  }

  bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }

  // Returns the streamer's target-specific half, downcast to the AMDGPU type.
  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  MCContext &getContext() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser *>(this)->MCTargetAsmParser::getContext();
  }

  const MCRegisterInfo *getMRI() const {
    return getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  // FIXME: This should not be used. Instead, should use queries derived from
  // getAvailableFeatures().
  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  // Forced-encoding state: presumably set while parsing mnemonic suffixes
  // (see parseMnemonicSuffix) to restrict matching to a specific encoding
  // (VOP3/DPP/SDWA) — confirm against the suffix-parsing code.
  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;
1645
1646 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1647 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1648 bool RestoreOnFailure);
1649 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1650 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1651 SMLoc &EndLoc) override;
1652 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1653 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1654 unsigned Kind) override;
1655 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1656 OperandVector &Operands, MCStreamer &Out,
1657 uint64_t &ErrorInfo,
1658 bool MatchingInlineAsm) override;
1659 bool ParseDirective(AsmToken DirectiveID) override;
1660 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1661 OperandMode Mode = OperandMode_Default);
1662 StringRef parseMnemonicSuffix(StringRef Name);
1663 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1664 SMLoc NameLoc, OperandVector &Operands) override;
1665 //bool ProcessInstruction(MCInst &Inst);
1666
1667 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1668
1669 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1670
1671 ParseStatus
1672 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1673 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1674 std::function<bool(int64_t &)> ConvertResult = nullptr);
1675
1676 ParseStatus parseOperandArrayWithPrefix(
1677 const char *Prefix, OperandVector &Operands,
1678 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1679 bool (*ConvertResult)(int64_t &) = nullptr);
1680
1681 ParseStatus
1682 parseNamedBit(StringRef Name, OperandVector &Operands,
1683 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1684 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1685 ParseStatus parseCPol(OperandVector &Operands);
1686 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1687 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1688 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1689 SMLoc &StringLoc);
1690 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1691 StringRef Name,
1692 ArrayRef<const char *> Ids,
1693 int64_t &IntVal);
1694 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1695 StringRef Name,
1696 ArrayRef<const char *> Ids,
1697 AMDGPUOperand::ImmTy Type);
1698
1699 bool isModifier();
1700 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1701 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1702 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1703 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1704 bool parseSP3NegModifier();
1705 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1706 LitModifier Lit = LitModifier::None);
1707 ParseStatus parseReg(OperandVector &Operands);
1708 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1709 LitModifier Lit = LitModifier::None);
1710 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1711 bool AllowImm = true);
1712 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1713 bool AllowImm = true);
1714 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1715 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1716 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1717 ParseStatus tryParseIndexKey(OperandVector &Operands,
1718 AMDGPUOperand::ImmTy ImmTy);
1719 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1720 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1721 ParseStatus parseIndexKey32bit(OperandVector &Operands);
1722 ParseStatus tryParseMatrixFMT(OperandVector &Operands, StringRef Name,
1723 AMDGPUOperand::ImmTy Type);
1724 ParseStatus parseMatrixAFMT(OperandVector &Operands);
1725 ParseStatus parseMatrixBFMT(OperandVector &Operands);
1726 ParseStatus tryParseMatrixScale(OperandVector &Operands, StringRef Name,
1727 AMDGPUOperand::ImmTy Type);
1728 ParseStatus parseMatrixAScale(OperandVector &Operands);
1729 ParseStatus parseMatrixBScale(OperandVector &Operands);
1730 ParseStatus tryParseMatrixScaleFmt(OperandVector &Operands, StringRef Name,
1731 AMDGPUOperand::ImmTy Type);
1732 ParseStatus parseMatrixAScaleFmt(OperandVector &Operands);
1733 ParseStatus parseMatrixBScaleFmt(OperandVector &Operands);
1734
1735 ParseStatus parseDfmtNfmt(int64_t &Format);
1736 ParseStatus parseUfmt(int64_t &Format);
1737 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1738 int64_t &Format);
1739 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1740 int64_t &Format);
1741 ParseStatus parseFORMAT(OperandVector &Operands);
1742 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1743 ParseStatus parseNumericFormat(int64_t &Format);
1744 ParseStatus parseFlatOffset(OperandVector &Operands);
1745 ParseStatus parseR128A16(OperandVector &Operands);
1746 ParseStatus parseBLGP(OperandVector &Operands);
1747 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1748 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1749
1750 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1751
1752 bool parseCnt(int64_t &IntVal);
1753 ParseStatus parseSWaitCnt(OperandVector &Operands);
1754
1755 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1756 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1757 ParseStatus parseDepCtr(OperandVector &Operands);
1758
1759 bool parseDelay(int64_t &Delay);
1760 ParseStatus parseSDelayALU(OperandVector &Operands);
1761
1762 ParseStatus parseHwreg(OperandVector &Operands);
1763
1764private:
  // A single parsed field of a structured operand (hwreg/sendmsg and
  // similar): its value, source location, and how it was written.
  struct OperandInfoTy {
    // Location of the field in the input, for diagnostics.
    SMLoc Loc;
    // Parsed value (or the default passed to the constructor).
    int64_t Val;
    // True when the field was given as a symbolic name rather than a number
    // (see StructuredOpField::validate, which reports unsupported symbols).
    bool IsSymbolic = false;
    // Presumably set when the field appears explicitly in the input — confirm
    // against the parse*/validate* users of this struct.
    bool IsDefined = false;

    OperandInfoTy(int64_t Val) : Val(Val) {}
  };
1773
  // A named, fixed-width field of a structured operand, carrying its own
  // validation rule and diagnostic text.
  struct StructuredOpField : OperandInfoTy {
    StringLiteral Id;   // Field name as written in assembly.
    StringLiteral Desc; // Human-readable description used in diagnostics.
    unsigned Width;     // Field width in bits; bounds legal values.
    // NOTE(review): this shadows OperandInfoTy::IsDefined — accesses through
    // a StructuredOpField lvalue hit this member, not the base's. Verify the
    // duplication is intentional before relying on either flag.
    bool IsDefined = false;

    StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
                      int64_t Default)
        : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
    virtual ~StructuredOpField() = default;

    // Emit "invalid <Desc>: <Err>" at this field's location; always returns
    // false so callers can `return Error(...)`.
    bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
      Parser.Error(Loc, "invalid " + Desc + ": " + Err);
      return false;
    }

    // Default validation: reject unsupported symbolic values and values that
    // do not fit in Width bits. Subclasses may add field-specific checks.
    virtual bool validate(AMDGPUAsmParser &Parser) const {
      if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
        return Error(Parser, "not supported on this GPU");
      if (!isUIntN(Width, Val))
        return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
      return true;
    }
  };
1798
1799 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1800 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1801
1802 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1803 bool validateSendMsg(const OperandInfoTy &Msg,
1804 const OperandInfoTy &Op,
1805 const OperandInfoTy &Stream);
1806
1807 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1808 OperandInfoTy &Width);
1809
1810 static SMLoc getLaterLoc(SMLoc a, SMLoc b);
1811
1812 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1813 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1814 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1815
1816 SMLoc getOperandLoc(const OperandVector &Operands, int MCOpIdx) const;
1817 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1818 const OperandVector &Operands) const;
1819 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type,
1820 const OperandVector &Operands) const;
1821 SMLoc getInstLoc(const OperandVector &Operands) const;
1822
1823 bool validateInstruction(const MCInst &Inst, SMLoc IDLoc,
1824 const OperandVector &Operands);
1825 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1826 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1827 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1828 bool validateSOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1829 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1830 std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
1831 bool AsVOPD3);
1832 bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
1833 bool tryVOPD(const MCInst &Inst);
1834 bool tryVOPD3(const MCInst &Inst);
1835 bool tryAnotherVOPDEncoding(const MCInst &Inst);
1836
1837 bool validateIntClampSupported(const MCInst &Inst);
1838 bool validateMIMGAtomicDMask(const MCInst &Inst);
1839 bool validateMIMGGatherDMask(const MCInst &Inst);
1840 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1841 bool validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc);
1842 bool validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc);
1843 bool validateMIMGD16(const MCInst &Inst);
1844 bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1845 bool validateTensorR128(const MCInst &Inst);
1846 bool validateMIMGMSAA(const MCInst &Inst);
1847 bool validateOpSel(const MCInst &Inst);
1848 bool validateTrue16OpSel(const MCInst &Inst);
1849 bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
1850 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1851 bool validateVccOperand(MCRegister Reg) const;
1852 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1853 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1854 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1855 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1856 bool validateAGPRLdSt(const MCInst &Inst) const;
1857 bool validateVGPRAlign(const MCInst &Inst) const;
1858 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1859 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1860 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1861 bool validateDivScale(const MCInst &Inst);
1862 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1863 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1864 SMLoc IDLoc);
1865 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1866 const unsigned CPol);
1867 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1868 bool validateLdsDirect(const MCInst &Inst, const OperandVector &Operands);
1869 bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
1870 unsigned getConstantBusLimit(unsigned Opcode) const;
1871 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1872 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1873 MCRegister findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1874
1875 bool isSupportedMnemo(StringRef Mnemo,
1876 const FeatureBitset &FBS);
1877 bool isSupportedMnemo(StringRef Mnemo,
1878 const FeatureBitset &FBS,
1879 ArrayRef<unsigned> Variants);
1880 bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);
1881
1882 bool isId(const StringRef Id) const;
1883 bool isId(const AsmToken &Token, const StringRef Id) const;
1884 bool isToken(const AsmToken::TokenKind Kind) const;
1885 StringRef getId() const;
1886 bool trySkipId(const StringRef Id);
1887 bool trySkipId(const StringRef Pref, const StringRef Id);
1888 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1889 bool trySkipToken(const AsmToken::TokenKind Kind);
1890 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1891 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1892 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1893
1894 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1895 AsmToken::TokenKind getTokenKind() const;
1896 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1897 bool parseExpr(OperandVector &Operands);
1898 StringRef getTokenStr() const;
1899 AsmToken peekToken(bool ShouldSkipSpace = true);
1900 AsmToken getToken() const;
1901 SMLoc getLoc() const;
1902 void lex();
1903
1904public:
1905 void onBeginOfFile() override;
1906 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1907
1908 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1909
1910 ParseStatus parseExpTgt(OperandVector &Operands);
1911 ParseStatus parseSendMsg(OperandVector &Operands);
1912 ParseStatus parseInterpSlot(OperandVector &Operands);
1913 ParseStatus parseInterpAttr(OperandVector &Operands);
1914 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1915 ParseStatus parseBoolReg(OperandVector &Operands);
1916
1917 bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1918 const unsigned MaxVal, const Twine &ErrMsg,
1919 SMLoc &Loc);
1920 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1921 const unsigned MinVal,
1922 const unsigned MaxVal,
1923 const StringRef ErrMsg);
1924 ParseStatus parseSwizzle(OperandVector &Operands);
1925 bool parseSwizzleOffset(int64_t &Imm);
1926 bool parseSwizzleMacro(int64_t &Imm);
1927 bool parseSwizzleQuadPerm(int64_t &Imm);
1928 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1929 bool parseSwizzleBroadcast(int64_t &Imm);
1930 bool parseSwizzleSwap(int64_t &Imm);
1931 bool parseSwizzleReverse(int64_t &Imm);
1932 bool parseSwizzleFFT(int64_t &Imm);
1933 bool parseSwizzleRotate(int64_t &Imm);
1934
1935 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1936 int64_t parseGPRIdxMacro();
1937
1938 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1939 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1940
1941 ParseStatus parseOModSI(OperandVector &Operands);
1942
1943 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1944 OptionalImmIndexMap &OptionalIdx);
1945 void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
1946 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1947 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1948 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1949 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1950
1951 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1952 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1953 OptionalImmIndexMap &OptionalIdx);
1954 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1955 OptionalImmIndexMap &OptionalIdx);
1956
1957 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1958 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1959 void cvtOpSelHelper(MCInst &Inst, unsigned OpSel);
1960
1961 bool parseDimId(unsigned &Encoding);
1962 ParseStatus parseDim(OperandVector &Operands);
1963 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1964 ParseStatus parseDPP8(OperandVector &Operands);
1965 ParseStatus parseDPPCtrl(OperandVector &Operands);
1966 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1967 int64_t parseDPPCtrlSel(StringRef Ctrl);
1968 int64_t parseDPPCtrlPerm();
1969 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1970 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1971 cvtDPP(Inst, Operands, true);
1972 }
1973 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1974 bool IsDPP8 = false);
1975 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1976 cvtVOP3DPP(Inst, Operands, true);
1977 }
1978
1979 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1980 AMDGPUOperand::ImmTy Type);
1981 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1982 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1983 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1984 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1985 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1986 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1987 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1988 uint64_t BasicInstType,
1989 bool SkipDstVcc = false,
1990 bool SkipSrcVcc = false);
1991
1992 ParseStatus parseEndpgm(OperandVector &Operands);
1993
1994 ParseStatus parseVOPD(OperandVector &Operands);
1995};
1996
1997} // end anonymous namespace
1998
1999// May be called with integer type with equivalent bitwidth.
2000static const fltSemantics *getFltSemantics(unsigned Size) {
2001 switch (Size) {
2002 case 4:
2003 return &APFloat::IEEEsingle();
2004 case 8:
2005 return &APFloat::IEEEdouble();
2006 case 2:
2007 return &APFloat::IEEEhalf();
2008 default:
2009 llvm_unreachable("unsupported fp type");
2010 }
2011}
2012
  // MVT overload: converts the type's bit width to bytes and forwards to the
  // size-based getFltSemantics() above.
  // NOTE(review): the signature line of this overload is elided in this copy
  // of the source — confirm against the original file.
  return getFltSemantics(VT.getSizeInBits() / 8);
}
2016
// Selects FP semantics from an MCOI operand-type code.
// NOTE(review): the enclosing signature and the case labels of this switch
// are elided in this copy of the source; the returns below are kept as found.
switch (OperandType) {
// When floating-point immediate is used as operand of type i16, the 32-bit
// representation of the constant truncated to the 16 LSBs should be used.
  return &APFloat::IEEEsingle();
  return &APFloat::IEEEdouble();
  return &APFloat::IEEEhalf();
  return &APFloat::BFloat();
default:
  llvm_unreachable("unsupported fp type");
}
}
2059
2060//===----------------------------------------------------------------------===//
2061// Operand
2062//===----------------------------------------------------------------------===//
2063
// Returns true if FPLiteral can be converted to the FP type VT without
// overflow or underflow; pure precision loss is tolerated.
// NOTE(review): the FPLiteral.convert(...) call head that produces `Status`
// is elided in this copy of the source; only its trailing argument survives.
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
 &Lost);
  // We allow precision lost but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}
2081
2082static bool isSafeTruncation(int64_t Val, unsigned Size) {
2083 return isUIntN(Size, Val) || isIntN(Size, Val);
2084}
2085
2086static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2087 if (VT.getScalarType() == MVT::i16)
2088 return isInlinableLiteral32(Val, HasInv2Pi);
2089
2090 if (VT.getScalarType() == MVT::f16)
2091 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2092
2093 assert(VT.getScalarType() == MVT::bf16);
2094
2095 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2096}
2097
// Returns true if this immediate operand can be encoded as an inline
// constant of the given type (no literal dword needed).
// NOTE(review): several `return AMDGPU::isInlinableLiteral...(` call heads
// are elided in this copy of the source; their trailing argument lines are
// kept below as found.
bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }

  // An explicit lit()/lit64() modifier forces a literal encoding.
  if (getModifiers().Lit != LitModifier::None)
    return false;

  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
                                 AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      bool Lost = false;
      switch (type.getScalarType().SimpleTy) {
      default:
        llvm_unreachable("unknown 16-bit type");
      case MVT::bf16:
        FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
                          &Lost);
        break;
      case MVT::f16:
        FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
                          &Lost);
        break;
      case MVT::i16:
        FPLiteral.convert(APFloatBase::IEEEsingle(),
                          APFloat::rmNearestTiesToEven, &Lost);
        break;
      }
      // We need to use 32-bit representation here because when a floating-point
      // inline constant is used as an i16 operand, its 32-bit representation
      // representation will be used. We will need the 32-bit value to check if
      // it is FP inline constant.
      uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      return isInlineableLiteralOp16(ImmVal, type,
                                     AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
        static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
                                 AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
        static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
  }

      static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
}
2185
2186bool AMDGPUOperand::isLiteralImm(MVT type) const {
2187 // Check that this immediate can be added as literal
2188 if (!isImmTy(ImmTyNone)) {
2189 return false;
2190 }
2191
2192 bool Allow64Bit =
2193 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2194
2195 if (!Imm.IsFPImm) {
2196 // We got int literal token.
2197
2198 if (type == MVT::f64 && hasFPModifiers()) {
2199 // Cannot apply fp modifiers to int literals preserving the same semantics
2200 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
2201 // disable these cases.
2202 return false;
2203 }
2204
2205 unsigned Size = type.getSizeInBits();
2206 if (Size == 64) {
2207 if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Imm.Val, false))
2208 return true;
2209 Size = 32;
2210 }
2211
2212 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2213 // types.
2214 return isSafeTruncation(Imm.Val, Size);
2215 }
2216
2217 // We got fp literal token
2218 if (type == MVT::f64) { // Expected 64-bit fp operand
2219 // We would set low 64-bits of literal to zeroes but we accept this literals
2220 return true;
2221 }
2222
2223 if (type == MVT::i64) { // Expected 64-bit int operand
2224 // We don't allow fp literals in 64-bit integer instructions. It is
2225 // unclear how we should encode them.
2226 return false;
2227 }
2228
2229 // We allow fp literals with f16x2 operands assuming that the specified
2230 // literal goes into the lower half and the upper half is zero. We also
2231 // require that the literal may be losslessly converted to f16.
2232 //
2233 // For i16x2 operands, we assume that the specified literal is encoded as a
2234 // single-precision float. This is pretty odd, but it matches SP3 and what
2235 // happens in hardware.
2236 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2237 : (type == MVT::v2i16) ? MVT::f32
2238 : (type == MVT::v2f32) ? MVT::f32
2239 : type;
2240
2241 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2242 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2243}
2244
2245bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2246 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2247}
2248
2249bool AMDGPUOperand::isVRegWithInputMods() const {
2250 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2251 // GFX90A allows DPP on 64-bit operands.
2252 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2253 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2254}
2255
2256template <bool IsFake16>
2257bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2258 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2259 : AMDGPU::VGPR_16_Lo128RegClassID);
2260}
2261
2262template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2263 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2264 : AMDGPU::VGPR_16RegClassID);
2265}
2266
2267bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2268 if (AsmParser->isVI())
2269 return isVReg32();
2270 if (AsmParser->isGFX9Plus())
2271 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2272 return false;
2273}
2274
2275bool AMDGPUOperand::isSDWAFP16Operand() const {
2276 return isSDWAOperand(MVT::f16);
2277}
2278
2279bool AMDGPUOperand::isSDWAFP32Operand() const {
2280 return isSDWAOperand(MVT::f32);
2281}
2282
2283bool AMDGPUOperand::isSDWAInt16Operand() const {
2284 return isSDWAOperand(MVT::i16);
2285}
2286
2287bool AMDGPUOperand::isSDWAInt32Operand() const {
2288 return isSDWAOperand(MVT::i32);
2289}
2290
2291bool AMDGPUOperand::isBoolReg() const {
2292 return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
2293 (AsmParser->isWave32() && isSCSrc_b32()));
2294}
2295
2296uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2297{
2298 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2299 assert(Size == 2 || Size == 4 || Size == 8);
2300
2301 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2302
2303 if (Imm.Mods.Abs) {
2304 Val &= ~FpSignMask;
2305 }
2306 if (Imm.Mods.Neg) {
2307 Val ^= FpSignMask;
2308 }
2309
2310 return Val;
2311}
2312
// Appends this immediate (or expression) operand to Inst, routing genuine SI
// source operands through addLiteralImmOperand().
// NOTE(review): the two Inst.addOperand(...) lines (expression and plain
// immediate paths) are elided in this copy of the source.
void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  // Remember the MCInst slot this operand occupies.
  MCOpIdx = Inst.getNumOperands();

  if (isExpr()) {
    return;
  }

  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    // NOTE(review): `ApplyModifiers &` (bitwise) below binds tighter than
    // `&&`, so this evaluates as (ApplyModifiers & isImmTy(...)) && ... —
    // same result as logical AND here, but worth confirming the intent.
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
  }
}
2331
// Encodes Val as an inline constant or a literal operand of Inst, honoring
// the operand type, the lit()/lit64() modifiers, and optional FP modifiers.
// NOTE(review): this copy of the source has many lines elided by extraction
// (the case labels of both switches, several call heads, and the final
// addOperand payloads); the surviving code is kept below exactly as found.
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;

  // 64-bit literal encodings are unavailable to VOP3/VOP3P forms.
  bool CanUse64BitLiterals =
      AsmParser->has64BitLiterals() &&
      !(InstDesc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));
  LitModifier Lit = getModifiers().Lit;
  MCContext &Ctx = AsmParser->getContext();

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    // NOTE(review): case labels elided here.
    if (Lit == LitModifier::None &&
            AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
      return;
    }

    // Non-inlineable
    if (AMDGPU::isSISrcFPOperand(InstDesc,
                                 OpNum)) { // Expected 64-bit fp operand
      bool HasMandatoryLiteral =
          AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm);
      // For fp operands we check if low 32 bits are zeros
      if (Literal.getLoBits(32) != 0 &&
          (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
          !HasMandatoryLiteral) {
        const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
            Inst.getLoc(),
            "Can't encode literal as exact 64-bit floating-point operand. "
            "Low 32-bits will be set to zero");
        Val &= 0xffffffff00000000u;
      }

      if ((OpTy == AMDGPU::OPERAND_REG_IMM_FP64 ||
        if (CanUse64BitLiterals && Lit == LitModifier::None &&
            (isInt<32>(Val) || isUInt<32>(Val))) {
          // The floating-point operand will be verbalized as an
          // integer one. If that integer happens to fit 32 bits, on
          // re-assembling it will be intepreted as the high half of
          // the actual value, so we have to wrap it into lit64().
          Lit = LitModifier::Lit64;
        } else if (Lit == LitModifier::Lit) {
          // For FP64 operands lit() specifies the high half of the value.
          Val = Hi_32(Val);
        }
      }
      break;
    }

    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them. This case should be checked earlier
    // in predicate methods (isLiteralImm())
    llvm_unreachable("fp literal in 64-bit integer instruction.");

    if (CanUse64BitLiterals && Lit == LitModifier::None &&
        (isInt<32>(Val) || isUInt<32>(Val)))
      Lit = LitModifier::Lit64;
    break;

    if (Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
        Literal == 0x3fc45f306725feed) {
      // This is the 1/(2*pi) which is going to be truncated to bf16 with the
      // loss of precision. The constant represents ideomatic fp32 value of
      // 1/(2*pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16
      // bits. Prevent rounding below.
      Inst.addOperand(MCOperand::createImm(0x3e22));
      return;
    }
    [[fallthrough]];

    bool lost;
    APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
    // Convert literal to single precision
    FPLiteral.convert(*getOpFltSemantics(OpTy),
                      APFloat::rmNearestTiesToEven, &lost);
    // We allow precision lost but not overflow or underflow. This should be
    // checked earlier in isLiteralImm()

    Val = FPLiteral.bitcastToAPInt().getZExtValue();
    break;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    if (Lit != LitModifier::None) {
      Inst.addOperand(
    } else {
    }
    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  switch (OpTy) {
    break;

    if (Lit == LitModifier::None &&
        AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      return;
    }

    // When the 32 MSBs are not zero (effectively means it can't be safely
    // truncated to uint32_t), if the target doesn't support 64-bit literals, or
    // the lit modifier is explicitly used, we need to truncate it to the 32
    // LSBs.
    if (!AsmParser->has64BitLiterals() || Lit == LitModifier::Lit)
      Val = Lo_32(Val);
    break;

    if (Lit == LitModifier::None &&
        AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      return;
    }

    // If the target doesn't support 64-bit literals, we need to use the
    // constant as the high 32 MSBs of a double-precision floating point value.
    if (!AsmParser->has64BitLiterals()) {
      Val = static_cast<uint64_t>(Val) << 32;
    } else {
      // Now the target does support 64-bit literals, there are two cases
      // where we still want to use src_literal encoding:
      // 1) explicitly forced by using lit modifier;
      // 2) the value is a valid 32-bit representation (signed or unsigned),
      // meanwhile not forced by lit64 modifier.
      if (Lit == LitModifier::Lit ||
          (Lit != LitModifier::Lit64 && (isInt<32>(Val) || isUInt<32>(Val))))
        Val = static_cast<uint64_t>(Val) << 32;
    }

    // For FP64 operands lit() specifies the high half of the value.
    if (Lit == LitModifier::Lit)
      Val = Hi_32(Val);
    break;

    break;

    if ((isInt<32>(Val) || isUInt<32>(Val)) && Lit != LitModifier::Lit64)
      Val <<= 32;
    break;

  default:
    llvm_unreachable("invalid operand type");
  }

  if (Lit != LitModifier::None) {
    Inst.addOperand(
  } else {
  }
}
2563
2564void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2565 MCOpIdx = Inst.getNumOperands();
2566 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2567}
2568
2569bool AMDGPUOperand::isInlineValue() const {
2570 return isRegKind() && ::isInlineValue(getReg());
2571}
2572
2573//===----------------------------------------------------------------------===//
2574// AsmParser
2575//===----------------------------------------------------------------------===//
2576
// Creates (or fetches) the assembler symbol Id in the current MCContext.
// NOTE(review): the line that binds Val to the symbol (presumably a
// setVariableValue call) is elided in this copy of the source — as shown,
// Val is unused; confirm against the original file.
void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
  // TODO: make those pre-defined variables read-only.
  // Currently there is none suitable machinery in the core llvm-mc for this.
  // MCSymbol::isRedefinable is intended for another purpose, and
  // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
  MCContext &Ctx = getContext();
  MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
}
2586
// Maps a (register kind, width-in-bits) pair to the corresponding target
// register class ID, or -1 when the kind/width combination has no class
// (e.g. 1024-bit SGPR, or any width for IS_SPECIAL/IS_UNKNOWN).
static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
      default: return -1;
      case 32:
        return AMDGPU::VGPR_32RegClassID;
      case 64:
        return AMDGPU::VReg_64RegClassID;
      case 96:
        return AMDGPU::VReg_96RegClassID;
      case 128:
        return AMDGPU::VReg_128RegClassID;
      case 160:
        return AMDGPU::VReg_160RegClassID;
      case 192:
        return AMDGPU::VReg_192RegClassID;
      case 224:
        return AMDGPU::VReg_224RegClassID;
      case 256:
        return AMDGPU::VReg_256RegClassID;
      case 288:
        return AMDGPU::VReg_288RegClassID;
      case 320:
        return AMDGPU::VReg_320RegClassID;
      case 352:
        return AMDGPU::VReg_352RegClassID;
      case 384:
        return AMDGPU::VReg_384RegClassID;
      case 512:
        return AMDGPU::VReg_512RegClassID;
      case 1024:
        return AMDGPU::VReg_1024RegClassID;
    }
  } else if (Is == IS_TTMP) {
    // TTMPs only exist in a sparse set of widths.
    switch (RegWidth) {
      default: return -1;
      case 32:
        return AMDGPU::TTMP_32RegClassID;
      case 64:
        return AMDGPU::TTMP_64RegClassID;
      case 128:
        return AMDGPU::TTMP_128RegClassID;
      case 256:
        return AMDGPU::TTMP_256RegClassID;
      case 512:
        return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
      default: return -1;
      case 32:
        return AMDGPU::SGPR_32RegClassID;
      case 64:
        return AMDGPU::SGPR_64RegClassID;
      case 96:
        return AMDGPU::SGPR_96RegClassID;
      case 128:
        return AMDGPU::SGPR_128RegClassID;
      case 160:
        return AMDGPU::SGPR_160RegClassID;
      case 192:
        return AMDGPU::SGPR_192RegClassID;
      case 224:
        return AMDGPU::SGPR_224RegClassID;
      case 256:
        return AMDGPU::SGPR_256RegClassID;
      case 288:
        return AMDGPU::SGPR_288RegClassID;
      case 320:
        return AMDGPU::SGPR_320RegClassID;
      case 352:
        return AMDGPU::SGPR_352RegClassID;
      case 384:
        return AMDGPU::SGPR_384RegClassID;
      case 512:
        return AMDGPU::SGPR_512RegClassID;
    }
  } else if (Is == IS_AGPR) {
    switch (RegWidth) {
      default: return -1;
      case 32:
        return AMDGPU::AGPR_32RegClassID;
      case 64:
        return AMDGPU::AReg_64RegClassID;
      case 96:
        return AMDGPU::AReg_96RegClassID;
      case 128:
        return AMDGPU::AReg_128RegClassID;
      case 160:
        return AMDGPU::AReg_160RegClassID;
      case 192:
        return AMDGPU::AReg_192RegClassID;
      case 224:
        return AMDGPU::AReg_224RegClassID;
      case 256:
        return AMDGPU::AReg_256RegClassID;
      case 288:
        return AMDGPU::AReg_288RegClassID;
      case 320:
        return AMDGPU::AReg_320RegClassID;
      case 352:
        return AMDGPU::AReg_352RegClassID;
      case 384:
        return AMDGPU::AReg_384RegClassID;
      case 512:
        return AMDGPU::AReg_512RegClassID;
      case 1024:
        return AMDGPU::AReg_1024RegClassID;
    }
  }
  return -1;
}
2699
// Maps a special-register spelling (including src_* aliases) to its MC
// register; unknown names map to AMDGPU::NoRegister.
// NOTE(review): the head of this definition (signature and the StringSwitch
// it builds) is elided in this copy of the source; only the case table and
// closing brace are visible below.
      .Case("exec", AMDGPU::EXEC)
      .Case("vcc", AMDGPU::VCC)
      .Case("flat_scratch", AMDGPU::FLAT_SCR)
      .Case("xnack_mask", AMDGPU::XNACK_MASK)
      .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
      .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
      .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
      .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
      .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
      .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
      .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
      .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
      .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
      .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
      .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case("lds_direct", AMDGPU::LDS_DIRECT)
      .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
      .Case("m0", AMDGPU::M0)
      .Case("vccz", AMDGPU::SRC_VCCZ)
      .Case("src_vccz", AMDGPU::SRC_VCCZ)
      .Case("execz", AMDGPU::SRC_EXECZ)
      .Case("src_execz", AMDGPU::SRC_EXECZ)
      .Case("scc", AMDGPU::SRC_SCC)
      .Case("src_scc", AMDGPU::SRC_SCC)
      .Case("tba", AMDGPU::TBA)
      .Case("tma", AMDGPU::TMA)
      .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
      .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
      .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
      .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
      .Case("vcc_lo", AMDGPU::VCC_LO)
      .Case("vcc_hi", AMDGPU::VCC_HI)
      .Case("exec_lo", AMDGPU::EXEC_LO)
      .Case("exec_hi", AMDGPU::EXEC_HI)
      .Case("tma_lo", AMDGPU::TMA_LO)
      .Case("tma_hi", AMDGPU::TMA_HI)
      .Case("tba_lo", AMDGPU::TBA_LO)
      .Case("tba_hi", AMDGPU::TBA_HI)
      .Case("pc", AMDGPU::PC_REG)
      .Case("null", AMDGPU::SGPR_NULL)
      .Default(AMDGPU::NoRegister);
}
2745
2746bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2747 SMLoc &EndLoc, bool RestoreOnFailure) {
2748 auto R = parseRegister();
2749 if (!R) return true;
2750 assert(R->isReg());
2751 RegNo = R->getReg();
2752 StartLoc = R->getStartLoc();
2753 EndLoc = R->getEndLoc();
2754 return false;
2755}
2756
2757bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2758 SMLoc &EndLoc) {
2759 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2760}
2761
2762ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2763 SMLoc &EndLoc) {
2764 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2765 bool PendingErrors = getParser().hasPendingError();
2766 getParser().clearPendingErrors();
2767 if (PendingErrors)
2768 return ParseStatus::Failure;
2769 if (Result)
2770 return ParseStatus::NoMatch;
2771 return ParseStatus::Success;
2772}
2773
2774bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2775 RegisterKind RegKind,
2776 MCRegister Reg1, SMLoc Loc) {
2777 switch (RegKind) {
2778 case IS_SPECIAL:
2779 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2780 Reg = AMDGPU::EXEC;
2781 RegWidth = 64;
2782 return true;
2783 }
2784 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2785 Reg = AMDGPU::FLAT_SCR;
2786 RegWidth = 64;
2787 return true;
2788 }
2789 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2790 Reg = AMDGPU::XNACK_MASK;
2791 RegWidth = 64;
2792 return true;
2793 }
2794 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2795 Reg = AMDGPU::VCC;
2796 RegWidth = 64;
2797 return true;
2798 }
2799 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2800 Reg = AMDGPU::TBA;
2801 RegWidth = 64;
2802 return true;
2803 }
2804 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2805 Reg = AMDGPU::TMA;
2806 RegWidth = 64;
2807 return true;
2808 }
2809 Error(Loc, "register does not fit in the list");
2810 return false;
2811 case IS_VGPR:
2812 case IS_SGPR:
2813 case IS_AGPR:
2814 case IS_TTMP:
2815 if (Reg1 != Reg + RegWidth / 32) {
2816 Error(Loc, "registers in a list must have consecutive indices");
2817 return false;
2818 }
2819 RegWidth += 32;
2820 return true;
2821 default:
2822 llvm_unreachable("unexpected register kind");
2823 }
2824}
2825
// Describes one regular-register name prefix and its kind.
// NOTE(review): the name member (used as Reg.Name / RI->Name below) is
// elided in this copy of the source — confirm against the original file.
struct RegInfo {
  RegisterKind Kind;
};
2830
// Prefix table consulted by getRegularRegInfo(), which returns the FIRST
// prefix match — so "acc" must stay listed before "a".
static constexpr RegInfo RegularRegisters[] = {
  {{"v"}, IS_VGPR},
  {{"s"}, IS_SGPR},
  {{"ttmp"}, IS_TTMP},
  {{"acc"}, IS_AGPR},
  {{"a"}, IS_AGPR},
};
2838
2839static bool isRegularReg(RegisterKind Kind) {
2840 return Kind == IS_VGPR ||
2841 Kind == IS_SGPR ||
2842 Kind == IS_TTMP ||
2843 Kind == IS_AGPR;
2844}
2845
// Returns the first RegularRegisters entry whose name is a prefix of Str,
// or nullptr when none matches.
// NOTE(review): the signature line of this function is elided in this copy
// of the source — confirm against the original file.
  for (const RegInfo &Reg : RegularRegisters)
    if (Str.starts_with(Reg.Name))
      return &Reg;
  return nullptr;
}
2852
2853static bool getRegNum(StringRef Str, unsigned& Num) {
2854 return !Str.getAsInteger(10, Num);
2855}
2856
2857bool
2858AMDGPUAsmParser::isRegister(const AsmToken &Token,
2859 const AsmToken &NextToken) const {
2860
2861 // A list of consecutive registers: [s0,s1,s2,s3]
2862 if (Token.is(AsmToken::LBrac))
2863 return true;
2864
2865 if (!Token.is(AsmToken::Identifier))
2866 return false;
2867
2868 // A single register like s0 or a range of registers like s[0:1]
2869
2870 StringRef Str = Token.getString();
2871 const RegInfo *Reg = getRegularRegInfo(Str);
2872 if (Reg) {
2873 StringRef RegName = Reg->Name;
2874 StringRef RegSuffix = Str.substr(RegName.size());
2875 if (!RegSuffix.empty()) {
2876 RegSuffix.consume_back(".l");
2877 RegSuffix.consume_back(".h");
2878 unsigned Num;
2879 // A single register with an index: rXX
2880 if (getRegNum(RegSuffix, Num))
2881 return true;
2882 } else {
2883 // A range of registers: r[XX:YY].
2884 if (NextToken.is(AsmToken::LBrac))
2885 return true;
2886 }
2887 }
2888
2889 return getSpecialRegForName(Str).isValid();
2890}
2891
2892bool
2893AMDGPUAsmParser::isRegister()
2894{
2895 return isRegister(getToken(), peekToken());
2896}
2897
2898MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2899 unsigned SubReg, unsigned RegWidth,
2900 SMLoc Loc) {
2901 assert(isRegularReg(RegKind));
2902
2903 unsigned AlignSize = 1;
2904 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2905 // SGPR and TTMP registers must be aligned.
2906 // Max required alignment is 4 dwords.
2907 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2908 }
2909
2910 if (RegNum % AlignSize != 0) {
2911 Error(Loc, "invalid register alignment");
2912 return MCRegister();
2913 }
2914
2915 unsigned RegIdx = RegNum / AlignSize;
2916 int RCID = getRegClass(RegKind, RegWidth);
2917 if (RCID == -1) {
2918 Error(Loc, "invalid or unsupported register size");
2919 return MCRegister();
2920 }
2921
2922 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2923 const MCRegisterClass RC = TRI->getRegClass(RCID);
2924 if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
2925 Error(Loc, "register index is out of range");
2926 return AMDGPU::NoRegister;
2927 }
2928
2929 if (RegKind == IS_VGPR && !isGFX1250() && RegIdx + RegWidth / 32 > 256) {
2930 Error(Loc, "register index is out of range");
2931 return MCRegister();
2932 }
2933
2934 MCRegister Reg = RC.getRegister(RegIdx);
2935
2936 if (SubReg) {
2937 Reg = TRI->getSubReg(Reg, SubReg);
2938
2939 // Currently all regular registers have their .l and .h subregisters, so
2940 // we should never need to generate an error here.
2941 assert(Reg && "Invalid subregister!");
2942 }
2943
2944 return Reg;
2945}
2946
2947bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
2948 unsigned &SubReg) {
2949 int64_t RegLo, RegHi;
2950 if (!skipToken(AsmToken::LBrac, "missing register index"))
2951 return false;
2952
2953 SMLoc FirstIdxLoc = getLoc();
2954 SMLoc SecondIdxLoc;
2955
2956 if (!parseExpr(RegLo))
2957 return false;
2958
2959 if (trySkipToken(AsmToken::Colon)) {
2960 SecondIdxLoc = getLoc();
2961 if (!parseExpr(RegHi))
2962 return false;
2963 } else {
2964 RegHi = RegLo;
2965 }
2966
2967 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2968 return false;
2969
2970 if (!isUInt<32>(RegLo)) {
2971 Error(FirstIdxLoc, "invalid register index");
2972 return false;
2973 }
2974
2975 if (!isUInt<32>(RegHi)) {
2976 Error(SecondIdxLoc, "invalid register index");
2977 return false;
2978 }
2979
2980 if (RegLo > RegHi) {
2981 Error(FirstIdxLoc, "first register index should not exceed second index");
2982 return false;
2983 }
2984
2985 if (RegHi == RegLo) {
2986 StringRef RegSuffix = getTokenStr();
2987 if (RegSuffix == ".l") {
2988 SubReg = AMDGPU::lo16;
2989 lex();
2990 } else if (RegSuffix == ".h") {
2991 SubReg = AMDGPU::hi16;
2992 lex();
2993 }
2994 }
2995
2996 Num = static_cast<unsigned>(RegLo);
2997 RegWidth = 32 * ((RegHi - RegLo) + 1);
2998
2999 return true;
3000}
3001
3002MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
3003 unsigned &RegNum,
3004 unsigned &RegWidth,
3005 SmallVectorImpl<AsmToken> &Tokens) {
3006 assert(isToken(AsmToken::Identifier));
3007 MCRegister Reg = getSpecialRegForName(getTokenStr());
3008 if (Reg) {
3009 RegNum = 0;
3010 RegWidth = 32;
3011 RegKind = IS_SPECIAL;
3012 Tokens.push_back(getToken());
3013 lex(); // skip register name
3014 }
3015 return Reg;
3016}
3017
3018MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3019 unsigned &RegNum,
3020 unsigned &RegWidth,
3021 SmallVectorImpl<AsmToken> &Tokens) {
3022 assert(isToken(AsmToken::Identifier));
3023 StringRef RegName = getTokenStr();
3024 auto Loc = getLoc();
3025
3026 const RegInfo *RI = getRegularRegInfo(RegName);
3027 if (!RI) {
3028 Error(Loc, "invalid register name");
3029 return MCRegister();
3030 }
3031
3032 Tokens.push_back(getToken());
3033 lex(); // skip register name
3034
3035 RegKind = RI->Kind;
3036 StringRef RegSuffix = RegName.substr(RI->Name.size());
3037 unsigned SubReg = NoSubRegister;
3038 if (!RegSuffix.empty()) {
3039 if (RegSuffix.consume_back(".l"))
3040 SubReg = AMDGPU::lo16;
3041 else if (RegSuffix.consume_back(".h"))
3042 SubReg = AMDGPU::hi16;
3043
3044 // Single 32-bit register: vXX.
3045 if (!getRegNum(RegSuffix, RegNum)) {
3046 Error(Loc, "invalid register index");
3047 return MCRegister();
3048 }
3049 RegWidth = 32;
3050 } else {
3051 // Range of registers: v[XX:YY]. ":YY" is optional.
3052 if (!ParseRegRange(RegNum, RegWidth, SubReg))
3053 return MCRegister();
3054 }
3055
3056 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
3057}
3058
3059MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3060 unsigned &RegNum, unsigned &RegWidth,
3061 SmallVectorImpl<AsmToken> &Tokens) {
3062 MCRegister Reg;
3063 auto ListLoc = getLoc();
3064
3065 if (!skipToken(AsmToken::LBrac,
3066 "expected a register or a list of registers")) {
3067 return MCRegister();
3068 }
3069
3070 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
3071
3072 auto Loc = getLoc();
3073 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
3074 return MCRegister();
3075 if (RegWidth != 32) {
3076 Error(Loc, "expected a single 32-bit register");
3077 return MCRegister();
3078 }
3079
3080 for (; trySkipToken(AsmToken::Comma); ) {
3081 RegisterKind NextRegKind;
3082 MCRegister NextReg;
3083 unsigned NextRegNum, NextRegWidth;
3084 Loc = getLoc();
3085
3086 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3087 NextRegNum, NextRegWidth,
3088 Tokens)) {
3089 return MCRegister();
3090 }
3091 if (NextRegWidth != 32) {
3092 Error(Loc, "expected a single 32-bit register");
3093 return MCRegister();
3094 }
3095 if (NextRegKind != RegKind) {
3096 Error(Loc, "registers in a list must be of the same kind");
3097 return MCRegister();
3098 }
3099 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
3100 return MCRegister();
3101 }
3102
3103 if (!skipToken(AsmToken::RBrac,
3104 "expected a comma or a closing square bracket")) {
3105 return MCRegister();
3106 }
3107
3108 if (isRegularReg(RegKind))
3109 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3110
3111 return Reg;
3112}
3113
3114bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3115 MCRegister &Reg, unsigned &RegNum,
3116 unsigned &RegWidth,
3117 SmallVectorImpl<AsmToken> &Tokens) {
3118 auto Loc = getLoc();
3119 Reg = MCRegister();
3120
3121 if (isToken(AsmToken::Identifier)) {
3122 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3123 if (!Reg)
3124 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3125 } else {
3126 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3127 }
3128
3129 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3130 if (!Reg) {
3131 assert(Parser.hasPendingError());
3132 return false;
3133 }
3134
3135 if (!subtargetHasRegister(*TRI, Reg)) {
3136 if (Reg == AMDGPU::SGPR_NULL) {
3137 Error(Loc, "'null' operand is not supported on this GPU");
3138 } else {
3140 " register not available on this GPU");
3141 }
3142 return false;
3143 }
3144
3145 return true;
3146}
3147
3148bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3149 MCRegister &Reg, unsigned &RegNum,
3150 unsigned &RegWidth,
3151 bool RestoreOnFailure /*=false*/) {
3152 Reg = MCRegister();
3153
3155 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3156 if (RestoreOnFailure) {
3157 while (!Tokens.empty()) {
3158 getLexer().UnLex(Tokens.pop_back_val());
3159 }
3160 }
3161 return true;
3162 }
3163 return false;
3164}
3165
3166std::optional<StringRef>
3167AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3168 switch (RegKind) {
3169 case IS_VGPR:
3170 return StringRef(".amdgcn.next_free_vgpr");
3171 case IS_SGPR:
3172 return StringRef(".amdgcn.next_free_sgpr");
3173 default:
3174 return std::nullopt;
3175 }
3176}
3177
3178void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3179 auto SymbolName = getGprCountSymbolName(RegKind);
3180 assert(SymbolName && "initializing invalid register kind");
3181 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3183 Sym->setRedefinable(true);
3184}
3185
3186bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3187 unsigned DwordRegIndex,
3188 unsigned RegWidth) {
3189 // Symbols are only defined for GCN targets
3190 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3191 return true;
3192
3193 auto SymbolName = getGprCountSymbolName(RegKind);
3194 if (!SymbolName)
3195 return true;
3196 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3197
3198 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3199 int64_t OldCount;
3200
3201 if (!Sym->isVariable())
3202 return !Error(getLoc(),
3203 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3204 if (!Sym->getVariableValue()->evaluateAsAbsolute(OldCount))
3205 return !Error(
3206 getLoc(),
3207 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3208
3209 if (OldCount <= NewMax)
3211
3212 return true;
3213}
3214
3215std::unique_ptr<AMDGPUOperand>
3216AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3217 const auto &Tok = getToken();
3218 SMLoc StartLoc = Tok.getLoc();
3219 SMLoc EndLoc = Tok.getEndLoc();
3220 RegisterKind RegKind;
3221 MCRegister Reg;
3222 unsigned RegNum, RegWidth;
3223
3224 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3225 return nullptr;
3226 }
3227 if (isHsaAbi(getSTI())) {
3228 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3229 return nullptr;
3230 } else
3231 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3232 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3233}
3234
3235ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3236 bool HasSP3AbsModifier, LitModifier Lit) {
3237 // TODO: add syntactic sugar for 1/(2*PI)
3238
3239 if (isRegister() || isModifier())
3240 return ParseStatus::NoMatch;
3241
3242 if (Lit == LitModifier::None) {
3243 if (trySkipId("lit"))
3244 Lit = LitModifier::Lit;
3245 else if (trySkipId("lit64"))
3246 Lit = LitModifier::Lit64;
3247
3248 if (Lit != LitModifier::None) {
3249 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3250 return ParseStatus::Failure;
3251 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, Lit);
3252 if (S.isSuccess() &&
3253 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3254 return ParseStatus::Failure;
3255 return S;
3256 }
3257 }
3258
3259 const auto& Tok = getToken();
3260 const auto& NextTok = peekToken();
3261 bool IsReal = Tok.is(AsmToken::Real);
3262 SMLoc S = getLoc();
3263 bool Negate = false;
3264
3265 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3266 lex();
3267 IsReal = true;
3268 Negate = true;
3269 }
3270
3271 AMDGPUOperand::Modifiers Mods;
3272 Mods.Lit = Lit;
3273
3274 if (IsReal) {
3275 // Floating-point expressions are not supported.
3276 // Can only allow floating-point literals with an
3277 // optional sign.
3278
3279 StringRef Num = getTokenStr();
3280 lex();
3281
3282 APFloat RealVal(APFloat::IEEEdouble());
3283 auto roundMode = APFloat::rmNearestTiesToEven;
3284 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3285 return ParseStatus::Failure;
3286 if (Negate)
3287 RealVal.changeSign();
3288
3289 Operands.push_back(
3290 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3291 AMDGPUOperand::ImmTyNone, true));
3292 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3293 Op.setModifiers(Mods);
3294
3295 return ParseStatus::Success;
3296
3297 } else {
3298 int64_t IntVal;
3299 const MCExpr *Expr;
3300 SMLoc S = getLoc();
3301
3302 if (HasSP3AbsModifier) {
3303 // This is a workaround for handling expressions
3304 // as arguments of SP3 'abs' modifier, for example:
3305 // |1.0|
3306 // |-1|
3307 // |1+x|
3308 // This syntax is not compatible with syntax of standard
3309 // MC expressions (due to the trailing '|').
3310 SMLoc EndLoc;
3311 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3312 return ParseStatus::Failure;
3313 } else {
3314 if (Parser.parseExpression(Expr))
3315 return ParseStatus::Failure;
3316 }
3317
3318 if (Expr->evaluateAsAbsolute(IntVal)) {
3319 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3320 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3321 Op.setModifiers(Mods);
3322 } else {
3323 if (Lit != LitModifier::None)
3324 return ParseStatus::NoMatch;
3325 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3326 }
3327
3328 return ParseStatus::Success;
3329 }
3330
3331 return ParseStatus::NoMatch;
3332}
3333
3334ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3335 if (!isRegister())
3336 return ParseStatus::NoMatch;
3337
3338 if (auto R = parseRegister()) {
3339 assert(R->isReg());
3340 Operands.push_back(std::move(R));
3341 return ParseStatus::Success;
3342 }
3343 return ParseStatus::Failure;
3344}
3345
3346ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3347 bool HasSP3AbsMod, LitModifier Lit) {
3348 ParseStatus Res = parseReg(Operands);
3349 if (!Res.isNoMatch())
3350 return Res;
3351 if (isModifier())
3352 return ParseStatus::NoMatch;
3353 return parseImm(Operands, HasSP3AbsMod, Lit);
3354}
3355
3356bool
3357AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3358 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3359 const auto &str = Token.getString();
3360 return str == "abs" || str == "neg" || str == "sext";
3361 }
3362 return false;
3363}
3364
// True if Token begins an opcode modifier of the form "name:value"
// (an identifier immediately followed by a colon).
bool
AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
}
3369
// True if Token starts an operand modifier: a named modifier such as
// abs(...)/neg(...)/sext(...), or the SP3 '|' absolute-value bar.
bool
AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
}
3374
// True if Token begins either a register or any operand modifier.
bool
AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
}
3379
3380// Check if this is an operand modifier or an opcode modifier
3381// which may look like an expression but it is not. We should
3382// avoid parsing these modifiers as expressions. Currently
3383// recognized sequences are:
3384// |...|
3385// abs(...)
3386// neg(...)
3387// sext(...)
3388// -reg
3389// -|...|
3390// -abs(...)
3391// name:...
3392//
3393bool
3394AMDGPUAsmParser::isModifier() {
3395
3396 AsmToken Tok = getToken();
3397 AsmToken NextToken[2];
3398 peekTokens(NextToken);
3399
3400 return isOperandModifier(Tok, NextToken[0]) ||
3401 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3402 isOpcodeModifierWithVal(Tok, NextToken[0]);
3403}
3404
3405// Check if the current token is an SP3 'neg' modifier.
3406// Currently this modifier is allowed in the following context:
3407//
3408// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3409// 2. Before an 'abs' modifier: -abs(...)
3410// 3. Before an SP3 'abs' modifier: -|...|
3411//
3412// In all other cases "-" is handled as a part
3413// of an expression that follows the sign.
3414//
3415// Note: When "-" is followed by an integer literal,
3416// this is interpreted as integer negation rather
3417// than a floating-point NEG modifier applied to N.
3418// Beside being contr-intuitive, such use of floating-point
3419// NEG modifier would have resulted in different meaning
3420// of integer literals used with VOP1/2/C and VOP3,
3421// for example:
3422// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3423// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3424// Negative fp literals with preceding "-" are
3425// handled likewise for uniformity
3426//
3427bool
3428AMDGPUAsmParser::parseSP3NegModifier() {
3429
3430 AsmToken NextToken[2];
3431 peekTokens(NextToken);
3432
3433 if (isToken(AsmToken::Minus) &&
3434 (isRegister(NextToken[0], NextToken[1]) ||
3435 NextToken[0].is(AsmToken::Pipe) ||
3436 isId(NextToken[0], "abs"))) {
3437 lex();
3438 return true;
3439 }
3440
3441 return false;
3442}
3443
3444ParseStatus
3445AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3446 bool AllowImm) {
3447 bool Neg, SP3Neg;
3448 bool Abs, SP3Abs;
3449 SMLoc Loc;
3450
3451 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3452 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3453 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3454
3455 SP3Neg = parseSP3NegModifier();
3456
3457 Loc = getLoc();
3458 Neg = trySkipId("neg");
3459 if (Neg && SP3Neg)
3460 return Error(Loc, "expected register or immediate");
3461 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3462 return ParseStatus::Failure;
3463
3464 Abs = trySkipId("abs");
3465 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3466 return ParseStatus::Failure;
3467
3468 LitModifier Lit = LitModifier::None;
3469 if (trySkipId("lit")) {
3470 Lit = LitModifier::Lit;
3471 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3472 return ParseStatus::Failure;
3473 } else if (trySkipId("lit64")) {
3474 Lit = LitModifier::Lit64;
3475 if (!skipToken(AsmToken::LParen, "expected left paren after lit64"))
3476 return ParseStatus::Failure;
3477 if (!has64BitLiterals())
3478 return Error(Loc, "lit64 is not supported on this GPU");
3479 }
3480
3481 Loc = getLoc();
3482 SP3Abs = trySkipToken(AsmToken::Pipe);
3483 if (Abs && SP3Abs)
3484 return Error(Loc, "expected register or immediate");
3485
3486 ParseStatus Res;
3487 if (AllowImm) {
3488 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3489 } else {
3490 Res = parseReg(Operands);
3491 }
3492 if (!Res.isSuccess())
3493 return (SP3Neg || Neg || SP3Abs || Abs || Lit != LitModifier::None)
3495 : Res;
3496
3497 if (Lit != LitModifier::None && !Operands.back()->isImm())
3498 Error(Loc, "expected immediate with lit modifier");
3499
3500 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3501 return ParseStatus::Failure;
3502 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3503 return ParseStatus::Failure;
3504 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3505 return ParseStatus::Failure;
3506 if (Lit != LitModifier::None &&
3507 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3508 return ParseStatus::Failure;
3509
3510 AMDGPUOperand::Modifiers Mods;
3511 Mods.Abs = Abs || SP3Abs;
3512 Mods.Neg = Neg || SP3Neg;
3513 Mods.Lit = Lit;
3514
3515 if (Mods.hasFPModifiers() || Lit != LitModifier::None) {
3516 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3517 if (Op.isExpr())
3518 return Error(Op.getStartLoc(), "expected an absolute expression");
3519 Op.setModifiers(Mods);
3520 }
3521 return ParseStatus::Success;
3522}
3523
3524ParseStatus
3525AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3526 bool AllowImm) {
3527 bool Sext = trySkipId("sext");
3528 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3529 return ParseStatus::Failure;
3530
3531 ParseStatus Res;
3532 if (AllowImm) {
3533 Res = parseRegOrImm(Operands);
3534 } else {
3535 Res = parseReg(Operands);
3536 }
3537 if (!Res.isSuccess())
3538 return Sext ? ParseStatus::Failure : Res;
3539
3540 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3541 return ParseStatus::Failure;
3542
3543 AMDGPUOperand::Modifiers Mods;
3544 Mods.Sext = Sext;
3545
3546 if (Mods.hasIntModifiers()) {
3547 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3548 if (Op.isExpr())
3549 return Error(Op.getStartLoc(), "expected an absolute expression");
3550 Op.setModifiers(Mods);
3551 }
3552
3553 return ParseStatus::Success;
3554}
3555
// Convenience wrapper: parse a register with optional FP input modifiers,
// disallowing immediates.
ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, false);
}
3559
// Convenience wrapper: parse a register with an optional sext() integer
// modifier, disallowing immediates.
ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, false);
}
3563
3564ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3565 auto Loc = getLoc();
3566 if (trySkipId("off")) {
3567 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3568 AMDGPUOperand::ImmTyOff, false));
3569 return ParseStatus::Success;
3570 }
3571
3572 if (!isRegister())
3573 return ParseStatus::NoMatch;
3574
3575 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3576 if (Reg) {
3577 Operands.push_back(std::move(Reg));
3578 return ParseStatus::Success;
3579 }
3580
3581 return ParseStatus::Failure;
3582}
3583
3584unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3585 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3586
3587 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3588 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3589 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3590 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3591 return Match_InvalidOperand;
3592
3593 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3594 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3595 // v_mac_f32/16 allow only dst_sel == DWORD;
3596 auto OpNum =
3597 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3598 const auto &Op = Inst.getOperand(OpNum);
3599 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3600 return Match_InvalidOperand;
3601 }
3602 }
3603
3604 // Asm can first try to match VOPD or VOPD3. By failing early here with
3605 // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3606 // Checking later during validateInstruction does not give a chance to retry
3607 // parsing as a different encoding.
3608 if (tryAnotherVOPDEncoding(Inst))
3609 return Match_InvalidOperand;
3610
3611 return Match_Success;
3612}
3613
3623
3624// What asm variants we should check
3625ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3626 if (isForcedDPP() && isForcedVOP3()) {
3627 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3628 return ArrayRef(Variants);
3629 }
3630 if (getForcedEncodingSize() == 32) {
3631 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3632 return ArrayRef(Variants);
3633 }
3634
3635 if (isForcedVOP3()) {
3636 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3637 return ArrayRef(Variants);
3638 }
3639
3640 if (isForcedSDWA()) {
3641 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3643 return ArrayRef(Variants);
3644 }
3645
3646 if (isForcedDPP()) {
3647 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3648 return ArrayRef(Variants);
3649 }
3650
3651 return getAllVariants();
3652}
3653
// Returns the mnemonic suffix corresponding to the encoding that was
// explicitly forced by the parsed mnemonic, or an empty string if none.
StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (isForcedDPP() && isForcedVOP3())
    return "e64_dpp";

  if (getForcedEncodingSize() == 32)
    return "e32";

  if (isForcedVOP3())
    return "e64";

  if (isForcedSDWA())
    return "sdwa";

  if (isForcedDPP())
    return "dpp";

  return "";
}
3672
3673MCRegister
3674AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3675 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3676 for (MCPhysReg Reg : Desc.implicit_uses()) {
3677 switch (Reg) {
3678 case AMDGPU::FLAT_SCR:
3679 case AMDGPU::VCC:
3680 case AMDGPU::VCC_LO:
3681 case AMDGPU::VCC_HI:
3682 case AMDGPU::M0:
3683 return Reg;
3684 default:
3685 break;
3686 }
3687 }
3688 return MCRegister();
3689}
3690
3691// NB: This code is correct only when used to check constant
3692// bus limitations because GFX7 support no f16 inline constants.
3693// Note that there are no cases when a GFX7 opcode violates
3694// constant bus limitations due to the use of an f16 constant.
3695bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3696 unsigned OpIdx) const {
3697 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3698
3701 return false;
3702 }
3703
3704 const MCOperand &MO = Inst.getOperand(OpIdx);
3705
3706 int64_t Val = MO.isImm() ? MO.getImm() : getLitValue(MO.getExpr());
3707 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3708
3709 switch (OpSize) { // expected operand size
3710 case 8:
3711 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3712 case 4:
3713 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3714 case 2: {
3715 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3718 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3719
3723
3727
3731
3734 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3735
3738 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3739
3741 return false;
3742
3743 llvm_unreachable("invalid operand type");
3744 }
3745 default:
3746 llvm_unreachable("invalid operand size");
3747 }
3748}
3749
// Returns how many distinct scalar values (SGPRs/literals) instruction
// \p Opcode may read through the constant bus.
unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  // Pre-GFX10 targets allow a single scalar source.
  if (!isGFX10Plus())
    return 1;

  switch (Opcode) {
  // 64-bit shift instructions can use only one scalar value input
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
  case AMDGPU::V_LSHLREV_B64_e32_gfx12:
  case AMDGPU::V_LSHLREV_B64_e64_gfx12:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
  case AMDGPU::V_LSHRREV_B64_e64_gfx12:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
  case AMDGPU::V_ASHRREV_I64_e64_gfx12:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHR_I64_e64:
    return 1;
  default:
    return 2;
  }
}
3777
// Maximum number of regular source operands an instruction may have
// (VOPD uses up to six: src0X/vsrc1X/vsrc2X and the Y equivalents).
constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3780
3781// Get regular operand indices in the same order as specified
3782// in the instruction (but append mandatory literals to the end).
3784 bool AddMandatoryLiterals = false) {
3785
3786 int16_t ImmIdx =
3787 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3788
3789 if (isVOPD(Opcode)) {
3790 int16_t ImmXIdx =
3791 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
3792
3793 return {getNamedOperandIdx(Opcode, OpName::src0X),
3794 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3795 getNamedOperandIdx(Opcode, OpName::vsrc2X),
3796 getNamedOperandIdx(Opcode, OpName::src0Y),
3797 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3798 getNamedOperandIdx(Opcode, OpName::vsrc2Y),
3799 ImmXIdx,
3800 ImmIdx};
3801 }
3802
3803 return {getNamedOperandIdx(Opcode, OpName::src0),
3804 getNamedOperandIdx(Opcode, OpName::src1),
3805 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3806}
3807
3808bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3809 const MCOperand &MO = Inst.getOperand(OpIdx);
3810 if (MO.isImm())
3811 return !isInlineConstant(Inst, OpIdx);
3812 if (MO.isReg()) {
3813 auto Reg = MO.getReg();
3814 if (!Reg)
3815 return false;
3816 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3817 auto PReg = mc2PseudoReg(Reg);
3818 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3819 }
3820 return true;
3821}
3822
3823// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3824// Writelane is special in that it can use SGPR and M0 (which would normally
3825// count as using the constant bus twice - but in this case it is allowed since
3826// the lane selector doesn't count as a use of the constant bus). However, it is
3827// still required to abide by the 1 SGPR rule.
3828static bool checkWriteLane(const MCInst &Inst) {
3829 const unsigned Opcode = Inst.getOpcode();
3830 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3831 return false;
3832 const MCOperand &LaneSelOp = Inst.getOperand(2);
3833 if (!LaneSelOp.isReg())
3834 return false;
3835 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3836 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3837}
3838
3839bool AMDGPUAsmParser::validateConstantBusLimitations(
3840 const MCInst &Inst, const OperandVector &Operands) {
3841 const unsigned Opcode = Inst.getOpcode();
3842 const MCInstrDesc &Desc = MII.get(Opcode);
3843 MCRegister LastSGPR;
3844 unsigned ConstantBusUseCount = 0;
3845 unsigned NumLiterals = 0;
3846 unsigned LiteralSize;
3847
3848 if (!(Desc.TSFlags &
3851 !isVOPD(Opcode))
3852 return true;
3853
3854 if (checkWriteLane(Inst))
3855 return true;
3856
3857 // Check special imm operands (used by madmk, etc)
3858 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3859 ++NumLiterals;
3860 LiteralSize = 4;
3861 }
3862
3863 SmallDenseSet<MCRegister> SGPRsUsed;
3864 MCRegister SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3865 if (SGPRUsed) {
3866 SGPRsUsed.insert(SGPRUsed);
3867 ++ConstantBusUseCount;
3868 }
3869
3870 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3871
3872 unsigned ConstantBusLimit = getConstantBusLimit(Opcode);
3873
3874 for (int OpIdx : OpIndices) {
3875 if (OpIdx == -1)
3876 continue;
3877
3878 const MCOperand &MO = Inst.getOperand(OpIdx);
3879 if (usesConstantBus(Inst, OpIdx)) {
3880 if (MO.isReg()) {
3881 LastSGPR = mc2PseudoReg(MO.getReg());
3882 // Pairs of registers with a partial intersections like these
3883 // s0, s[0:1]
3884 // flat_scratch_lo, flat_scratch
3885 // flat_scratch_lo, flat_scratch_hi
3886 // are theoretically valid but they are disabled anyway.
3887 // Note that this code mimics SIInstrInfo::verifyInstruction
3888 if (SGPRsUsed.insert(LastSGPR).second) {
3889 ++ConstantBusUseCount;
3890 }
3891 } else { // Expression or a literal
3892
3893 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3894 continue; // special operand like VINTERP attr_chan
3895
3896 // An instruction may use only one literal.
3897 // This has been validated on the previous step.
3898 // See validateVOPLiteral.
3899 // This literal may be used as more than one operand.
3900 // If all these operands are of the same size,
3901 // this literal counts as one scalar value.
3902 // Otherwise it counts as 2 scalar values.
3903 // See "GFX10 Shader Programming", section 3.6.2.3.
3904
3906 if (Size < 4)
3907 Size = 4;
3908
3909 if (NumLiterals == 0) {
3910 NumLiterals = 1;
3911 LiteralSize = Size;
3912 } else if (LiteralSize != Size) {
3913 NumLiterals = 2;
3914 }
3915 }
3916 }
3917
3918 if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
3919 Error(getOperandLoc(Operands, OpIdx),
3920 "invalid operand (violates constant bus restrictions)");
3921 return false;
3922 }
3923 }
3924 return true;
3925}
3926
// Check the VOPD (or VOPD3, when \p AsVOPD3) register bank and literal
// constraints for \p Inst.
// \returns the index of the first offending MC operand, or std::nullopt if
// all constraints are satisfied (also for non-VOPD instructions).
std::optional<unsigned>
AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {

  const unsigned Opcode = Inst.getOpcode();
  if (!isVOPD(Opcode))
    return {};

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();

  // Map an operand index to its VGPR; SGPRs and non-register operands
  // yield a null register and are ignored by the bank check.
  auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
    const MCOperand &Opr = Inst.getOperand(OperandIdx);
    return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
               ? Opr.getReg()
               : MCRegister();
  };

  // On GFX12+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
  // source-cache.
  bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
                 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
                 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
  bool AllowSameVGPR = isGFX1250();

  if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
    // src0 may still be an inlinable constant; reject anything else.
    for (auto OpName : {OpName::src0X, OpName::src0Y}) {
      int I = getNamedOperandIdx(Opcode, OpName);
      const MCOperand &Op = Inst.getOperand(I);
      if (!Op.isImm())
        continue;
      int64_t Imm = Op.getImm();
      if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
          !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
        return (unsigned)I;
    }

    // The remaining sources must not be immediates at all.
    for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
                        OpName::vsrc2Y, OpName::imm}) {
      int I = getNamedOperandIdx(Opcode, OpName);
      if (I == -1)
        continue;
      const MCOperand &Op = Inst.getOperand(I);
      if (Op.isImm())
        return (unsigned)I;
    }
  }

  const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
  auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
      getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);

  return InvalidCompOprIdx;
}
3979
// Validate VOPD/VOPD3-specific constraints for \p Inst and emit a
// diagnostic at the offending parsed operand on failure.
bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
                                   const OperandVector &Operands) {

  unsigned Opcode = Inst.getOpcode();
  bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;

  if (AsVOPD3) {
    // ABS source modifiers are rejected for VOPD3 instructions.
    for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
      AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
      if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
          (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
        Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
    }
  }

  auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
  if (!InvalidCompOprIdx.has_value())
    return true;

  // Map the offending component operand back to the parsed operand with the
  // larger index so the diagnostic points at a real source token.
  auto CompOprIdx = *InvalidCompOprIdx;
  const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
  auto ParsedIdx =
      std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
               InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
  assert(ParsedIdx > 0 && ParsedIdx < Operands.size());

  auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
  if (CompOprIdx == VOPD::Component::DST) {
    if (AsVOPD3)
      Error(Loc, "dst registers must be distinct");
    else
      Error(Loc, "one dst register must be even and the other odd");
  } else {
    auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
    Error(Loc, Twine("src") + Twine(CompSrcIdx) +
                   " operands must use different VGPR banks");
  }

  return false;
}
4020
4021// \returns true if \p Inst does not satisfy VOPD constraints, but can be
4022// potentially used as VOPD3 with the same operands.
4023bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
4024 // First check if it fits VOPD
4025 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
4026 if (!InvalidCompOprIdx.has_value())
4027 return false;
4028
4029 // Then if it fits VOPD3
4030 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
4031 if (InvalidCompOprIdx.has_value()) {
4032 // If failed operand is dst it is better to show error about VOPD3
4033 // instruction as it has more capabilities and error message will be
4034 // more informative. If the dst is not legal for VOPD3, then it is not
4035 // legal for VOPD either.
4036 if (*InvalidCompOprIdx == VOPD::Component::DST)
4037 return true;
4038
4039 // Otherwise prefer VOPD as we may find ourselves in an awkward situation
4040 // with a conflict in tied implicit src2 of fmac and no asm operand to
4041 // to point to.
4042 return false;
4043 }
4044 return true;
4045}
4046
4047// \returns true is a VOPD3 instruction can be also represented as a shorter
4048// VOPD encoding.
4049bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
4050 const unsigned Opcode = Inst.getOpcode();
4051 const auto &II = getVOPDInstInfo(Opcode, &MII);
4052 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
4053 if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
4054 !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
4055 return false;
4056
4057 // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
4058 // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
4059 // be parsed as VOPD which does not accept src2.
4060 if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
4061 II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
4062 return false;
4063
4064 // If any modifiers are set this cannot be VOPD.
4065 for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4066 OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4067 OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4068 int I = getNamedOperandIdx(Opcode, OpName);
4069 if (I == -1)
4070 continue;
4071 if (Inst.getOperand(I).getImm())
4072 return false;
4073 }
4074
4075 return !tryVOPD3(Inst);
4076}
4077
4078// VOPD3 has more relaxed register constraints than VOPD. We prefer shorter VOPD
4079// form but switch to VOPD3 otherwise.
4080bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4081 const unsigned Opcode = Inst.getOpcode();
4082 if (!isGFX1250() || !isVOPD(Opcode))
4083 return false;
4084
4085 if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4086 return tryVOPD(Inst);
4087 return tryVOPD3(Inst);
4088}
4089
4090bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
4091
4092 const unsigned Opc = Inst.getOpcode();
4093 const MCInstrDesc &Desc = MII.get(Opc);
4094
4095 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
4096 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
4097 assert(ClampIdx != -1);
4098 return Inst.getOperand(ClampIdx).getImm() == 0;
4099 }
4100
4101 return true;
4102}
4103
4106
4107bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc) {
4108
4109 const unsigned Opc = Inst.getOpcode();
4110 const MCInstrDesc &Desc = MII.get(Opc);
4111
4112 if ((Desc.TSFlags & MIMGFlags) == 0)
4113 return true;
4114
4115 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
4116 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4117 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
4118
4119 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
4120 return true;
4121
4122 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
4123 return true;
4124
4125 unsigned VDataSize = getRegOperandSize(Desc, VDataIdx);
4126 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
4127 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4128 if (DMask == 0)
4129 DMask = 1;
4130
4131 bool IsPackedD16 = false;
4132 unsigned DataSize =
4133 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
4134 if (hasPackedD16()) {
4135 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4136 IsPackedD16 = D16Idx >= 0;
4137 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
4138 DataSize = (DataSize + 1) / 2;
4139 }
4140
4141 if ((VDataSize / 4) == DataSize + TFESize)
4142 return true;
4143
4144 StringRef Modifiers;
4145 if (isGFX90A())
4146 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
4147 else
4148 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
4149
4150 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
4151 return false;
4152}
4153
// Validate that the number of address registers of a MIMG instruction
// matches what the dim and a16 modifiers require.
bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  // The address-size rules below only apply to MIMG-flagged opcodes on
  // GFX10 and later.
  if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
    return true;

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);

  // NOTE(review): the initializer of BaseOpcode appears truncated here
  // (extraction artifact) — presumably
  // AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); confirm upstream.
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
  // MIMG proper names the resource operand 'srsrc'; other encodings use
  // 'rsrc'.
  AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
                                  ? AMDGPU::OpName::srsrc
                                  : AMDGPU::OpName::rsrc;
  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);

  assert(VAddr0Idx != -1);
  assert(SrsrcIdx != -1);
  assert(SrsrcIdx > VAddr0Idx);

  bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
  // BVH (ray intersect) opcodes only require the a16 flag to match the
  // base opcode's expectation.
  if (BaseOpcode->BVH) {
    if (IsA16 == BaseOpcode->A16)
      return true;
    Error(IDLoc, "image address size does not match a16");
    return false;
  }

  unsigned Dim = Inst.getOperand(DimIdx).getImm();
  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
  // With NSA encoding each address component is a separate operand between
  // vaddr0 and srsrc; otherwise vaddr0 is a single register tuple.
  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
  unsigned ActualAddrSize =
      IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(Desc, VAddr0Idx) / 4;

  unsigned ExpectedAddrSize =
      AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());

  if (IsNSA) {
    // NOTE(review): the right-hand side of this comparison appears truncated
    // (extraction artifact) — confirm the full condition against upstream.
    if (hasPartialNSAEncoding() &&
        ExpectedAddrSize >
      int VAddrLastIdx = SrsrcIdx - 1;
      // Partial NSA packs the trailing address components into the final
      // vaddr operand.
      unsigned VAddrLastSize = getRegOperandSize(Desc, VAddrLastIdx) / 4;

      ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
    }
  } else {
    // Non-NSA address tuples above 12 dwords are padded up to 16.
    if (ExpectedAddrSize > 12)
      ExpectedAddrSize = 16;

    // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
    // This provides backward compatibility for assembly created
    // before 160b/192b/224b types were directly supported.
    if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
      return true;
  }

  if (ActualAddrSize == ExpectedAddrSize)
    return true;

  Error(IDLoc, "image address size does not match dim and a16");
  return false;
}
4220
4221bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4222
4223 const unsigned Opc = Inst.getOpcode();
4224 const MCInstrDesc &Desc = MII.get(Opc);
4225
4226 if ((Desc.TSFlags & MIMGFlags) == 0)
4227 return true;
4228 if (!Desc.mayLoad() || !Desc.mayStore())
4229 return true; // Not atomic
4230
4231 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4232 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4233
4234 // This is an incomplete check because image_atomic_cmpswap
4235 // may only use 0x3 and 0xf while other atomic operations
4236 // may use 0x1 and 0x3. However these limitations are
4237 // verified when we check that dmask matches dst size.
4238 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4239}
4240
4241bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4242
4243 const unsigned Opc = Inst.getOpcode();
4244 const MCInstrDesc &Desc = MII.get(Opc);
4245
4246 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4247 return true;
4248
4249 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4250 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4251
4252 // GATHER4 instructions use dmask in a different fashion compared to
4253 // other MIMG instructions. The only useful DMASK values are
4254 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4255 // (red,red,red,red) etc.) The ISA document doesn't mention
4256 // this.
4257 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4258}
4259
// On GFX10+ a MIMG instruction must carry an explicit 'dim' modifier
// (except for opcodes that have none, e.g. image_bvh_intersect_ray).
bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
                                      const OperandVector &Operands) {
  if (!isGFX10Plus())
    return true;

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & MIMGFlags) == 0)
    return true;

  // image_bvh_intersect_ray instructions do not have dim
  // NOTE(review): the conditional guarding this early return appears to be
  // missing here (extraction artifact) — confirm against upstream.
    return true;

  // Accept only if a dim operand was actually parsed (operand 0 is the
  // mnemonic, so start at 1).
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isDim())
      return true;
  }
  return false;
}
4282
// MSAA image opcodes may only be used with a dim that is itself MSAA.
bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & MIMGFlags) == 0)
    return true;

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
  // NOTE(review): the initializer of BaseOpcode appears truncated here
  // (extraction artifact) — presumably
  // AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); confirm upstream.
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =

  if (!BaseOpcode->MSAA)
    return true;

  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  assert(DimIdx != -1);

  unsigned Dim = Inst.getOperand(DimIdx).getImm();
  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);

  // The selected dim must be one of the MSAA dimensions.
  return DimInfo->MSAA;
}
4305
4306static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4307{
4308 switch (Opcode) {
4309 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4310 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4311 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4312 return true;
4313 default:
4314 return false;
4315 }
4316}
4317
4318// movrels* opcodes should only allow VGPRS as src0.
4319// This is specified in .td description for vop1/vop3,
4320// but sdwa is handled differently. See isSDWAOperand.
4321bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4322 const OperandVector &Operands) {
4323
4324 const unsigned Opc = Inst.getOpcode();
4325 const MCInstrDesc &Desc = MII.get(Opc);
4326
4327 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4328 return true;
4329
4330 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4331 assert(Src0Idx != -1);
4332
4333 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4334 if (Src0.isReg()) {
4335 auto Reg = mc2PseudoReg(Src0.getReg());
4336 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4337 if (!isSGPR(Reg, TRI))
4338 return true;
4339 }
4340
4341 Error(getOperandLoc(Operands, Src0Idx), "source operand must be a VGPR");
4342 return false;
4343}
4344
4345bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4346 const OperandVector &Operands) {
4347
4348 const unsigned Opc = Inst.getOpcode();
4349
4350 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4351 return true;
4352
4353 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4354 assert(Src0Idx != -1);
4355
4356 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4357 if (!Src0.isReg())
4358 return true;
4359
4360 auto Reg = mc2PseudoReg(Src0.getReg());
4361 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4362 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4363 Error(getOperandLoc(Operands, Src0Idx),
4364 "source operand must be either a VGPR or an inline constant");
4365 return false;
4366 }
4367
4368 return true;
4369}
4370
4371bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4372 const OperandVector &Operands) {
4373 unsigned Opcode = Inst.getOpcode();
4374 const MCInstrDesc &Desc = MII.get(Opcode);
4375
4376 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4377 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4378 return true;
4379
4380 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4381 if (Src2Idx == -1)
4382 return true;
4383
4384 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4385 Error(getOperandLoc(Operands, Src2Idx),
4386 "inline constants are not allowed for this operand");
4387 return false;
4388 }
4389
4390 return true;
4391}
4392
// Validate MFMA-specific constraints: the register tuple sizes implied by
// the cbsz/blgp format selectors, and no partial overlap of src2 with dst.
bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
                                   const OperandVector &Operands) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
    return true;

  int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
  if (BlgpIdx != -1) {
    // Scaled f8f6f4 MFMA: cbsz selects the src A format, blgp the src B
    // format; each format implies a specific register tuple size.
    if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
      int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);

      unsigned CBSZ = Inst.getOperand(CbszIdx).getImm();
      unsigned BLGP = Inst.getOperand(BlgpIdx).getImm();

      // Validate the correct register size was used for the floating point
      // format operands

      // Report both mismatches (src A and src B) in one pass.
      bool Success = true;
      if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) {
        int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
        Error(getOperandLoc(Operands, Src0Idx),
              "wrong register tuple size for cbsz value " + Twine(CBSZ));
        Success = false;
      }

      if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(BLGP)) {
        int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
        Error(getOperandLoc(Operands, Src1Idx),
              "wrong register tuple size for blgp value " + Twine(BLGP));
        Success = false;
      }

      return Success;
    }
  }

  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
  if (Src2Idx == -1)
    return true;

  const MCOperand &Src2 = Inst.getOperand(Src2Idx);
  if (!Src2.isReg())
    return true;

  MCRegister Src2Reg = Src2.getReg();
  MCRegister DstReg = Inst.getOperand(0).getReg();
  // src2 identical to dst is the usual accumulate-in-place form and is fine.
  if (Src2Reg == DstReg)
    return true;

  // Tuples of 128 bits or less are exempt from the overlap restriction.
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[0], HwMode))
          .getSizeInBits() <= 128)
    return true;

  // A partial overlap (sharing some but not all registers) is illegal.
  if (TRI->regsOverlap(Src2Reg, DstReg)) {
    Error(getOperandLoc(Operands, Src2Idx),
          "source 2 operand must not partially overlap with dst");
    return false;
  }

  return true;
}
4457
4458bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4459 switch (Inst.getOpcode()) {
4460 default:
4461 return true;
4462 case V_DIV_SCALE_F32_gfx6_gfx7:
4463 case V_DIV_SCALE_F32_vi:
4464 case V_DIV_SCALE_F32_gfx10:
4465 case V_DIV_SCALE_F64_gfx6_gfx7:
4466 case V_DIV_SCALE_F64_vi:
4467 case V_DIV_SCALE_F64_gfx10:
4468 break;
4469 }
4470
4471 // TODO: Check that src0 = src1 or src2.
4472
4473 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4474 AMDGPU::OpName::src2_modifiers,
4475 AMDGPU::OpName::src2_modifiers}) {
4476 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4477 .getImm() &
4479 return false;
4480 }
4481 }
4482
4483 return true;
4484}
4485
4486bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4487
4488 const unsigned Opc = Inst.getOpcode();
4489 const MCInstrDesc &Desc = MII.get(Opc);
4490
4491 if ((Desc.TSFlags & MIMGFlags) == 0)
4492 return true;
4493
4494 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4495 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4496 if (isCI() || isSI())
4497 return false;
4498 }
4499
4500 return true;
4501}
4502
4503bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4504 const unsigned Opc = Inst.getOpcode();
4505 const MCInstrDesc &Desc = MII.get(Opc);
4506
4507 if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0)
4508 return true;
4509
4510 int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
4511
4512 return R128Idx < 0 || !Inst.getOperand(R128Idx).getImm();
4513}
4514
// \returns true for the "reversed" VALU opcode variants (v_subrev*,
// v_*rev_b16/b32/b64 shifts and their packed forms) across all encodings.
static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    return true;
  default:
    return false;
  }
}
4643
4644bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst,
4645 const OperandVector &Operands) {
4646 using namespace SIInstrFlags;
4647 const unsigned Opcode = Inst.getOpcode();
4648 const MCInstrDesc &Desc = MII.get(Opcode);
4649
4650 // lds_direct register is defined so that it can be used
4651 // with 9-bit operands only. Ignore encodings which do not accept these.
4652 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4653 if ((Desc.TSFlags & Enc) == 0)
4654 return true;
4655
4656 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4657 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4658 if (SrcIdx == -1)
4659 break;
4660 const auto &Src = Inst.getOperand(SrcIdx);
4661 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4662
4663 if (isGFX90A() || isGFX11Plus()) {
4664 Error(getOperandLoc(Operands, SrcIdx),
4665 "lds_direct is not supported on this GPU");
4666 return false;
4667 }
4668
4669 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) {
4670 Error(getOperandLoc(Operands, SrcIdx),
4671 "lds_direct cannot be used with this instruction");
4672 return false;
4673 }
4674
4675 if (SrcName != OpName::src0) {
4676 Error(getOperandLoc(Operands, SrcIdx),
4677 "lds_direct may be used as src0 only");
4678 return false;
4679 }
4680 }
4681 }
4682
4683 return true;
4684}
4685
4686SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4687 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4688 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4689 if (Op.isFlatOffset())
4690 return Op.getStartLoc();
4691 }
4692 return getLoc();
4693}
4694
4695bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4696 const OperandVector &Operands) {
4697 auto Opcode = Inst.getOpcode();
4698 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4699 if (OpNum == -1)
4700 return true;
4701
4702 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4703 if ((TSFlags & SIInstrFlags::FLAT))
4704 return validateFlatOffset(Inst, Operands);
4705
4706 if ((TSFlags & SIInstrFlags::SMRD))
4707 return validateSMEMOffset(Inst, Operands);
4708
4709 const auto &Op = Inst.getOperand(OpNum);
4710 // GFX12+ buffer ops: InstOffset is signed 24, but must not be a negative.
4711 if (isGFX12Plus() &&
4712 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4713 const unsigned OffsetSize = 24;
4714 if (!isUIntN(OffsetSize - 1, Op.getImm())) {
4715 Error(getFlatOffsetLoc(Operands),
4716 Twine("expected a ") + Twine(OffsetSize - 1) +
4717 "-bit unsigned offset for buffer ops");
4718 return false;
4719 }
4720 } else {
4721 const unsigned OffsetSize = 16;
4722 if (!isUIntN(OffsetSize, Op.getImm())) {
4723 Error(getFlatOffsetLoc(Operands),
4724 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4725 return false;
4726 }
4727 }
4728 return true;
4729}
4730
// Validate the immediate offset of a FLAT instruction against the
// subtarget's supported offset width and signedness.
bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if ((TSFlags & SIInstrFlags::FLAT) == 0)
    return true;

  auto Opcode = Inst.getOpcode();
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  assert(OpNum != -1);

  // Targets without flat-offset support require the modifier to be 0.
  const auto &Op = Inst.getOperand(OpNum);
  if (!hasFlatOffsets() && Op.getImm() != 0) {
    Error(getFlatOffsetLoc(Operands),
          "flat offset modifier is not supported on this GPU");
    return false;
  }

  // For pre-GFX12 FLAT instructions the offset must be positive;
  // MSB is ignored and forced to zero.
  unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
  // NOTE(review): part of this initializer appears to be missing
  // (extraction artifact) — upstream also ORs in a condition for
  // global/scratch segment instructions; confirm before relying on it.
  bool AllowNegative =
      isGFX12Plus();
  if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
    Error(getFlatOffsetLoc(Operands),
          Twine("expected a ") +
              (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
                             : Twine(OffsetSize - 1) + "-bit unsigned offset"));
    return false;
  }

  return true;
}
4764
4765SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4766 // Start with second operand because SMEM Offset cannot be dst or src0.
4767 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4768 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4769 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4770 return Op.getStartLoc();
4771 }
4772 return getLoc();
4773}
4774
// Validate the immediate offset of an SMEM/SMRD instruction against the
// width allowed by the subtarget and instruction kind.
bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  // SI/CI SMRD offsets are not checked here.
  if (isCI() || isSI())
    return true;

  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if ((TSFlags & SIInstrFlags::SMRD) == 0)
    return true;

  auto Opcode = Inst.getOpcode();
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  if (OpNum == -1)
    return true;

  // Non-immediate (expression) offsets are resolved and checked later.
  const auto &Op = Inst.getOperand(OpNum);
  if (!Op.isImm())
    return true;

  uint64_t Offset = Op.getImm();
  bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
  // NOTE(review): the range check that should guard this early return
  // appears to be missing here (extraction artifact) — upstream tests
  // Offset via the legal-SMRD-offset helpers; confirm before relying on it.
    return true;

  Error(getSMEMOffsetLoc(Operands),
        isGFX12Plus() && IsBuffer
            ? "expected a 23-bit unsigned offset for buffer ops"
            : isGFX12Plus() ? "expected a 24-bit signed offset"
            : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
                                   : "expected a 21-bit signed offset");

  return false;
}
4808
// SOP2/SOPC instructions may use at most one unique 32-bit literal across
// their src0/src1 operands.
bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
                                         const OperandVector &Operands) {
  unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
    return true;

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);

  const int OpIndices[] = { Src0Idx, Src1Idx };

  // Count literal-requiring values and unresolved expressions separately;
  // together they must not exceed one.
  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  int64_t LiteralValue;

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1) break;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    // Exclude special imm operands (like that used by s_set_gpr_idx_on)
    // NOTE(review): a guard line appears to be missing here (extraction
    // artifact) — upstream wraps the body below in
    // 'if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {'; confirm upstream.
    bool IsLit = false;
    std::optional<int64_t> Imm;
    if (MO.isImm()) {
      Imm = MO.getImm();
    } else if (MO.isExpr()) {
      // lit()-wrapped expressions carry a known literal value.
      if (isLitExpr(MO.getExpr())) {
        IsLit = true;
        Imm = getLitValue(MO.getExpr());
      }
    } else {
      continue;
    }

    if (!Imm.has_value()) {
      // An unresolved expression counts as one (unknown) literal.
      ++NumExprs;
    } else if (!isInlineConstant(Inst, OpIdx)) {
      auto OpType = static_cast<AMDGPU::OperandType>(
          Desc.operands()[OpIdx].OperandType);
      int64_t Value = encode32BitLiteral(*Imm, OpType, IsLit);
      // Only distinct encoded values count as separate literals.
      if (NumLiterals == 0 || LiteralValue != Value) {
        // NOTE(review): the assignment 'LiteralValue = Value;' appears to
        // be missing here (extraction artifact); confirm upstream.
        ++NumLiterals;
      }
    }
    }
  }

  if (NumLiterals + NumExprs <= 1)
    return true;

  Error(getOperandLoc(Operands, Src1Idx),
        "only one unique literal operand is allowed");
  return false;
}
4865
// Validate op_sel / op_sel_hi constraints that vary per opcode family.
bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  // v_permlane16/x16: only the low two op_sel bits are meaningful.
  if (isPermlane16(Opc)) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

    if (OpSel & ~3)
      return false;
  }

  uint64_t TSFlags = MII.get(Opc).TSFlags;

  // GFX940 DOT instructions must use the default op_sel (0) and op_sel_hi
  // (all ones, encoded as -1) values.
  if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    if (OpSelIdx != -1) {
      if (Inst.getOperand(OpSelIdx).getImm() != 0)
        return false;
    }
    int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
    if (OpSelHiIdx != -1) {
      if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
        return false;
    }
  }

  // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
  if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
      (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
    if (OpSel & 3)
      return false;
  }

  // Packed math FP32 instructions typically accept SGPRs or VGPRs as source
  // operands. On gfx12+, if a source operand uses SGPRs, the HW can only read
  // the first SGPR and use it for both the low and high operations.
  if (isPackedFP32Inst(Opc) && isGFX12Plus()) {
    int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
    int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);

    const MCOperand &Src0 = Inst.getOperand(Src0Idx);
    const MCOperand &Src1 = Inst.getOperand(Src1Idx);
    unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
    unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();

    const MCRegisterInfo *TRI = getContext().getRegisterInfo();

    // An SGPR source must not select the high half in either the low or the
    // high operation: its op_sel and op_sel_hi bits must both be clear.
    auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
      unsigned Mask = 1U << Index;
      return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
    };

    if (Src0.isReg() && isSGPR(Src0.getReg(), TRI) &&
        !VerifyOneSGPR(/*Index=*/0))
      return false;
    if (Src1.isReg() && isSGPR(Src1.getReg(), TRI) &&
        !VerifyOneSGPR(/*Index=*/1))
      return false;

    int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
    if (Src2Idx != -1) {
      const MCOperand &Src2 = Inst.getOperand(Src2Idx);
      if (Src2.isReg() && isSGPR(Src2.getReg(), TRI) &&
          !VerifyOneSGPR(/*Index=*/2))
        return false;
    }
  }

  return true;
}
4939
4940bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
4941 if (!hasTrue16Insts())
4942 return true;
4943 const MCRegisterInfo *MRI = getMRI();
4944 const unsigned Opc = Inst.getOpcode();
4945 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4946 if (OpSelIdx == -1)
4947 return true;
4948 unsigned OpSelOpValue = Inst.getOperand(OpSelIdx).getImm();
4949 // If the value is 0 we could have a default OpSel Operand, so conservatively
4950 // allow it.
4951 if (OpSelOpValue == 0)
4952 return true;
4953 unsigned OpCount = 0;
4954 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
4955 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
4956 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), OpName);
4957 if (OpIdx == -1)
4958 continue;
4959 const MCOperand &Op = Inst.getOperand(OpIdx);
4960 if (Op.isReg() &&
4961 MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(Op.getReg())) {
4962 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Op.getReg(), *MRI);
4963 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
4964 if (OpSelOpIsHi != VGPRSuffixIsHi)
4965 return false;
4966 }
4967 ++OpCount;
4968 }
4969
4970 return true;
4971}
4972
4973bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
4974 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4975
4976 const unsigned Opc = Inst.getOpcode();
4977 uint64_t TSFlags = MII.get(Opc).TSFlags;
4978
4979 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4980 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4981 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4982 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4983 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4984 !(TSFlags & SIInstrFlags::IsSWMMAC))
4985 return true;
4986
4987 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4988 if (NegIdx == -1)
4989 return true;
4990
4991 unsigned Neg = Inst.getOperand(NegIdx).getImm();
4992
4993 // Instructions that have neg_lo or neg_hi operand but neg modifier is allowed
4994 // on some src operands but not allowed on other.
4995 // It is convenient that such instructions don't have src_modifiers operand
4996 // for src operands that don't allow neg because they also don't allow opsel.
4997
4998 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4999 AMDGPU::OpName::src1_modifiers,
5000 AMDGPU::OpName::src2_modifiers};
5001
5002 for (unsigned i = 0; i < 3; ++i) {
5003 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
5004 if (Neg & (1 << i))
5005 return false;
5006 }
5007 }
5008
5009 return true;
5010}
5011
5012bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
5013 const OperandVector &Operands) {
5014 const unsigned Opc = Inst.getOpcode();
5015 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
5016 if (DppCtrlIdx >= 0) {
5017 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
5018
5019 if (!AMDGPU::isLegalDPALU_DPPControl(getSTI(), DppCtrl) &&
5020 AMDGPU::isDPALU_DPP(MII.get(Opc), MII, getSTI())) {
5021 // DP ALU DPP is supported for row_newbcast only on GFX9* and row_share
5022 // only on GFX12.
5023 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
5024 Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
5025 : "DP ALU dpp only supports row_newbcast");
5026 return false;
5027 }
5028 }
5029
5030 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
5031 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5032
5033 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
5034 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
5035 if (Src1Idx >= 0) {
5036 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
5037 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5038 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
5039 Error(getOperandLoc(Operands, Src1Idx),
5040 "invalid operand for instruction");
5041 return false;
5042 }
5043 if (Src1.isImm()) {
5044 Error(getInstLoc(Operands),
5045 "src1 immediate operand invalid for instruction");
5046 return false;
5047 }
5048 }
5049 }
5050
5051 return true;
5052}
5053
5054// Check if VCC register matches wavefront size
5055bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
5056 return (Reg == AMDGPU::VCC && isWave64()) ||
5057 (Reg == AMDGPU::VCC_LO && isWave32());
5058}
5059
// One unique literal can be used. VOP3 literal is only allowed in GFX10+
bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
                                         const OperandVector &Operands) {
  unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  // An 'imm' named operand means the encoding always carries a literal.
  bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
  // Only VOP3/VOP3P, VOPD and mandatory-literal instructions are constrained
  // here; other encodings are checked elsewhere.
  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
      !HasMandatoryLiteral && !isVOPD(Opcode))
    return true;

  OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);

  // Index and value of the first literal operand seen so far, if any.
  std::optional<unsigned> LiteralOpIdx;
  std::optional<uint64_t> LiteralValue;

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      continue;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    // Only immediates and expressions can be literals.
    if (!MO.isImm() && !MO.isExpr())
      continue;
    if (!isSISrcOperand(Desc, OpIdx))
      continue;

    // Try to obtain a concrete value; remains unset for unresolved
    // (relocatable) expressions.
    std::optional<int64_t> Imm;
    if (MO.isImm())
      Imm = MO.getImm();
    else if (MO.isExpr() && isLitExpr(MO.getExpr()))
      Imm = getLitValue(MO.getExpr());

    bool IsAnotherLiteral = false;
    if (!Imm.has_value()) {
      // Literal value not known, so we conservately assume it's different.
      IsAnotherLiteral = true;
    } else if (!isInlineConstant(Inst, OpIdx)) {
      uint64_t Value = *Imm;
      // NOTE(review): the right-hand side of this initialization appears
      // truncated in this copy of the file (a continuation line is missing);
      // verify against the upstream sources.
      bool IsForcedFP64 =
          Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
                         HasMandatoryLiteral);
      // A source counts as a 64-bit FP literal when it is forced to be one or
      // it is an 8-byte FP source operand.
      bool IsFP64 = (IsForcedFP64 || AMDGPU::isSISrcFPOperand(Desc, OpIdx)) &&
                    AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
      bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);

      // Values that fit in neither signed nor unsigned 32 bits need 64-bit
      // literal support and a 64-bit encoding.
      if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) &&
          !IsForcedFP64 && (!has64BitLiterals() || Desc.getSize() != 4)) {
        Error(getOperandLoc(Operands, OpIdx),
              "invalid operand for instruction");
        return false;
      }

      // FP64 literals that encode in 32 bits are compared by their high half.
      if (IsFP64 && IsValid32Op && !IsForcedFP64)
        Value = Hi_32(Value);

      IsAnotherLiteral = !LiteralValue || *LiteralValue != Value;
      // NOTE(review): a line appears to be missing here in this copy of the
      // file (presumably the update of LiteralValue); verify against upstream.
    }

    // Literals on VOP3 forms require the VOP3Literal feature (GFX10+).
    if (IsAnotherLiteral && !HasMandatoryLiteral &&
        !getFeatureBits()[FeatureVOP3Literal]) {
      Error(getOperandLoc(Operands, OpIdx),
            "literal operands are not supported");
      return false;
    }

    // A second literal with a different value is never allowed.
    if (LiteralOpIdx && IsAnotherLiteral) {
      Error(getLaterLoc(getOperandLoc(Operands, OpIdx),
                        getOperandLoc(Operands, *LiteralOpIdx)),
            "only one unique literal operand is allowed");
      return false;
    }

    if (IsAnotherLiteral)
      LiteralOpIdx = OpIdx;
  }

  return true;
}
5139
5140// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
5141static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
5142 const MCRegisterInfo *MRI) {
5143 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name);
5144 if (OpIdx < 0)
5145 return -1;
5146
5147 const MCOperand &Op = Inst.getOperand(OpIdx);
5148 if (!Op.isReg())
5149 return -1;
5150
5151 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5152 auto Reg = Sub ? Sub : Op.getReg();
5153 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5154 return AGPR32.contains(Reg) ? 1 : 0;
5155}
5156
// For memory (flat/buffer/DS) ops, check the VGPR/AGPR classification of the
// data and destination operands: on gfx90a they must all agree; on other
// subtargets AGPR data/dst is not allowed at all.
bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  // NOTE(review): this flag mask appears truncated in this copy of the file
  // (a continuation line seems to be missing); verify against upstream.
  if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
                  SIInstrFlags::DS)) == 0
    return true;

  // DS ops name their data operand data0; the others use vdata.
  AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
                                ? AMDGPU::OpName::data0
                                : AMDGPU::OpName::vdata;

  const MCRegisterInfo *MRI = getMRI();
  // -1 = operand absent / not a register, 0 = VGPR, 1 = AGPR.
  int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
  int DataAreg = IsAGPROperand(Inst, DataName, MRI);

  // DS ops with two data operands need both classified the same way.
  if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
    int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
    if (Data2Areg >= 0 && Data2Areg != DataAreg)
      return false;
  }

  auto FB = getFeatureBits();
  if (FB[AMDGPU::FeatureGFX90AInsts]) {
    // gfx90a: dst and data must be either all VGPR or all AGPR.
    if (DataAreg < 0 || DstAreg < 0)
      return true;
    return DstAreg == DataAreg;
  }

  // Other subtargets: AGPR loads and stores are not supported.
  return DstAreg < 1 && DataAreg < 1;
}
5187
5188bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
5189 auto FB = getFeatureBits();
5190 if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
5191 return true;
5192
5193 unsigned Opc = Inst.getOpcode();
5194 const MCRegisterInfo *MRI = getMRI();
5195 // DS_READ_B96_TR_B6 is the only DS instruction in GFX950, that allows
5196 // unaligned VGPR. All others only allow even aligned VGPRs.
5197 if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5198 return true;
5199
5200 if (FB[AMDGPU::FeatureGFX1250Insts]) {
5201 switch (Opc) {
5202 default:
5203 break;
5204 case AMDGPU::DS_LOAD_TR6_B96:
5205 case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5206 // DS_LOAD_TR6_B96 is the only DS instruction in GFX1250, that
5207 // allows unaligned VGPR. All others only allow even aligned VGPRs.
5208 return true;
5209 case AMDGPU::GLOBAL_LOAD_TR6_B96:
5210 case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5211 // GLOBAL_LOAD_TR6_B96 is the only GLOBAL instruction in GFX1250, that
5212 // allows unaligned VGPR for vdst, but other operands still only allow
5213 // even aligned VGPRs.
5214 int VAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
5215 if (VAddrIdx != -1) {
5216 const MCOperand &Op = Inst.getOperand(VAddrIdx);
5217 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5218 if ((Sub - AMDGPU::VGPR0) & 1)
5219 return false;
5220 }
5221 return true;
5222 }
5223 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5224 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
5225 return true;
5226 }
5227 }
5228
5229 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5230 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5231 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
5232 const MCOperand &Op = Inst.getOperand(I);
5233 if (!Op.isReg())
5234 continue;
5235
5236 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5237 if (!Sub)
5238 continue;
5239
5240 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
5241 return false;
5242 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
5243 return false;
5244 }
5245
5246 return true;
5247}
5248
5249SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
5250 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5251 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5252 if (Op.isBLGP())
5253 return Op.getStartLoc();
5254 }
5255 return SMLoc();
5256}
5257
5258bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
5259 const OperandVector &Operands) {
5260 unsigned Opc = Inst.getOpcode();
5261 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
5262 if (BlgpIdx == -1)
5263 return true;
5264 SMLoc BLGPLoc = getBLGPLoc(Operands);
5265 if (!BLGPLoc.isValid())
5266 return true;
5267 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
5268 auto FB = getFeatureBits();
5269 bool UsesNeg = false;
5270 if (FB[AMDGPU::FeatureGFX940Insts]) {
5271 switch (Opc) {
5272 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5273 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5274 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5275 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5276 UsesNeg = true;
5277 }
5278 }
5279
5280 if (IsNeg == UsesNeg)
5281 return true;
5282
5283 Error(BLGPLoc,
5284 UsesNeg ? "invalid modifier: blgp is not supported"
5285 : "invalid modifier: neg is not supported");
5286
5287 return false;
5288}
5289
5290bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
5291 const OperandVector &Operands) {
5292 if (!isGFX11Plus())
5293 return true;
5294
5295 unsigned Opc = Inst.getOpcode();
5296 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5297 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5298 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5299 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5300 return true;
5301
5302 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
5303 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
5304 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
5305 if (Reg == AMDGPU::SGPR_NULL)
5306 return true;
5307
5308 Error(getOperandLoc(Operands, Src0Idx), "src0 must be null");
5309 return false;
5310}
5311
5312bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
5313 const OperandVector &Operands) {
5314 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5315 if ((TSFlags & SIInstrFlags::DS) == 0)
5316 return true;
5317 if (TSFlags & SIInstrFlags::GWS)
5318 return validateGWS(Inst, Operands);
5319 // Only validate GDS for non-GWS instructions.
5320 if (hasGDS())
5321 return true;
5322 int GDSIdx =
5323 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
5324 if (GDSIdx < 0)
5325 return true;
5326 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
5327 if (GDS) {
5328 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5329 Error(S, "gds modifier is not supported on this GPU");
5330 return false;
5331 }
5332 return true;
5333}
5334
5335// gfx90a has an undocumented limitation:
5336// DS_GWS opcodes must use even aligned registers.
5337bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
5338 const OperandVector &Operands) {
5339 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5340 return true;
5341
5342 int Opc = Inst.getOpcode();
5343 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5344 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5345 return true;
5346
5347 const MCRegisterInfo *MRI = getMRI();
5348 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5349 int Data0Pos =
5350 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
5351 assert(Data0Pos != -1);
5352 auto Reg = Inst.getOperand(Data0Pos).getReg();
5353 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5354 if (RegIdx & 1) {
5355 Error(getOperandLoc(Operands, Data0Pos), "vgpr must be even aligned");
5356 return false;
5357 }
5358
5359 return true;
5360}
5361
// Validate the cache-policy (cpol) modifier bits against the subtarget and
// the instruction kind. Emits a diagnostic and returns false on failure.
bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
                                            const OperandVector &Operands,
                                            SMLoc IDLoc) {
  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                           AMDGPU::OpName::cpol);
  if (CPolPos == -1)
    return true;

  unsigned CPol = Inst.getOperand(CPolPos).getImm();

  // scale_offset and nv are cpol bits that only exist on GFX1250.
  if (!isGFX1250()) {
    if (CPol & CPol::SCAL) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      // Point the diagnostic at the offending token within the modifier text.
      S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
      Error(S, "scale_offset is not supported on this GPU");
    }
    if (CPol & CPol::NV) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
      Error(S, "nv is not supported on this GPU");
    }
  }

  // scale_offset is further restricted to instructions that support it.
  if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Inst.getOpcode())) {
    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
    StringRef CStr(S.getPointer());
    S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
    Error(S, "scale_offset is not supported for this instruction");
  }

  // GFX12+ replaced the legacy glc/slc/dlc bits with th/scope fields.
  if (isGFX12Plus())
    return validateTHAndScopeBits(Inst, Operands, CPol);

  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if (TSFlags & SIInstrFlags::SMRD) {
    // SI/CI SMRD accepts no cache-policy bits; later pre-GFX12 SMEM accepts
    // only glc/dlc.
    if (CPol && (isSI() || isCI())) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      Error(S, "cache policy is not supported for SMRD instructions");
      return false;
    }
    if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
      Error(IDLoc, "invalid cache policy for SMEM instruction");
      return false;
    }
  }

  if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
    // NOTE(review): this mask initializer appears truncated in this copy of
    // the file (continuation line(s) missing); verify against upstream.
    const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
    if (!(TSFlags & AllowSCCModifier)) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
      Error(S,
            "scc modifier is not supported for this instruction on this GPU");
      return false;
    }
  }

  // NOTE(review): the guard condition that belongs to this early return
  // appears to be missing in this copy of the file; verify against upstream.
  return true;

  if (TSFlags & SIInstrFlags::IsAtomicRet) {
    // Returning atomics must request the old value via glc (sc0 on gfx940).
    if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
      Error(IDLoc, isGFX940() ? "instruction must use sc0"
                              : "instruction must use glc");
      return false;
    }
  } else {
    // Non-returning operations must not set glc/sc0.
    if (CPol & CPol::GLC) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      // NOTE(review): a line (apparently the head of an
      // SMLoc::getFromPointer call) is missing here in this copy; verify
      // against upstream.
          &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
      Error(S, isGFX940() ? "instruction must not use sc0"
                          : "instruction must not use glc");
      return false;
    }
  }

  return true;
}
5447
// GFX12+ cpol validation: check the th (temporal hint) and scope fields for
// consistency with each other and with the instruction kind.
bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
                                             const OperandVector &Operands,
                                             const unsigned CPol) {
  const unsigned TH = CPol & AMDGPU::CPol::TH;
  const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;

  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &TID = MII.get(Opcode);

  // Report the message at the cpol modifier location and fail.
  auto PrintError = [&](StringRef Msg) {
    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
    Error(S, Msg);
    return false;
  };

  // NOTE(review): the remainder of this condition appears to be missing in
  // this copy of the file; verify against upstream.
  if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
    return PrintError("instruction must use th:TH_ATOMIC_RETURN");

  // The default th of zero is accepted everywhere past this point.
  if (TH == 0)
    return true;

  // Some mixed read/non-temporal hints do not exist for SMEM.
  if ((TID.TSFlags & SIInstrFlags::SMRD) &&
      ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
       (TH == AMDGPU::CPol::TH_NT_HT)))
    return PrintError("invalid th value for SMEM instruction");

  if (TH == AMDGPU::CPol::TH_BYPASS) {
    // NOTE(review): parts of this condition appear to be missing in this
    // copy of the file (continuation lines); verify against upstream.
    if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
        (Scope == AMDGPU::CPol::SCOPE_SYS &&
      return PrintError("scope and th combination is not valid");
  }

  // The th value's category (atomic/store/load) must match the instruction's
  // own category.
  unsigned THType = AMDGPU::getTemporalHintType(TID);
  if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
    if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
      return PrintError("invalid th value for atomic instructions");
  } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
    if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
      return PrintError("invalid th value for store instructions");
  } else {
    if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
      return PrintError("invalid th value for load instructions");
  }

  return true;
}
5498
// The tfe modifier requests an extra status write-back and therefore has no
// meaning on store instructions; diagnose an explicitly written tfe there.
bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
                                  const OperandVector &Operands) {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  // NOTE(review): the condition below appears truncated in this copy of the
  // file (a continuation line seems to be missing); verify against upstream.
  if (Desc.mayStore() &&
    SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
    // getImmLoc falls back to the instruction start when the modifier was not
    // written explicitly; only an explicit tfe is an error here.
    if (Loc != getInstLoc(Operands)) {
      Error(Loc, "TFE modifier has no meaning for store instructions");
      return false;
    }
  }

  return true;
}
5513
// For WMMA instructions with matrix format modifiers, check that the size of
// each source register tuple matches the selected matrix format.
bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
                                   const OperandVector &Operands) {
  unsigned Opc = Inst.getOpcode();
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  const MCInstrDesc &Desc = MII.get(Opc);

  // Validate one (format operand, source operand) pair.
  auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) -> bool {
    int FmtIdx = AMDGPU::getNamedOperandIdx(Opc, FmtOp);
    if (FmtIdx == -1)
      return true;
    unsigned Fmt = Inst.getOperand(FmtIdx).getImm();
    int SrcIdx = AMDGPU::getNamedOperandIdx(Opc, SrcOp);
    unsigned RegSize =
        TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[SrcIdx], HwMode))
            .getSizeInBits();

    // NOTE(review): the guard condition that belongs to this early return
    // appears to be missing in this copy of the file (it presumably compares
    // RegSize against the size implied by Fmt); verify against upstream.
    return true;

    static const char *FmtNames[] = {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
                                     "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
                                     "MATRIX_FMT_FP4"};

    Error(getOperandLoc(Operands, SrcIdx),
          "wrong register tuple size for " + Twine(FmtNames[Fmt]));
    return false;
  };

  return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
         validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
}
5545
// Run all target-specific semantic checks on a successfully matched
// instruction. Returns false (after emitting a diagnostic) on the first
// failing check; the order of checks determines which error the user sees.
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, SMLoc IDLoc,
                                          const OperandVector &Operands) {
  if (!validateLdsDirect(Inst, Operands))
    return false;
  if (!validateTrue16OpSel(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
          "op_sel operand conflicts with 16-bit operand suffix");
    return false;
  }
  if (!validateSOPLiteral(Inst, Operands))
    return false;
  if (!validateVOPLiteral(Inst, Operands)) {
    return false;
  }
  if (!validateConstantBusLimitations(Inst, Operands)) {
    return false;
  }
  if (!validateVOPD(Inst, Operands)) {
    return false;
  }
  if (!validateIntClampSupported(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
          "integer clamping is not supported on this GPU");
    return false;
  }
  if (!validateOpSel(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
          "invalid op_sel operand");
    return false;
  }
  // neg_lo and neg_hi are validated separately; each reports at its own
  // modifier location.
  if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
          "invalid neg_lo operand");
    return false;
  }
  if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
          "invalid neg_hi operand");
    return false;
  }
  if (!validateDPP(Inst, Operands)) {
    return false;
  }
  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
  if (!validateMIMGD16(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
          "d16 modifier is not supported on this GPU");
    return false;
  }
  if (!validateMIMGDim(Inst, Operands)) {
    Error(IDLoc, "missing dim operand");
    return false;
  }
  if (!validateTensorR128(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
          "instruction must set modifier r128=0");
    return false;
  }
  if (!validateMIMGMSAA(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
          "invalid dim; must be MSAA type");
    return false;
  }
  if (!validateMIMGDataSize(Inst, IDLoc)) {
    return false;
  }
  if (!validateMIMGAddrSize(Inst, IDLoc))
    return false;
  if (!validateMIMGAtomicDMask(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
          "invalid atomic image dmask");
    return false;
  }
  if (!validateMIMGGatherDMask(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
          "invalid image_gather dmask: only one bit must be set");
    return false;
  }
  if (!validateMovrels(Inst, Operands)) {
    return false;
  }
  if (!validateOffset(Inst, Operands)) {
    return false;
  }
  if (!validateMAIAccWrite(Inst, Operands)) {
    return false;
  }
  if (!validateMAISrc2(Inst, Operands)) {
    return false;
  }
  if (!validateMFMA(Inst, Operands)) {
    return false;
  }
  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
    return false;
  }

  if (!validateAGPRLdSt(Inst)) {
    Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
    ? "invalid register class: data and dst should be all VGPR or AGPR"
    : "invalid register class: agpr loads and stores not supported on this GPU"
    );
    return false;
  }
  if (!validateVGPRAlign(Inst)) {
    Error(IDLoc,
      "invalid register class: vgpr tuples must be 64 bit aligned");
    return false;
  }
  if (!validateDS(Inst, Operands)) {
    return false;
  }

  if (!validateBLGP(Inst, Operands)) {
    return false;
  }

  if (!validateDivScale(Inst)) {
    Error(IDLoc, "ABS not allowed in VOP3B instructions");
    return false;
  }
  if (!validateWaitCnt(Inst, Operands)) {
    return false;
  }
  if (!validateTFE(Inst, Operands)) {
    return false;
  }
  if (!validateWMMA(Inst, Operands)) {
    return false;
  }

  return true;
}
5679
5681 const FeatureBitset &FBS,
5682 unsigned VariantID = 0);
5683
5684static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5685 const FeatureBitset &AvailableFeatures,
5686 unsigned VariantID);
5687
// Convenience overload: check the mnemonic against all assembler variants.
bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       const FeatureBitset &FBS) {
  return isSupportedMnemo(Mnemo, FBS, getAllVariants());
}
5692
// Returns true if any of the given assembler variants accepts the mnemonic
// under the feature set FBS.
bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       const FeatureBitset &FBS,
                                       ArrayRef<unsigned> Variants) {
  for (auto Variant : Variants) {
    if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
      return true;
  }

  return false;
}
5703
5704bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5705 SMLoc IDLoc) {
5706 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5707
5708 // Check if requested instruction variant is supported.
5709 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5710 return false;
5711
5712 // This instruction is not supported.
5713 // Clear any other pending errors because they are no longer relevant.
5714 getParser().clearPendingErrors();
5715
5716 // Requested instruction variant is not supported.
5717 // Check if any other variants are supported.
5718 StringRef VariantName = getMatchedVariantName();
5719 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5720 return Error(IDLoc,
5721 Twine(VariantName,
5722 " variant of this instruction is not supported"));
5723 }
5724
5725 // Check if this instruction may be used with a different wavesize.
5726 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5727 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5728 // FIXME: Use getAvailableFeatures, and do not manually recompute
5729 FeatureBitset FeaturesWS32 = getFeatureBits();
5730 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5731 .flip(AMDGPU::FeatureWavefrontSize32);
5732 FeatureBitset AvailableFeaturesWS32 =
5733 ComputeAvailableFeatures(FeaturesWS32);
5734
5735 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5736 return Error(IDLoc, "instruction requires wavesize=32");
5737 }
5738
5739 // Finally check if this instruction is supported on any other GPU.
5740 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5741 return Error(IDLoc, "instruction not supported on this GPU");
5742 }
5743
5744 // Instruction not supported on any GPU. Probably a typo.
5745 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5746 return Error(IDLoc, "invalid instruction" + Suggestion);
5747}
5748
5749static bool isInvalidVOPDY(const OperandVector &Operands,
5750 uint64_t InvalidOprIdx) {
5751 assert(InvalidOprIdx < Operands.size());
5752 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5753 if (Op.isToken() && InvalidOprIdx > 1) {
5754 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5755 return PrevOp.isToken() && PrevOp.getToken() == "::";
5756 }
5757 return false;
5758}
5759
// Top-level match driver: try every matched assembler variant, keep the most
// specific failure status, run target validation on success, then emit.
// Returns true on error (LLVM asm-parser convention).
bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  Inst.setLoc(IDLoc);
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // We order match statuses from least to most specific. We use most specific
    // status as resulting
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
    if (R == Match_Success || R == Match_MissingFeature ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
        (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
         Result != Match_MissingFeature)) {
      Result = R;
      ErrorInfo = EI;
    }
    if (R == Match_Success)
      break;
  }

  if (Result == Match_Success) {
    // Target-specific semantic checks beyond the generated matcher.
    if (!validateInstruction(Inst, IDLoc, Operands)) {
      return true;
    }
    Out.emitInstruction(Inst, getSTI());
    return false;
  }

  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
    return true;
  }

  switch (Result) {
  default: break;
  case Match_MissingFeature:
    // It has been verified that the specified instruction
    // mnemonic is valid. A match was found but it requires
    // features which are not supported on this GPU.
    return Error(IDLoc, "operands are not valid for this GPU or mode");

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      }
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;

      // Diagnose a failure in the VOPDY half (after "::") specially.
      if (isInvalidVOPDY(Operands, ErrorInfo))
        return Error(ErrorLoc, "invalid VOPDY instruction");
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_MnemonicFail:
    llvm_unreachable("Invalid instructions should have been handled already");
  }
  llvm_unreachable("Implement any new match types added!");
}
5828
5829bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5830 int64_t Tmp = -1;
5831 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5832 return true;
5833 }
5834 if (getParser().parseAbsoluteExpression(Tmp)) {
5835 return true;
5836 }
5837 Ret = static_cast<uint32_t>(Tmp);
5838 return false;
5839}
5840
5841bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5842 if (!getSTI().getTargetTriple().isAMDGCN())
5843 return TokError("directive only supported for amdgcn architecture");
5844
5845 std::string TargetIDDirective;
5846 SMLoc TargetStart = getTok().getLoc();
5847 if (getParser().parseEscapedString(TargetIDDirective))
5848 return true;
5849
5850 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5851 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5852 return getParser().Error(TargetRange.Start,
5853 (Twine(".amdgcn_target directive's target id ") +
5854 Twine(TargetIDDirective) +
5855 Twine(" does not match the specified target id ") +
5856 Twine(getTargetStreamer().getTargetID()->toString())).str());
5857
5858 return false;
5859}
5860
// Emit a generic "value out of range" diagnostic covering the given range.
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return Error(Range.Start, "value out of range", Range);
}
5864
// Compute the encoded VGPR/SGPR "block" counts for a kernel descriptor from
// the free-register watermark expressions, performing range checks where the
// expressions are evaluatable. Returns true on error.
bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, const MCExpr *VCCUsed,
    const MCExpr *FlatScrUsed, bool XNACKUsed,
    std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
    SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
    const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
  // TODO(scott.linder): These calculations are duplicated from
  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
  IsaVersion Version = getIsaVersion(getSTI().getCPU());
  MCContext &Ctx = getContext();

  const MCExpr *NumSGPRs = NextFreeSGPR;
  int64_t EvaluatedSGPRs;

  // NOTE(review): the then-branch of this if appears to be missing in this
  // copy of the file; verify against upstream.
  if (Version.Major >= 10)
  else {
    // NOTE(review): this initializer also looks truncated (continuation line
    // missing); verify against upstream.
    unsigned MaxAddressableNumSGPRs =

    // Check the user-declared count before reserved SGPRs are added, for
    // targets where the addressable limit applies to user SGPRs.
    if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
        !Features.test(FeatureSGPRInitBug) &&
        static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    // Account for the VCC / flat-scratch / XNACK reserved SGPRs.
    const MCExpr *ExtraSGPRs =
        AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
    NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);

    // Re-check including the extra SGPRs for older targets and the
    // SGPR-init-bug parts.
    if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
        (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
        static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    // NOTE(review): the right-hand side of this assignment appears truncated
    // in this copy of the file; verify against upstream.
    if (Features.test(FeatureSGPRInitBug))
      NumSGPRs =
  }

  // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
  // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
  auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
                                unsigned Granule) -> const MCExpr * {
    const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
    const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
    const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
    const MCExpr *AlignToGPR =
        AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
    const MCExpr *DivGPR =
        MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
    const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
    return SubGPR;
  };

  VGPRBlocks = GetNumGPRBlocks(
      NextFreeVGPR,
      IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
  SGPRBlocks =
      GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));

  return false;
}
5927
// Parses a ".amdhsa_kernel <name> ... .end_amdhsa_kernel" block: reads the
// kernel name, then a sequence of .amdhsa_* sub-directives, populates an
// MCKernelDescriptor, validates register budgets, and emits the descriptor
// through the target streamer. Returns true on error.
//
// NOTE(review): this copy of the function is missing a number of continuation
// lines (mostly the first lines of PARSE_BITS_ENTRY(...) invocations and
// several "if (hasArchitectedFlatScratch(getSTI()))" guards) that are present
// in upstream LLVM -- the garbled fragments are preserved as found; compare
// against upstream before building.
bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
  if (!getSTI().getTargetTriple().isAMDGCN())
    return TokError("directive only supported for amdgcn architecture");

  if (!isHsaAbi(getSTI()))
    return TokError("directive only supported for amdhsa OS");

  StringRef KernelName;
  if (getParser().parseIdentifier(KernelName))
    return true;

  // Start from the subtarget's default kernel descriptor.
  // NOTE(review): the call line (upstream:
  // MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(...) ) is missing.
  AMDGPU::MCKernelDescriptor KD =
      &getSTI(), getContext());

  // Used to reject repeated sub-directives and to enforce mandatory ones.
  StringSet<> Seen;

  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());

  const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
  const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());

  SMRange VGPRRange;
  const MCExpr *NextFreeVGPR = ZeroExpr;
  const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
  const MCExpr *NamedBarCnt = ZeroExpr;
  uint64_t SharedVGPRCount = 0;
  uint64_t PreloadLength = 0;
  uint64_t PreloadOffset = 0;
  SMRange SGPRRange;
  const MCExpr *NextFreeSGPR = ZeroExpr;

  // Count the number of user SGPRs implied from the enabled feature bits.
  unsigned ImpliedUserSGPRCount = 0;

  // Track if the asm explicitly contains the directive for the user SGPR
  // count.
  std::optional<unsigned> ExplicitUserSGPRCount;
  const MCExpr *ReserveVCC = OneExpr;
  const MCExpr *ReserveFlatScr = OneExpr;
  std::optional<bool> EnableWavefrontSize32;

  while (true) {
    // Lexing a comment can leave the current token at EndOfStatement, so
    // consume every such token between sub-directives.
    while (trySkipToken(AsmToken::EndOfStatement));

    StringRef ID;
    SMRange IDRange = getTok().getLocRange();
    if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
      return true;

    if (ID == ".end_amdhsa_kernel")
      break;

    if (!Seen.insert(ID).second)
      return TokError(".amdhsa_ directives cannot be repeated");

    SMLoc ValStart = getLoc();
    const MCExpr *ExprVal;
    if (getParser().parseExpression(ExprVal))
      return true;
    SMLoc ValEnd = getLoc();
    SMRange ValRange = SMRange(ValStart, ValEnd);

    // If the expression folds to a constant now, range-check it and cache the
    // value in Val; otherwise Val stays 0 and only expression checks apply.
    int64_t IVal = 0;
    uint64_t Val = IVal;
    bool EvaluatableExpr;
    if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
      if (IVal < 0)
        return OutOfRangeError(ValRange);
      Val = IVal;
    }

// Range-checks the folded value against the field's bit width, then ORs the
// expression into FIELD at the entry's shift/width.
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
  if (!isUInt<ENTRY##_WIDTH>(Val)) \
    return OutOfRangeError(RANGE); \
  AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
                                       getContext());

// Some fields use the parsed value immediately which requires the expression to
// be solvable.
#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
  if (!(RESOLVED)) \
    return Error(IDRange.Start, "directive should have resolvable expression", \
                 IDRange);

    if (ID == ".amdhsa_group_segment_fixed_size") {
                   CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.group_segment_fixed_size = ExprVal;
    } else if (ID == ".amdhsa_private_segment_fixed_size") {
                   CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.private_segment_fixed_size = ExprVal;
    } else if (ID == ".amdhsa_kernarg_size") {
      if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.kernarg_size = ExprVal;
    } else if (ID == ".amdhsa_user_sgpr_count") {
      EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
      ExplicitUserSGPRCount = Val;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
      EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
                       ExprVal, ValRange);
      if (Val)
        ImpliedUserSGPRCount += 4;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
      EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
      if (!hasKernargPreload())
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);

      if (Val > getMaxNumUserSGPRs())
        return OutOfRangeError(ValRange);
      PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
                       ValRange);
      if (Val) {
        ImpliedUserSGPRCount += Val;
        PreloadLength = Val;
      }
    } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
      EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
      if (!hasKernargPreload())
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);

      if (Val >= 1024)
        return OutOfRangeError(ValRange);
      PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
                       ValRange);
      if (Val)
        PreloadOffset = Val;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
      EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
                       ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
      EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
                       ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
      EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
                       ExprVal, ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
      EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
                       ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
      EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
                       ExprVal, ValRange);
      if (Val)
        ImpliedUserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
      EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
                       ExprVal, ValRange);
      if (Val)
        ImpliedUserSGPRCount += 1;
    } else if (ID == ".amdhsa_wavefront_size32") {
      EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      // Remembered separately: wave32 affects GPR-block granule computation.
      EnableWavefrontSize32 = Val;
                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_uses_dynamic_stack") {
                       KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_enable_private_segment") {
        return Error(
            IDRange.Start,
            "directive is not supported without architected flat scratch",
            IDRange);
                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_next_free_vgpr") {
      // Deferred: folded into block counts after the loop.
      VGPRRange = ValRange;
      NextFreeVGPR = ExprVal;
    } else if (ID == ".amdhsa_next_free_sgpr") {
      SGPRRange = ValRange;
      NextFreeSGPR = ExprVal;
    } else if (ID == ".amdhsa_accum_offset") {
      if (!isGFX90A())
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      AccumOffset = ExprVal;
    } else if (ID == ".amdhsa_named_barrier_count") {
      if (!isGFX1250())
        return Error(IDRange.Start, "directive requires gfx1250+", IDRange);
      NamedBarCnt = ExprVal;
    } else if (ID == ".amdhsa_reserve_vcc") {
      if (EvaluatableExpr && !isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveVCC = ExprVal;
    } else if (ID == ".amdhsa_reserve_flat_scratch") {
      if (IVersion.Major < 7)
        return Error(IDRange.Start, "directive requires gfx7+", IDRange);
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
      if (EvaluatableExpr && !isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveFlatScr = ExprVal;
    } else if (ID == ".amdhsa_reserve_xnack_mask") {
      if (IVersion.Major < 8)
        return Error(IDRange.Start, "directive requires gfx8+", IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      // Value is informational only; it must agree with the target id.
      if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
        return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
                                 IDRange);
    } else if (ID == ".amdhsa_float_round_mode_32") {
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_float_round_mode_16_64") {
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_32") {
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_dx10_clamp") {
      if (IVersion.Major >= 12)
        return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
                       COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_ieee_mode") {
      if (IVersion.Major >= 12)
        return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
                       COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_fp16_overflow") {
      if (IVersion.Major < 9)
        return Error(IDRange.Start, "directive requires gfx9+", IDRange);
                       COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_tg_split") {
      if (!isGFX90A())
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
                       ExprVal, ValRange);
    } else if (ID == ".amdhsa_workgroup_processor_mode") {
      if (!supportsWGP(getSTI()))
        return Error(IDRange.Start,
                     "directive unsupported on " + getSTI().getCPU(), IDRange);
                       COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_memory_ordered") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
                       COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_forward_progress") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
                       COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_shared_vgpr_count") {
      EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
      if (IVersion.Major < 10 || IVersion.Major >= 12)
        return Error(IDRange.Start, "directive requires gfx10 or gfx11",
                     IDRange);
      SharedVGPRCount = Val;
                       COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
                       ValRange);
    } else if (ID == ".amdhsa_inst_pref_size") {
      if (IVersion.Major < 11)
        return Error(IDRange.Start, "directive requires gfx11+", IDRange);
      // Field location differs between gfx11 and gfx12+.
      if (IVersion.Major == 11) {
                         COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
                         ValRange);
      } else {
                         COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
                         ValRange);
      }
    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
          ExprVal, ValRange);
    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
                       ExprVal, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
          ExprVal, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
                       ExprVal, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
                       ExprVal, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
                       ExprVal, ValRange);
    } else if (ID == ".amdhsa_exception_int_div_zero") {
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
                       ExprVal, ValRange);
    } else if (ID == ".amdhsa_round_robin_scheduling") {
      if (IVersion.Major < 12)
        return Error(IDRange.Start, "directive requires gfx12+", IDRange);
                       COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
                       ValRange);
    } else {
      return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
    }

#undef PARSE_BITS_ENTRY
  }

  // Both register-count directives are mandatory.
  if (!Seen.contains(".amdhsa_next_free_vgpr"))
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (!Seen.contains(".amdhsa_next_free_sgpr"))
    return TokError(".amdhsa_next_free_sgpr directive is required");

  unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);

  // Consider the case where the total number of UserSGPRs with trailing
  // allocated preload SGPRs, is greater than the number of explicitly
  // referenced SGPRs.
  if (PreloadLength) {
    MCContext &Ctx = getContext();
    NextFreeSGPR = AMDGPUMCExpr::createMax(
        {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx);
  }

  const MCExpr *VGPRBlocks;
  const MCExpr *SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         getTargetStreamer().getTargetID()->isXnackOnOrAny(),
                         EnableWavefrontSize32, NextFreeVGPR,
                         VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
                         SGPRBlocks))
    return true;

  // Range-check the granulated block counts where they fold to constants.
  int64_t EvaluatedVGPRBlocks;
  bool VGPRBlocksEvaluatable =
      VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
  if (VGPRBlocksEvaluatable &&
      static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
    return OutOfRangeError(VGPRRange);
  }
      KD.compute_pgm_rsrc1, VGPRBlocks,
      COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
      COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());

  int64_t EvaluatedSGPRBlocks;
  if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
      static_cast<uint64_t>(EvaluatedSGPRBlocks)))
    return OutOfRangeError(SGPRRange);
      KD.compute_pgm_rsrc1, SGPRBlocks,
      COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
      COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());

  if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
    return TokError("amdgpu_user_sgpr_count smaller than than implied by "
                    "enabled user SGPRs");

  // The user-SGPR-count field moved/widened on gfx1250.
  if (isGFX1250()) {
      return TokError("too many user SGPRs enabled");
        MCConstantExpr::create(UserSGPRCount, getContext()),
        COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
        COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());
  } else {
                                   UserSGPRCount))
      return TokError("too many user SGPRs enabled");
        MCConstantExpr::create(UserSGPRCount, getContext()),
        COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
        COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());
  }

  // Kernarg size must fold to a constant so preload bounds can be validated.
  int64_t IVal = 0;
  if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
    return TokError("Kernarg size should be resolvable");
  uint64_t kernarg_size = IVal;
  if (PreloadLength && kernarg_size &&
      (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
    return TokError("Kernarg preload length + offset is larger than the "
                    "kernarg segment size");

  if (isGFX90A()) {
    if (!Seen.contains(".amdhsa_accum_offset"))
      return TokError(".amdhsa_accum_offset directive is required");
    int64_t EvaluatedAccum;
    bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
    uint64_t UEvaluatedAccum = EvaluatedAccum;
    if (AccumEvaluatable &&
        (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
      return TokError("accum_offset should be in range [4..256] in "
                      "increments of 4");

    int64_t EvaluatedNumVGPR;
    if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
        AccumEvaluatable &&
        UEvaluatedAccum >
            alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
      return TokError("accum_offset exceeds total VGPR allocation");
    // Field encodes (accum_offset / 4) - 1.
    const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
        AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
                     COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
                     COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
                     getContext());
  }

  if (isGFX1250())
                     COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
                     COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
                     getContext());

  if (IVersion.Major >= 10 && IVersion.Major < 12) {
    // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
    if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
      return TokError("shared_vgpr_count directive not valid on "
                      "wavefront size 32");
    }

    if (VGPRBlocksEvaluatable &&
        (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
         63)) {
      return TokError("shared_vgpr_count*2 + "
                      "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
                      "exceed 63\n");
    }
  }

  getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
                                                 NextFreeVGPR, NextFreeSGPR,
                                                 ReserveVCC, ReserveFlatScr);
  return false;
}
6453
6454bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6455 uint32_t Version;
6456 if (ParseAsAbsoluteExpression(Version))
6457 return true;
6458
6459 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
6460 return false;
6461}
6462
6463bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6464 AMDGPUMCKernelCodeT &C) {
6465 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6466 // assembly for backwards compatibility.
6467 if (ID == "max_scratch_backing_memory_byte_size") {
6468 Parser.eatToEndOfStatement();
6469 return false;
6470 }
6471
6472 SmallString<40> ErrStr;
6473 raw_svector_ostream Err(ErrStr);
6474 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
6475 return TokError(Err.str());
6476 }
6477 Lex();
6478
6479 if (ID == "enable_wavefront_size32") {
6480 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6481 if (!isGFX10Plus())
6482 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
6483 if (!isWave32())
6484 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
6485 } else {
6486 if (!isWave64())
6487 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
6488 }
6489 }
6490
6491 if (ID == "wavefront_size") {
6492 if (C.wavefront_size == 5) {
6493 if (!isGFX10Plus())
6494 return TokError("wavefront_size=5 is only allowed on GFX10+");
6495 if (!isWave32())
6496 return TokError("wavefront_size=5 requires +WavefrontSize32");
6497 } else if (C.wavefront_size == 6) {
6498 if (!isWave64())
6499 return TokError("wavefront_size=6 requires +WavefrontSize64");
6500 }
6501 }
6502
6503 return false;
6504}
6505
6506bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6507 AMDGPUMCKernelCodeT KernelCode;
6508 KernelCode.initDefault(&getSTI(), getContext());
6509
6510 while (true) {
6511 // Lex EndOfStatement. This is in a while loop, because lexing a comment
6512 // will set the current token to EndOfStatement.
6513 while(trySkipToken(AsmToken::EndOfStatement));
6514
6515 StringRef ID;
6516 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
6517 return true;
6518
6519 if (ID == ".end_amd_kernel_code_t")
6520 break;
6521
6522 if (ParseAMDKernelCodeTValue(ID, KernelCode))
6523 return true;
6524 }
6525
6526 KernelCode.validate(&getSTI(), getContext());
6527 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6528
6529 return false;
6530}
6531
6532bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6533 StringRef KernelName;
6534 if (!parseId(KernelName, "expected symbol name"))
6535 return true;
6536
6537 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6539
6540 KernelScope.initialize(getContext());
6541 return false;
6542}
6543
6544bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6545 if (!getSTI().getTargetTriple().isAMDGCN()) {
6546 return Error(getLoc(),
6547 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6548 "architectures");
6549 }
6550
6551 auto TargetIDDirective = getLexer().getTok().getStringContents();
6552 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6553 return Error(getParser().getTok().getLoc(), "target id must match options");
6554
6555 getTargetStreamer().EmitISAVersion();
6556 Lex();
6557
6558 return false;
6559}
6560
6561bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6562 assert(isHsaAbi(getSTI()));
6563
6564 std::string HSAMetadataString;
6565 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6566 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6567 return true;
6568
6569 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6570 return Error(getLoc(), "invalid HSA metadata");
6571
6572 return false;
6573}
6574
6575/// Common code to parse out a block of text (typically YAML) between start and
6576/// end directives.
6577bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6578 const char *AssemblerDirectiveEnd,
6579 std::string &CollectString) {
6580
6581 raw_string_ostream CollectStream(CollectString);
6582
6583 getLexer().setSkipSpace(false);
6584
6585 bool FoundEnd = false;
6586 while (!isToken(AsmToken::Eof)) {
6587 while (isToken(AsmToken::Space)) {
6588 CollectStream << getTokenStr();
6589 Lex();
6590 }
6591
6592 if (trySkipId(AssemblerDirectiveEnd)) {
6593 FoundEnd = true;
6594 break;
6595 }
6596
6597 CollectStream << Parser.parseStringToEndOfStatement()
6598 << getContext().getAsmInfo()->getSeparatorString();
6599
6600 Parser.eatToEndOfStatement();
6601 }
6602
6603 getLexer().setSkipSpace(true);
6604
6605 if (isToken(AsmToken::Eof) && !FoundEnd) {
6606 return TokError(Twine("expected directive ") +
6607 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6608 }
6609
6610 return false;
6611}
6612
6613/// Parse the assembler directive for new MsgPack-format PAL metadata.
6614bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6615 std::string String;
6616 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6618 return true;
6619
6620 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6621 if (!PALMetadata->setFromString(String))
6622 return Error(getLoc(), "invalid PAL metadata");
6623 return false;
6624}
6625
6626/// Parse the assembler directive for old linear-format PAL metadata.
6627bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6628 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6629 return Error(getLoc(),
6630 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6631 "not available on non-amdpal OSes")).str());
6632 }
6633
6634 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6635 PALMetadata->setLegacy();
6636 for (;;) {
6637 uint32_t Key, Value;
6638 if (ParseAsAbsoluteExpression(Key)) {
6639 return TokError(Twine("invalid value in ") +
6641 }
6642 if (!trySkipToken(AsmToken::Comma)) {
6643 return TokError(Twine("expected an even number of values in ") +
6645 }
6646 if (ParseAsAbsoluteExpression(Value)) {
6647 return TokError(Twine("invalid value in ") +
6649 }
6650 PALMetadata->setRegister(Key, Value);
6651 if (!trySkipToken(AsmToken::Comma))
6652 break;
6653 }
6654 return false;
6655}
6656
6657/// ParseDirectiveAMDGPULDS
6658/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
6659bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6660 if (getParser().checkForValidSection())
6661 return true;
6662
6663 StringRef Name;
6664 SMLoc NameLoc = getLoc();
6665 if (getParser().parseIdentifier(Name))
6666 return TokError("expected identifier in directive");
6667
6668 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6669 if (getParser().parseComma())
6670 return true;
6671
6672 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6673
6674 int64_t Size;
6675 SMLoc SizeLoc = getLoc();
6676 if (getParser().parseAbsoluteExpression(Size))
6677 return true;
6678 if (Size < 0)
6679 return Error(SizeLoc, "size must be non-negative");
6680 if (Size > LocalMemorySize)
6681 return Error(SizeLoc, "size is too large");
6682
6683 int64_t Alignment = 4;
6684 if (trySkipToken(AsmToken::Comma)) {
6685 SMLoc AlignLoc = getLoc();
6686 if (getParser().parseAbsoluteExpression(Alignment))
6687 return true;
6688 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6689 return Error(AlignLoc, "alignment must be a power of two");
6690
6691 // Alignment larger than the size of LDS is possible in theory, as long
6692 // as the linker manages to place to symbol at address 0, but we do want
6693 // to make sure the alignment fits nicely into a 32-bit integer.
6694 if (Alignment >= 1u << 31)
6695 return Error(AlignLoc, "alignment is too large");
6696 }
6697
6698 if (parseEOL())
6699 return true;
6700
6701 Symbol->redefineIfPossible();
6702 if (!Symbol->isUndefined())
6703 return Error(NameLoc, "invalid symbol redefinition");
6704
6705 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6706 return false;
6707}
6708
6709bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6710 StringRef IDVal = DirectiveID.getString();
6711
6712 if (isHsaAbi(getSTI())) {
6713 if (IDVal == ".amdhsa_kernel")
6714 return ParseDirectiveAMDHSAKernel();
6715
6716 if (IDVal == ".amdhsa_code_object_version")
6717 return ParseDirectiveAMDHSACodeObjectVersion();
6718
6719 // TODO: Restructure/combine with PAL metadata directive.
6721 return ParseDirectiveHSAMetadata();
6722 } else {
6723 if (IDVal == ".amd_kernel_code_t")
6724 return ParseDirectiveAMDKernelCodeT();
6725
6726 if (IDVal == ".amdgpu_hsa_kernel")
6727 return ParseDirectiveAMDGPUHsaKernel();
6728
6729 if (IDVal == ".amd_amdgpu_isa")
6730 return ParseDirectiveISAVersion();
6731
6733 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6734 Twine(" directive is "
6735 "not available on non-amdhsa OSes"))
6736 .str());
6737 }
6738 }
6739
6740 if (IDVal == ".amdgcn_target")
6741 return ParseDirectiveAMDGCNTarget();
6742
6743 if (IDVal == ".amdgpu_lds")
6744 return ParseDirectiveAMDGPULDS();
6745
6746 if (IDVal == PALMD::AssemblerDirectiveBegin)
6747 return ParseDirectivePALMetadataBegin();
6748
6749 if (IDVal == PALMD::AssemblerDirective)
6750 return ParseDirectivePALMetadata();
6751
6752 return true;
6753}
6754
6755bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6756 MCRegister Reg) {
6757 if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
6758 return isGFX9Plus();
6759
6760 // GFX10+ has 2 more SGPRs 104 and 105.
6761 if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
6762 return hasSGPR104_SGPR105();
6763
6764 switch (Reg.id()) {
6765 case SRC_SHARED_BASE_LO:
6766 case SRC_SHARED_BASE:
6767 case SRC_SHARED_LIMIT_LO:
6768 case SRC_SHARED_LIMIT:
6769 case SRC_PRIVATE_BASE_LO:
6770 case SRC_PRIVATE_BASE:
6771 case SRC_PRIVATE_LIMIT_LO:
6772 case SRC_PRIVATE_LIMIT:
6773 return isGFX9Plus();
6774 case SRC_FLAT_SCRATCH_BASE_LO:
6775 case SRC_FLAT_SCRATCH_BASE_HI:
6776 return hasGloballyAddressableScratch();
6777 case SRC_POPS_EXITING_WAVE_ID:
6778 return isGFX9Plus() && !isGFX11Plus();
6779 case TBA:
6780 case TBA_LO:
6781 case TBA_HI:
6782 case TMA:
6783 case TMA_LO:
6784 case TMA_HI:
6785 return !isGFX9Plus();
6786 case XNACK_MASK:
6787 case XNACK_MASK_LO:
6788 case XNACK_MASK_HI:
6789 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6790 case SGPR_NULL:
6791 return isGFX10Plus();
6792 case SRC_EXECZ:
6793 case SRC_VCCZ:
6794 return !isGFX11Plus();
6795 default:
6796 break;
6797 }
6798
6799 if (isCI())
6800 return true;
6801
6802 if (isSI() || isGFX10Plus()) {
6803 // No flat_scr on SI.
6804 // On GFX10Plus flat scratch is not a valid register operand and can only be
6805 // accessed with s_setreg/s_getreg.
6806 switch (Reg.id()) {
6807 case FLAT_SCR:
6808 case FLAT_SCR_LO:
6809 case FLAT_SCR_HI:
6810 return false;
6811 default:
6812 return true;
6813 }
6814 }
6815
6816 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6817 // SI/CI have.
6818 if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
6819 return hasSGPR102_SGPR103();
6820
6821 return true;
6822}
6823
6824ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6825 StringRef Mnemonic,
6826 OperandMode Mode) {
6827 ParseStatus Res = parseVOPD(Operands);
6828 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6829 return Res;
6830
6831 // Try to parse with a custom parser
6832 Res = MatchOperandParserImpl(Operands, Mnemonic);
6833
6834 // If we successfully parsed the operand or if there as an error parsing,
6835 // we are done.
6836 //
6837 // If we are parsing after we reach EndOfStatement then this means we
6838 // are appending default values to the Operands list. This is only done
6839 // by custom parser, so we shouldn't continue on to the generic parsing.
6840 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6841 return Res;
6842
6843 SMLoc RBraceLoc;
6844 SMLoc LBraceLoc = getLoc();
6845 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6846 unsigned Prefix = Operands.size();
6847
6848 for (;;) {
6849 auto Loc = getLoc();
6850 Res = parseReg(Operands);
6851 if (Res.isNoMatch())
6852 Error(Loc, "expected a register");
6853 if (!Res.isSuccess())
6854 return ParseStatus::Failure;
6855
6856 RBraceLoc = getLoc();
6857 if (trySkipToken(AsmToken::RBrac))
6858 break;
6859
6860 if (!skipToken(AsmToken::Comma,
6861 "expected a comma or a closing square bracket"))
6862 return ParseStatus::Failure;
6863 }
6864
6865 if (Operands.size() - Prefix > 1) {
6866 Operands.insert(Operands.begin() + Prefix,
6867 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6868 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6869 }
6870
6871 return ParseStatus::Success;
6872 }
6873
6874 return parseRegOrImm(Operands);
6875}
6876
6877StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6878 // Clear any forced encodings from the previous instruction.
6879 setForcedEncodingSize(0);
6880 setForcedDPP(false);
6881 setForcedSDWA(false);
6882
6883 if (Name.consume_back("_e64_dpp")) {
6884 setForcedDPP(true);
6885 setForcedEncodingSize(64);
6886 return Name;
6887 }
6888 if (Name.consume_back("_e64")) {
6889 setForcedEncodingSize(64);
6890 return Name;
6891 }
6892 if (Name.consume_back("_e32")) {
6893 setForcedEncodingSize(32);
6894 return Name;
6895 }
6896 if (Name.consume_back("_dpp")) {
6897 setForcedDPP(true);
6898 return Name;
6899 }
6900 if (Name.consume_back("_sdwa")) {
6901 setForcedSDWA(true);
6902 return Name;
6903 }
6904 return Name;
6905}
6906
6907static void applyMnemonicAliases(StringRef &Mnemonic,
6908 const FeatureBitset &Features,
6909 unsigned VariantID);
6910
6911bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
6912 StringRef Name, SMLoc NameLoc,
6913 OperandVector &Operands) {
6914 // Add the instruction mnemonic
6915 Name = parseMnemonicSuffix(Name);
6916
6917 // If the target architecture uses MnemonicAlias, call it here to parse
6918 // operands correctly.
6919 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6920
6921 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6922
6923 bool IsMIMG = Name.starts_with("image_");
6924
6925 while (!trySkipToken(AsmToken::EndOfStatement)) {
6926 OperandMode Mode = OperandMode_Default;
6927 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6928 Mode = OperandMode_NSA;
6929 ParseStatus Res = parseOperand(Operands, Name, Mode);
6930
6931 if (!Res.isSuccess()) {
6932 checkUnsupportedInstruction(Name, NameLoc);
6933 if (!Parser.hasPendingError()) {
6934 // FIXME: use real operand location rather than the current location.
6935 StringRef Msg = Res.isFailure() ? "failed parsing operand."
6936 : "not a valid operand.";
6937 Error(getLoc(), Msg);
6938 }
6939 while (!trySkipToken(AsmToken::EndOfStatement)) {
6940 lex();
6941 }
6942 return true;
6943 }
6944
6945 // Eat the comma or space if there is one.
6946 trySkipToken(AsmToken::Comma);
6947 }
6948
6949 return false;
6950}
6951
6952//===----------------------------------------------------------------------===//
6953// Utility functions
6954//===----------------------------------------------------------------------===//
6955
6956ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6957 OperandVector &Operands) {
6958 SMLoc S = getLoc();
6959 if (!trySkipId(Name))
6960 return ParseStatus::NoMatch;
6961
6962 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6963 return ParseStatus::Success;
6964}
6965
// Parse "<Prefix>:<value>" into IntVal. Returns NoMatch when the prefix
// (followed by a colon) is not present at the current position.
// NOTE(review): the statement that actually parses the value into IntVal
// is elided in this view (the function body as shown falls off the end)
// -- confirm against the full source.
ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
                                                int64_t &IntVal) {

  if (!trySkipId(Prefix, AsmToken::Colon))
    return ParseStatus::NoMatch;

}
6974
6975ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6976 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6977 std::function<bool(int64_t &)> ConvertResult) {
6978 SMLoc S = getLoc();
6979 int64_t Value = 0;
6980
6981 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6982 if (!Res.isSuccess())
6983 return Res;
6984
6985 if (ConvertResult && !ConvertResult(Value)) {
6986 Error(S, "invalid " + StringRef(Prefix) + " value.");
6987 }
6988
6989 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6990 return ParseStatus::Success;
6991}
6992
6993ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6994 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6995 bool (*ConvertResult)(int64_t &)) {
6996 SMLoc S = getLoc();
6997 if (!trySkipId(Prefix, AsmToken::Colon))
6998 return ParseStatus::NoMatch;
6999
7000 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
7001 return ParseStatus::Failure;
7002
7003 unsigned Val = 0;
7004 const unsigned MaxSize = 4;
7005
7006 // FIXME: How to verify the number of elements matches the number of src
7007 // operands?
7008 for (int I = 0; ; ++I) {
7009 int64_t Op;
7010 SMLoc Loc = getLoc();
7011 if (!parseExpr(Op))
7012 return ParseStatus::Failure;
7013
7014 if (Op != 0 && Op != 1)
7015 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
7016
7017 Val |= (Op << I);
7018
7019 if (trySkipToken(AsmToken::RBrac))
7020 break;
7021
7022 if (I + 1 == MaxSize)
7023 return Error(getLoc(), "expected a closing square bracket");
7024
7025 if (!skipToken(AsmToken::Comma, "expected a comma"))
7026 return ParseStatus::Failure;
7027 }
7028
7029 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
7030 return ParseStatus::Success;
7031}
7032
7033ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
7034 OperandVector &Operands,
7035 AMDGPUOperand::ImmTy ImmTy) {
7036 int64_t Bit;
7037 SMLoc S = getLoc();
7038
7039 if (trySkipId(Name)) {
7040 Bit = 1;
7041 } else if (trySkipId("no", Name)) {
7042 Bit = 0;
7043 } else {
7044 return ParseStatus::NoMatch;
7045 }
7046
7047 if (Name == "r128" && !hasMIMG_R128())
7048 return Error(S, "r128 modifier is not supported on this GPU");
7049 if (Name == "a16" && !hasA16())
7050 return Error(S, "a16 modifier is not supported on this GPU");
7051
7052 if (Bit == 0 && Name == "gds") {
7053 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7054 if (Mnemo.starts_with("ds_gws"))
7055 return Error(S, "nogds is not allowed");
7056 }
7057
7058 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
7059 ImmTy = AMDGPUOperand::ImmTyR128A16;
7060
7061 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
7062 return ParseStatus::Success;
7063}
7064
7065unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7066 bool &Disabling) const {
7067 Disabling = Id.consume_front("no");
7068
7069 if (isGFX940() && !Mnemo.starts_with("s_")) {
7070 return StringSwitch<unsigned>(Id)
7071 .Case("nt", AMDGPU::CPol::NT)
7072 .Case("sc0", AMDGPU::CPol::SC0)
7073 .Case("sc1", AMDGPU::CPol::SC1)
7074 .Default(0);
7075 }
7076
7077 return StringSwitch<unsigned>(Id)
7078 .Case("dlc", AMDGPU::CPol::DLC)
7079 .Case("glc", AMDGPU::CPol::GLC)
7080 .Case("scc", AMDGPU::CPol::SCC)
7081 .Case("slc", AMDGPU::CPol::SLC)
7082 .Default(0);
7083}
7084
// Parse cache-policy modifiers into one ImmTyCPol immediate operand.
// GFX12+: named fields (th:..., scope:..., nv/nonv, scale_offset/
// noscale_offset), each accepted at most once, in any order. Pre-GFX12:
// flag keywords (dlc/glc/scc/slc), optionally negated with a "no" prefix.
ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
  if (isGFX12Plus()) {
    SMLoc StringLoc = getLoc();

    int64_t CPolVal = 0;
    // Each Res* tracks whether its field has been seen yet.
    ParseStatus ResTH = ParseStatus::NoMatch;
    ParseStatus ResScope = ParseStatus::NoMatch;
    ParseStatus ResNV = ParseStatus::NoMatch;
    ParseStatus ResScal = ParseStatus::NoMatch;

    // Keep scanning until no remaining field matches.
    for (;;) {
      if (ResTH.isNoMatch()) {
        int64_t TH;
        ResTH = parseTH(Operands, TH);
        if (ResTH.isFailure())
          return ResTH;
        if (ResTH.isSuccess()) {
          CPolVal |= TH;
          continue;
        }
      }

      if (ResScope.isNoMatch()) {
        int64_t Scope;
        ResScope = parseScope(Operands, Scope);
        if (ResScope.isFailure())
          return ResScope;
        if (ResScope.isSuccess()) {
          CPolVal |= Scope;
          continue;
        }
      }

      // NV bit exists on GFX12+, but does something starting from GFX1250.
      // Allow parsing on all GFX12 and fail on validation for better
      // diagnostics.
      if (ResNV.isNoMatch()) {
        if (trySkipId("nv")) {
          ResNV = ParseStatus::Success;
          CPolVal |= CPol::NV;
          continue;
        } else if (trySkipId("no", "nv")) {
          ResNV = ParseStatus::Success;
          continue;
        }
      }

      if (ResScal.isNoMatch()) {
        if (trySkipId("scale_offset")) {
          ResScal = ParseStatus::Success;
          CPolVal |= CPol::SCAL;
          continue;
        } else if (trySkipId("no", "scale_offset")) {
          ResScal = ParseStatus::Success;
          continue;
        }
      }

      break;
    }

    // Nothing matched at all: this is not a cpol operand.
    if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
        ResScal.isNoMatch())
      return ParseStatus::NoMatch;

    Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
                                                AMDGPUOperand::ImmTyCPol));
    return ParseStatus::Success;
  }

  // Pre-GFX12 path: a sequence of [no]dlc/[no]glc/[no]scc/[no]slc keywords.
  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  SMLoc OpLoc = getLoc();
  unsigned Enabled = 0, Seen = 0;
  for (;;) {
    SMLoc S = getLoc();
    bool Disabling;
    unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
    if (!CPol)
      break;

    lex();

    if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
      return Error(S, "dlc modifier is not supported on this GPU");

    if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
      return Error(S, "scc modifier is not supported on this GPU");

    if (Seen & CPol)
      return Error(S, "duplicate cache policy modifier");

    // A "no"-prefixed keyword is recorded as seen but not enabled.
    if (!Disabling)
      Enabled |= CPol;

    Seen |= CPol;
  }

  if (!Seen)
    return ParseStatus::NoMatch;

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
  return ParseStatus::Success;
}
7189
// Parse "scope:<name-or-index>" and translate the 0..3 index into the
// corresponding CPol scope encoding.
// NOTE(review): the Scopes initializer is truncated in this view;
// presumably it continues with CPol::SCOPE_DEV and CPol::SCOPE_SYS to
// mirror the name list below -- confirm against the full source.
ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
                                        int64_t &Scope) {
  static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,

  ParseStatus Res = parseStringOrIntWithPrefix(
      Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
      Scope);

  // On success, Scope holds the index of the matched name; map it to bits.
  if (Res.isSuccess())
    Scope = Scopes[Scope];

  return Res;
}
7204
// Parse "th:<value>" (temporal hint). A "TH_ATOMIC_"/"TH_LOAD_"/
// "TH_STORE_" prefix selects the operation class; the remaining suffix
// selects the specific policy via the StringSwitch tables below, with
// 0xffffffff marking an unrecognized suffix.
// NOTE(review): several lines are elided in this view (the assignments
// after "TH_DEFAULT", "BYPASS" and the consume_front branches, the
// condition between "if (TH != 0) {" and the first StringSwitch, and the
// RHS continuations of the NT_RETURN/CASCADE_NT cases) -- confirm against
// the full source before relying on this listing.
ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
  TH = AMDGPU::CPol::TH_RT; // default

  StringRef Value;
  SMLoc StringLoc;
  ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
  if (!Res.isSuccess())
    return Res;

  if (Value == "TH_DEFAULT")
  else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
           Value == "TH_LOAD_NT_WB") {
    return Error(StringLoc, "invalid th value");
  } else if (Value.consume_front("TH_ATOMIC_")) {
  } else if (Value.consume_front("TH_LOAD_")) {
  } else if (Value.consume_front("TH_STORE_")) {
  } else {
    return Error(StringLoc, "invalid th value");
  }

  if (Value == "BYPASS")

  if (TH != 0) {
    TH |= StringSwitch<int64_t>(Value)
              .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
              .Case("RT", AMDGPU::CPol::TH_RT)
              .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
              .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
              .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
              .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
              .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
              .Default(0xffffffff);
    else
      TH |= StringSwitch<int64_t>(Value)
                .Case("RT", AMDGPU::CPol::TH_RT)
                .Case("NT", AMDGPU::CPol::TH_NT)
                .Case("HT", AMDGPU::CPol::TH_HT)
                .Case("LU", AMDGPU::CPol::TH_LU)
                .Case("WB", AMDGPU::CPol::TH_WB)
                .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
                .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
                .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
                .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
                .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
                .Default(0xffffffff);
  }

  // 0xffffffff is the StringSwitch sentinel for an unknown suffix.
  if (TH == 0xffffffff)
    return Error(StringLoc, "invalid th value");

  return ParseStatus::Success;
}
7265
// Emit an optional immediate into Inst: when the operand was parsed (its
// ImmTy is present in OptionalIdx) the parsed value is used, otherwise
// Default. With InsertAt set, the immediate is inserted at that operand
// position instead of appended.
// NOTE(review): the first signature line (carrying the function name and
// the Inst/Operands parameters) and the final else-branch statement are
// elided in this view -- confirm against the full source.
static void
                      AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
                      AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
                      std::optional<unsigned> InsertAt = std::nullopt) {
  auto i = OptionalIdx.find(ImmT);
  if (i != OptionalIdx.end()) {
    // The operand was explicitly written; copy its parsed value.
    unsigned Idx = i->second;
    const AMDGPUOperand &Op =
        static_cast<const AMDGPUOperand &>(*Operands[Idx]);
    if (InsertAt)
      Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
    else
      Op.addImmOperands(Inst, 1);
  } else {
    if (InsertAt.has_value())
      Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
    else
  }
}
7287
// Parse "<Prefix>:<identifier>", returning the identifier text in Value
// and its location in StringLoc.
// NOTE(review): the ternary's false branch is elided in this view
// (presumably ": ParseStatus::Failure") -- confirm against the full
// source.
ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
                                                   StringRef &Value,
                                                   SMLoc &StringLoc) {
  if (!trySkipId(Prefix, AsmToken::Colon))
    return ParseStatus::NoMatch;

  StringLoc = getLoc();
  return parseId(Value, "expected an identifier") ? ParseStatus::Success
}
7298
7299ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7300 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7301 int64_t &IntVal) {
7302 if (!trySkipId(Name, AsmToken::Colon))
7303 return ParseStatus::NoMatch;
7304
7305 SMLoc StringLoc = getLoc();
7306
7307 StringRef Value;
7308 if (isToken(AsmToken::Identifier)) {
7309 Value = getTokenStr();
7310 lex();
7311
7312 for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
7313 if (Value == Ids[IntVal])
7314 break;
7315 } else if (!parseExpr(IntVal))
7316 return ParseStatus::Failure;
7317
7318 if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
7319 return Error(StringLoc, "invalid " + Twine(Name) + " value");
7320
7321 return ParseStatus::Success;
7322}
7323
7324ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7325 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7326 AMDGPUOperand::ImmTy Type) {
7327 SMLoc S = getLoc();
7328 int64_t IntVal;
7329
7330 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7331 if (Res.isSuccess())
7332 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
7333
7334 return Res;
7335}
7336
7337//===----------------------------------------------------------------------===//
7338// MTBUF format
7339//===----------------------------------------------------------------------===//
7340
7341bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
7342 int64_t MaxVal,
7343 int64_t &Fmt) {
7344 int64_t Val;
7345 SMLoc Loc = getLoc();
7346
7347 auto Res = parseIntWithPrefix(Pref, Val);
7348 if (Res.isFailure())
7349 return false;
7350 if (Res.isNoMatch())
7351 return true;
7352
7353 if (Val < 0 || Val > MaxVal) {
7354 Error(Loc, Twine("out of range ", StringRef(Pref)));
7355 return false;
7356 }
7357
7358 Fmt = Val;
7359 return true;
7360}
7361
7362ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
7363 AMDGPUOperand::ImmTy ImmTy) {
7364 const char *Pref = "index_key";
7365 int64_t ImmVal = 0;
7366 SMLoc Loc = getLoc();
7367 auto Res = parseIntWithPrefix(Pref, ImmVal);
7368 if (!Res.isSuccess())
7369 return Res;
7370
7371 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7372 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7373 (ImmVal < 0 || ImmVal > 1))
7374 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7375
7376 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7377 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7378
7379 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
7380 return ParseStatus::Success;
7381}
7382
// Parse "index_key:<0..3>" for instructions with an 8-bit index key.
ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
}
7386
// Parse "index_key:<0..1>" for instructions with a 16-bit index key.
ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
}
7390
// Parse "index_key:<0..1>" for instructions with a 32-bit index key.
ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
}
7394
7395ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(OperandVector &Operands,
7396 StringRef Name,
7397 AMDGPUOperand::ImmTy Type) {
7398 return parseStringOrIntWithPrefix(Operands, Name,
7399 {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
7400 "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
7401 "MATRIX_FMT_FP4"},
7402 Type);
7403}
7404
// Parse the "matrix_a_fmt:<...>" modifier.
ParseStatus AMDGPUAsmParser::parseMatrixAFMT(OperandVector &Operands) {
  return tryParseMatrixFMT(Operands, "matrix_a_fmt",
                           AMDGPUOperand::ImmTyMatrixAFMT);
}
7409
// Parse the "matrix_b_fmt:<...>" modifier.
ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
  return tryParseMatrixFMT(Operands, "matrix_b_fmt",
                           AMDGPUOperand::ImmTyMatrixBFMT);
}
7414
7415ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
7416 StringRef Name,
7417 AMDGPUOperand::ImmTy Type) {
7418 return parseStringOrIntWithPrefix(
7419 Operands, Name, {"MATRIX_SCALE_ROW0", "MATRIX_SCALE_ROW1"}, Type);
7420}
7421
// Parse the "matrix_a_scale:<...>" modifier.
ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
  return tryParseMatrixScale(Operands, "matrix_a_scale",
                             AMDGPUOperand::ImmTyMatrixAScale);
}
7426
// Parse the "matrix_b_scale:<...>" modifier.
ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
  return tryParseMatrixScale(Operands, "matrix_b_scale",
                             AMDGPUOperand::ImmTyMatrixBScale);
}
7431
7432ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
7433 StringRef Name,
7434 AMDGPUOperand::ImmTy Type) {
7435 return parseStringOrIntWithPrefix(
7436 Operands, Name,
7437 {"MATRIX_SCALE_FMT_E8", "MATRIX_SCALE_FMT_E5M3", "MATRIX_SCALE_FMT_E4M3"},
7438 Type);
7439}
7440
// Parse the "matrix_a_scale_fmt:<...>" modifier.
ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
  return tryParseMatrixScaleFmt(Operands, "matrix_a_scale_fmt",
                                AMDGPUOperand::ImmTyMatrixAScaleFmt);
}
7445
// Parse the "matrix_b_scale_fmt:<...>" modifier.
ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
  return tryParseMatrixScaleFmt(Operands, "matrix_b_scale_fmt",
                                AMDGPUOperand::ImmTyMatrixBScaleFmt);
}
7450
// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
// values to live in a joint format operand in the MCInst encoding.
// They may appear in either order, each at most once; a missing half
// takes its default encoding. Returns NoMatch when neither is present.
ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;

  // dfmt and nfmt can appear in either order, and each is optional.
  for (int I = 0; I < 2; ++I) {
    if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
      return ParseStatus::Failure;

    if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
      return ParseStatus::Failure;

    // Skip optional comma between dfmt/nfmt
    // but guard against 2 commas following each other.
    if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
        !peekToken().is(AsmToken::Comma)) {
      trySkipToken(AsmToken::Comma);
    }
  }

  // Neither modifier present: this is not a format operand.
  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
    return ParseStatus::NoMatch;

  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  Format = encodeDfmtNfmt(Dfmt, Nfmt);
  return ParseStatus::Success;
}
7484
7485ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
7486 using namespace llvm::AMDGPU::MTBUFFormat;
7487
7488 int64_t Fmt = UFMT_UNDEF;
7489
7490 if (!tryParseFmt("format", UFMT_MAX, Fmt))
7491 return ParseStatus::Failure;
7492
7493 if (Fmt == UFMT_UNDEF)
7494 return ParseStatus::NoMatch;
7495
7496 Format = Fmt;
7497 return ParseStatus::Success;
7498}
7499
7500bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7501 int64_t &Nfmt,
7502 StringRef FormatStr,
7503 SMLoc Loc) {
7504 using namespace llvm::AMDGPU::MTBUFFormat;
7505 int64_t Format;
7506
7507 Format = getDfmt(FormatStr);
7508 if (Format != DFMT_UNDEF) {
7509 Dfmt = Format;
7510 return true;
7511 }
7512
7513 Format = getNfmt(FormatStr, getSTI());
7514 if (Format != NFMT_UNDEF) {
7515 Nfmt = Format;
7516 return true;
7517 }
7518
7519 Error(Loc, "unsupported format");
7520 return false;
7521}
7522
7523ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7524 SMLoc FormatLoc,
7525 int64_t &Format) {
7526 using namespace llvm::AMDGPU::MTBUFFormat;
7527
7528 int64_t Dfmt = DFMT_UNDEF;
7529 int64_t Nfmt = NFMT_UNDEF;
7530 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7531 return ParseStatus::Failure;
7532
7533 if (trySkipToken(AsmToken::Comma)) {
7534 StringRef Str;
7535 SMLoc Loc = getLoc();
7536 if (!parseId(Str, "expected a format string") ||
7537 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7538 return ParseStatus::Failure;
7539 if (Dfmt == DFMT_UNDEF)
7540 return Error(Loc, "duplicate numeric format");
7541 if (Nfmt == NFMT_UNDEF)
7542 return Error(Loc, "duplicate data format");
7543 }
7544
7545 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7546 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7547
7548 if (isGFX10Plus()) {
7549 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
7550 if (Ufmt == UFMT_UNDEF)
7551 return Error(FormatLoc, "unsupported format");
7552 Format = Ufmt;
7553 } else {
7554 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7555 }
7556
7557 return ParseStatus::Success;
7558}
7559
7560ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7561 SMLoc Loc,
7562 int64_t &Format) {
7563 using namespace llvm::AMDGPU::MTBUFFormat;
7564
7565 auto Id = getUnifiedFormat(FormatStr, getSTI());
7566 if (Id == UFMT_UNDEF)
7567 return ParseStatus::NoMatch;
7568
7569 if (!isGFX10Plus())
7570 return Error(Loc, "unified format is not supported on this GPU");
7571
7572 Format = Id;
7573 return ParseStatus::Success;
7574}
7575
7576ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7577 using namespace llvm::AMDGPU::MTBUFFormat;
7578 SMLoc Loc = getLoc();
7579
7580 if (!parseExpr(Format))
7581 return ParseStatus::Failure;
7582 if (!isValidFormatEncoding(Format, getSTI()))
7583 return Error(Loc, "out of range format");
7584
7585 return ParseStatus::Success;
7586}
7587
7588ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
7589 using namespace llvm::AMDGPU::MTBUFFormat;
7590
7591 if (!trySkipId("format", AsmToken::Colon))
7592 return ParseStatus::NoMatch;
7593
7594 if (trySkipToken(AsmToken::LBrac)) {
7595 StringRef FormatStr;
7596 SMLoc Loc = getLoc();
7597 if (!parseId(FormatStr, "expected a format string"))
7598 return ParseStatus::Failure;
7599
7600 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
7601 if (Res.isNoMatch())
7602 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
7603 if (!Res.isSuccess())
7604 return Res;
7605
7606 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7607 return ParseStatus::Failure;
7608
7609 return ParseStatus::Success;
7610 }
7611
7612 return parseNumericFormat(Format);
7613}
7614
// Parse the MTBUF format operand together with the soffset that follows
// it. The format may appear before soffset (legacy dfmt/nfmt or unified
// syntax) or after it ("format:..."); a placeholder ImmTyFORMAT operand
// holding the default encoding is pushed first and patched if the format
// only shows up after soffset.
ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Format = getDefaultFormatEncoding(getSTI());
  ParseStatus Res;
  SMLoc Loc = getLoc();

  // Parse legacy format syntax.
  Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
  if (Res.isFailure())
    return Res;

  bool FormatFound = Res.isSuccess();

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));

  if (FormatFound)
    trySkipToken(AsmToken::Comma);

  if (isToken(AsmToken::EndOfStatement)) {
    // We are expecting an soffset operand,
    // but let matcher handle the error.
    return ParseStatus::Success;
  }

  // Parse soffset.
  Res = parseRegOrImm(Operands);
  if (!Res.isSuccess())
    return Res;

  trySkipToken(AsmToken::Comma);

  if (!FormatFound) {
    Res = parseSymbolicOrNumericFormat(Format);
    if (Res.isFailure())
      return Res;
    if (Res.isSuccess()) {
      // Patch the placeholder pushed above; it is now second-to-last,
      // just before the soffset operand.
      auto Size = Operands.size();
      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
      assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
      Op.setImm(Format);
    }
    return ParseStatus::Success;
  }

  // The format was already given before soffset; a second one is an error.
  if (isId("format") && peekToken().is(AsmToken::Colon))
    return Error(getLoc(), "duplicate format");
  return ParseStatus::Success;
}
7665
7666ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7667 ParseStatus Res =
7668 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7669 if (Res.isNoMatch()) {
7670 Res = parseIntWithPrefix("inst_offset", Operands,
7671 AMDGPUOperand::ImmTyInstOffset);
7672 }
7673 return Res;
7674}
7675
7676ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7677 ParseStatus Res =
7678 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7679 if (Res.isNoMatch())
7680 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7681 return Res;
7682}
7683
7684ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7685 ParseStatus Res =
7686 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7687 if (Res.isNoMatch()) {
7688 Res =
7689 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7690 }
7691 return Res;
7692}
7693
7694//===----------------------------------------------------------------------===//
7695// Exp
7696//===----------------------------------------------------------------------===//
7697
// Convert parsed export-instruction operands into an MCInst: collect up
// to four source slots (registers or "off" placeholders), then compute
// the enable mask from which slots hold a live register. With the compr
// modifier, enable bits are produced in pairs.
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned OperandIdx[4]; // MCInst operand index of each source slot
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    // "off" occupies a source slot with a null register.
    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(MCRegister()));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // "done" and "row_en" tokens produce no MCInst operand here.
    if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    Compr = true;
    // Compressed form: move the third slot's register into the second
    // slot and clear the upper two slots.
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(MCRegister());
    Inst.getOperand(OperandIdx[3]).setReg(MCRegister());
  }

  // Build the enable mask from the live source slots; compressed exports
  // enable two bits per slot.
  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg()) {
      EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}
7758
7759//===----------------------------------------------------------------------===//
7760// s_waitcnt
7761//===----------------------------------------------------------------------===//
7762
// Fold one counter value CntVal into the combined waitcnt encoding IntVal
// using the supplied encode/decode helpers. Returns true on failure, i.e.
// when the value does not round-trip through its bitfield and saturation
// was not requested; with Saturate set, an overflowing value is clamped
// by re-encoding with -1 (all ones).
// NOTE(review): the signature line carrying the function name is elided
// in this view -- confirm the exact declaration against the full source.
static bool
           const AMDGPU::IsaVersion ISA,
           int64_t &IntVal,
           int64_t CntVal,
           bool Saturate,
           unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
           unsigned (*decode)(const IsaVersion &Version, unsigned))
{
  bool Failed = false;

  IntVal = encode(ISA, IntVal, CntVal);
  // If decoding does not reproduce CntVal, the value overflowed its field.
  if (CntVal != decode(ISA, IntVal)) {
    if (Saturate) {
      IntVal = encode(ISA, IntVal, -1);
    } else {
      Failed = true;
    }
  }
  return Failed;
}
7784
// Parse one "<name>(<value>)" group of an s_waitcnt expression and fold
// it into IntVal. A "_sat" suffix on the counter name clamps an
// out-of-range value instead of reporting an error. Returns false after
// emitting a diagnostic on malformed input.
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {

  SMLoc CntLoc = getLoc();
  StringRef CntName = getTokenStr();

  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  int64_t CntVal;
  SMLoc ValLoc = getLoc();
  if (!parseExpr(CntVal))
    return false;

  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());

  bool Failed = true;
  bool Sat = CntName.ends_with("_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  } else {
    Error(CntLoc, "invalid counter name " + CntName);
    return false;
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return false;
  }

  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return false;

  // A '&' or ',' separator must be followed by another counter group.
  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
    if (isToken(AsmToken::EndOfStatement)) {
      Error(getLoc(), "expected a counter name");
      return false;
    }
  }

  return true;
}
7832
7833ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7834 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7835 int64_t Waitcnt = getWaitcntBitMask(ISA);
7836 SMLoc S = getLoc();
7837
7838 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7839 while (!isToken(AsmToken::EndOfStatement)) {
7840 if (!parseCnt(Waitcnt))
7841 return ParseStatus::Failure;
7842 }
7843 } else {
7844 if (!parseExpr(Waitcnt))
7845 return ParseStatus::Failure;
7846 }
7847
7848 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7849 return ParseStatus::Success;
7850}
7851
// Parse one "<field>(<value>)" group of an s_delay_alu operand and OR it
// into Delay. Fields are placed by shift: instid0 at bit 0, instskip at
// bit 4, instid1 at bit 7. instskip takes SAME/NEXT/SKIP_* names; the
// instid fields take dependency/cycle names. Returns false after
// emitting a diagnostic on malformed input.
bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
  SMLoc FieldLoc = getLoc();
  StringRef FieldName = getTokenStr();
  if (!skipToken(AsmToken::Identifier, "expected a field name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  SMLoc ValueLoc = getLoc();
  StringRef ValueName = getTokenStr();
  if (!skipToken(AsmToken::Identifier, "expected a value name") ||
      !skipToken(AsmToken::RParen, "expected a right parenthesis"))
    return false;

  unsigned Shift;
  if (FieldName == "instid0") {
    Shift = 0;
  } else if (FieldName == "instskip") {
    Shift = 4;
  } else if (FieldName == "instid1") {
    Shift = 7;
  } else {
    Error(FieldLoc, "invalid field name " + FieldName);
    return false;
  }

  int Value;
  if (Shift == 4) {
    // Parse values for instskip.
    Value = StringSwitch<int>(ValueName)
                .Case("SAME", 0)
                .Case("NEXT", 1)
                .Case("SKIP_1", 2)
                .Case("SKIP_2", 3)
                .Case("SKIP_3", 4)
                .Case("SKIP_4", 5)
                .Default(-1);
  } else {
    // Parse values for instid0 and instid1.
    Value = StringSwitch<int>(ValueName)
                .Case("NO_DEP", 0)
                .Case("VALU_DEP_1", 1)
                .Case("VALU_DEP_2", 2)
                .Case("VALU_DEP_3", 3)
                .Case("VALU_DEP_4", 4)
                .Case("TRANS32_DEP_1", 5)
                .Case("TRANS32_DEP_2", 6)
                .Case("TRANS32_DEP_3", 7)
                .Case("FMA_ACCUM_CYCLE_1", 8)
                .Case("SALU_CYCLE_1", 9)
                .Case("SALU_CYCLE_2", 10)
                .Case("SALU_CYCLE_3", 11)
                .Default(-1);
  }
  if (Value < 0) {
    Error(ValueLoc, "invalid value name " + ValueName);
    return false;
  }

  Delay |= Value << Shift;
  return true;
}
7913
7914ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7915 int64_t Delay = 0;
7916 SMLoc S = getLoc();
7917
7918 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7919 do {
7920 if (!parseDelay(Delay))
7921 return ParseStatus::Failure;
7922 } while (trySkipToken(AsmToken::Pipe));
7923 } else {
7924 if (!parseExpr(Delay))
7925 return ParseStatus::Failure;
7926 }
7927
7928 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7929 return ParseStatus::Success;
7930}
7931
// Any immediate is accepted as an s_waitcnt operand.
bool
AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}
7936
// Any immediate is accepted as an s_delay_alu operand.
bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7938
7939//===----------------------------------------------------------------------===//
7940// DepCtr
7941//===----------------------------------------------------------------------===//
7942
7943void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7944 StringRef DepCtrName) {
7945 switch (ErrorId) {
7946 case OPR_ID_UNKNOWN:
7947 Error(Loc, Twine("invalid counter name ", DepCtrName));
7948 return;
7949 case OPR_ID_UNSUPPORTED:
7950 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7951 return;
7952 case OPR_ID_DUPLICATE:
7953 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7954 return;
7955 case OPR_VAL_INVALID:
7956 Error(Loc, Twine("invalid value for ", DepCtrName));
7957 return;
7958 default:
7959 assert(false);
7960 }
7961}
7962
7963bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7964
7965 using namespace llvm::AMDGPU::DepCtr;
7966
7967 SMLoc DepCtrLoc = getLoc();
7968 StringRef DepCtrName = getTokenStr();
7969
7970 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7971 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7972 return false;
7973
7974 int64_t ExprVal;
7975 if (!parseExpr(ExprVal))
7976 return false;
7977
7978 unsigned PrevOprMask = UsedOprMask;
7979 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7980
7981 if (CntVal < 0) {
7982 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7983 return false;
7984 }
7985
7986 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7987 return false;
7988
7989 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7990 if (isToken(AsmToken::EndOfStatement)) {
7991 Error(getLoc(), "expected a counter name");
7992 return false;
7993 }
7994 }
7995
7996 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7997 DepCtr = (DepCtr & ~CntValMask) | CntVal;
7998 return true;
7999}
8000
8001ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
8002 using namespace llvm::AMDGPU::DepCtr;
8003
8004 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
8005 SMLoc Loc = getLoc();
8006
8007 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8008 unsigned UsedOprMask = 0;
8009 while (!isToken(AsmToken::EndOfStatement)) {
8010 if (!parseDepCtr(DepCtr, UsedOprMask))
8011 return ParseStatus::Failure;
8012 }
8013 } else {
8014 if (!parseExpr(DepCtr))
8015 return ParseStatus::Failure;
8016 }
8017
8018 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
8019 return ParseStatus::Success;
8020}
8021
8022bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
8023
8024//===----------------------------------------------------------------------===//
8025// hwreg
8026//===----------------------------------------------------------------------===//
8027
8028ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
8029 OperandInfoTy &Offset,
8030 OperandInfoTy &Width) {
8031 using namespace llvm::AMDGPU::Hwreg;
8032
8033 if (!trySkipId("hwreg", AsmToken::LParen))
8034 return ParseStatus::NoMatch;
8035
8036 // The register may be specified by name or using a numeric code
8037 HwReg.Loc = getLoc();
8038 if (isToken(AsmToken::Identifier) &&
8039 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8040 HwReg.IsSymbolic = true;
8041 lex(); // skip register name
8042 } else if (!parseExpr(HwReg.Val, "a register name")) {
8043 return ParseStatus::Failure;
8044 }
8045
8046 if (trySkipToken(AsmToken::RParen))
8047 return ParseStatus::Success;
8048
8049 // parse optional params
8050 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
8051 return ParseStatus::Failure;
8052
8053 Offset.Loc = getLoc();
8054 if (!parseExpr(Offset.Val))
8055 return ParseStatus::Failure;
8056
8057 if (!skipToken(AsmToken::Comma, "expected a comma"))
8058 return ParseStatus::Failure;
8059
8060 Width.Loc = getLoc();
8061 if (!parseExpr(Width.Val) ||
8062 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8063 return ParseStatus::Failure;
8064
8065 return ParseStatus::Success;
8066}
8067
8068ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
8069 using namespace llvm::AMDGPU::Hwreg;
8070
8071 int64_t ImmVal = 0;
8072 SMLoc Loc = getLoc();
8073
8074 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
8075 HwregId::Default);
8076 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
8077 HwregOffset::Default);
8078 struct : StructuredOpField {
8079 using StructuredOpField::StructuredOpField;
8080 bool validate(AMDGPUAsmParser &Parser) const override {
8081 if (!isUIntN(Width, Val - 1))
8082 return Error(Parser, "only values from 1 to 32 are legal");
8083 return true;
8084 }
8085 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
8086 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
8087
8088 if (Res.isNoMatch())
8089 Res = parseHwregFunc(HwReg, Offset, Width);
8090
8091 if (Res.isSuccess()) {
8092 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
8093 return ParseStatus::Failure;
8094 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
8095 }
8096
8097 if (Res.isNoMatch() &&
8098 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
8100
8101 if (!Res.isSuccess())
8102 return ParseStatus::Failure;
8103
8104 if (!isUInt<16>(ImmVal))
8105 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8106 Operands.push_back(
8107 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
8108 return ParseStatus::Success;
8109}
8110
8111bool AMDGPUOperand::isHwreg() const {
8112 return isImmTy(ImmTyHwreg);
8113}
8114
8115//===----------------------------------------------------------------------===//
8116// sendmsg
8117//===----------------------------------------------------------------------===//
8118
8119bool
8120AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8121 OperandInfoTy &Op,
8122 OperandInfoTy &Stream) {
8123 using namespace llvm::AMDGPU::SendMsg;
8124
8125 Msg.Loc = getLoc();
8126 if (isToken(AsmToken::Identifier) &&
8127 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8128 Msg.IsSymbolic = true;
8129 lex(); // skip message name
8130 } else if (!parseExpr(Msg.Val, "a message name")) {
8131 return false;
8132 }
8133
8134 if (trySkipToken(AsmToken::Comma)) {
8135 Op.IsDefined = true;
8136 Op.Loc = getLoc();
8137 if (isToken(AsmToken::Identifier) &&
8138 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8140 lex(); // skip operation name
8141 } else if (!parseExpr(Op.Val, "an operation name")) {
8142 return false;
8143 }
8144
8145 if (trySkipToken(AsmToken::Comma)) {
8146 Stream.IsDefined = true;
8147 Stream.Loc = getLoc();
8148 if (!parseExpr(Stream.Val))
8149 return false;
8150 }
8151 }
8152
8153 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
8154}
8155
8156bool
8157AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
8158 const OperandInfoTy &Op,
8159 const OperandInfoTy &Stream) {
8160 using namespace llvm::AMDGPU::SendMsg;
8161
8162 // Validation strictness depends on whether message is specified
8163 // in a symbolic or in a numeric form. In the latter case
8164 // only encoding possibility is checked.
8165 bool Strict = Msg.IsSymbolic;
8166
8167 if (Strict) {
8168 if (Msg.Val == OPR_ID_UNSUPPORTED) {
8169 Error(Msg.Loc, "specified message id is not supported on this GPU");
8170 return false;
8171 }
8172 } else {
8173 if (!isValidMsgId(Msg.Val, getSTI())) {
8174 Error(Msg.Loc, "invalid message id");
8175 return false;
8176 }
8177 }
8178 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
8179 if (Op.IsDefined) {
8180 Error(Op.Loc, "message does not support operations");
8181 } else {
8182 Error(Msg.Loc, "missing message operation");
8183 }
8184 return false;
8185 }
8186 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
8187 if (Op.Val == OPR_ID_UNSUPPORTED)
8188 Error(Op.Loc, "specified operation id is not supported on this GPU");
8189 else
8190 Error(Op.Loc, "invalid operation id");
8191 return false;
8192 }
8193 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
8194 Stream.IsDefined) {
8195 Error(Stream.Loc, "message operation does not support streams");
8196 return false;
8197 }
8198 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
8199 Error(Stream.Loc, "invalid message stream id");
8200 return false;
8201 }
8202 return true;
8203}
8204
8205ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
8206 using namespace llvm::AMDGPU::SendMsg;
8207
8208 int64_t ImmVal = 0;
8209 SMLoc Loc = getLoc();
8210
8211 if (trySkipId("sendmsg", AsmToken::LParen)) {
8212 OperandInfoTy Msg(OPR_ID_UNKNOWN);
8213 OperandInfoTy Op(OP_NONE_);
8214 OperandInfoTy Stream(STREAM_ID_NONE_);
8215 if (parseSendMsgBody(Msg, Op, Stream) &&
8216 validateSendMsg(Msg, Op, Stream)) {
8217 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
8218 } else {
8219 return ParseStatus::Failure;
8220 }
8221 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
8222 if (ImmVal < 0 || !isUInt<16>(ImmVal))
8223 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8224 } else {
8225 return ParseStatus::Failure;
8226 }
8227
8228 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
8229 return ParseStatus::Success;
8230}
8231
8232bool AMDGPUOperand::isSendMsg() const {
8233 return isImmTy(ImmTySendMsg);
8234}
8235
8236//===----------------------------------------------------------------------===//
8237// v_interp
8238//===----------------------------------------------------------------------===//
8239
8240ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
8241 StringRef Str;
8242 SMLoc S = getLoc();
8243
8244 if (!parseId(Str))
8245 return ParseStatus::NoMatch;
8246
8247 int Slot = StringSwitch<int>(Str)
8248 .Case("p10", 0)
8249 .Case("p20", 1)
8250 .Case("p0", 2)
8251 .Default(-1);
8252
8253 if (Slot == -1)
8254 return Error(S, "invalid interpolation slot");
8255
8256 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
8257 AMDGPUOperand::ImmTyInterpSlot));
8258 return ParseStatus::Success;
8259}
8260
8261ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
8262 StringRef Str;
8263 SMLoc S = getLoc();
8264
8265 if (!parseId(Str))
8266 return ParseStatus::NoMatch;
8267
8268 if (!Str.starts_with("attr"))
8269 return Error(S, "invalid interpolation attribute");
8270
8271 StringRef Chan = Str.take_back(2);
8272 int AttrChan = StringSwitch<int>(Chan)
8273 .Case(".x", 0)
8274 .Case(".y", 1)
8275 .Case(".z", 2)
8276 .Case(".w", 3)
8277 .Default(-1);
8278 if (AttrChan == -1)
8279 return Error(S, "invalid or missing interpolation attribute channel");
8280
8281 Str = Str.drop_back(2).drop_front(4);
8282
8283 uint8_t Attr;
8284 if (Str.getAsInteger(10, Attr))
8285 return Error(S, "invalid or missing interpolation attribute number");
8286
8287 if (Attr > 32)
8288 return Error(S, "out of bounds interpolation attribute number");
8289
8290 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
8291
8292 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
8293 AMDGPUOperand::ImmTyInterpAttr));
8294 Operands.push_back(AMDGPUOperand::CreateImm(
8295 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8296 return ParseStatus::Success;
8297}
8298
8299//===----------------------------------------------------------------------===//
8300// exp
8301//===----------------------------------------------------------------------===//
8302
8303ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
8304 using namespace llvm::AMDGPU::Exp;
8305
8306 StringRef Str;
8307 SMLoc S = getLoc();
8308
8309 if (!parseId(Str))
8310 return ParseStatus::NoMatch;
8311
8312 unsigned Id = getTgtId(Str);
8313 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
8314 return Error(S, (Id == ET_INVALID)
8315 ? "invalid exp target"
8316 : "exp target is not supported on this GPU");
8317
8318 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
8319 AMDGPUOperand::ImmTyExpTgt));
8320 return ParseStatus::Success;
8321}
8322
8323//===----------------------------------------------------------------------===//
8324// parser helpers
8325//===----------------------------------------------------------------------===//
8326
8327bool
8328AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
8329 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
8330}
8331
8332bool
8333AMDGPUAsmParser::isId(const StringRef Id) const {
8334 return isId(getToken(), Id);
8335}
8336
8337bool
8338AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
8339 return getTokenKind() == Kind;
8340}
8341
8342StringRef AMDGPUAsmParser::getId() const {
8343 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
8344}
8345
8346bool
8347AMDGPUAsmParser::trySkipId(const StringRef Id) {
8348 if (isId(Id)) {
8349 lex();
8350 return true;
8351 }
8352 return false;
8353}
8354
8355bool
8356AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
8357 if (isToken(AsmToken::Identifier)) {
8358 StringRef Tok = getTokenStr();
8359 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
8360 lex();
8361 return true;
8362 }
8363 }
8364 return false;
8365}
8366
8367bool
8368AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
8369 if (isId(Id) && peekToken().is(Kind)) {
8370 lex();
8371 lex();
8372 return true;
8373 }
8374 return false;
8375}
8376
8377bool
8378AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
8379 if (isToken(Kind)) {
8380 lex();
8381 return true;
8382 }
8383 return false;
8384}
8385
8386bool
8387AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
8388 const StringRef ErrMsg) {
8389 if (!trySkipToken(Kind)) {
8390 Error(getLoc(), ErrMsg);
8391 return false;
8392 }
8393 return true;
8394}
8395
8396bool
8397AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
8398 SMLoc S = getLoc();
8399
8400 const MCExpr *Expr;
8401 if (Parser.parseExpression(Expr))
8402 return false;
8403
8404 if (Expr->evaluateAsAbsolute(Imm))
8405 return true;
8406
8407 if (Expected.empty()) {
8408 Error(S, "expected absolute expression");
8409 } else {
8410 Error(S, Twine("expected ", Expected) +
8411 Twine(" or an absolute expression"));
8412 }
8413 return false;
8414}
8415
8416bool
8417AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
8418 SMLoc S = getLoc();
8419
8420 const MCExpr *Expr;
8421 if (Parser.parseExpression(Expr))
8422 return false;
8423
8424 int64_t IntVal;
8425 if (Expr->evaluateAsAbsolute(IntVal)) {
8426 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
8427 } else {
8428 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8429 }
8430 return true;
8431}
8432
8433bool
8434AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
8435 if (isToken(AsmToken::String)) {
8436 Val = getToken().getStringContents();
8437 lex();
8438 return true;
8439 }
8440 Error(getLoc(), ErrMsg);
8441 return false;
8442}
8443
8444bool
8445AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
8446 if (isToken(AsmToken::Identifier)) {
8447 Val = getTokenStr();
8448 lex();
8449 return true;
8450 }
8451 if (!ErrMsg.empty())
8452 Error(getLoc(), ErrMsg);
8453 return false;
8454}
8455
8456AsmToken
8457AMDGPUAsmParser::getToken() const {
8458 return Parser.getTok();
8459}
8460
8461AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
8462 return isToken(AsmToken::EndOfStatement)
8463 ? getToken()
8464 : getLexer().peekTok(ShouldSkipSpace);
8465}
8466
8467void
8468AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
8469 auto TokCount = getLexer().peekTokens(Tokens);
8470
8471 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
8472 Tokens[Idx] = AsmToken(AsmToken::Error, "");
8473}
8474
8476AMDGPUAsmParser::getTokenKind() const {
8477 return getLexer().getKind();
8478}
8479
8480SMLoc
8481AMDGPUAsmParser::getLoc() const {
8482 return getToken().getLoc();
8483}
8484
8485StringRef
8486AMDGPUAsmParser::getTokenStr() const {
8487 return getToken().getString();
8488}
8489
8490void
8491AMDGPUAsmParser::lex() {
8492 Parser.Lex();
8493}
8494
8495SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
8496 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8497}
8498
8499// Returns one of the given locations that comes later in the source.
8500SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
8501 return a.getPointer() < b.getPointer() ? b : a;
8502}
8503
8504SMLoc AMDGPUAsmParser::getOperandLoc(const OperandVector &Operands,
8505 int MCOpIdx) const {
8506 for (const auto &Op : Operands) {
8507 const auto TargetOp = static_cast<AMDGPUOperand &>(*Op);
8508 if (TargetOp.getMCOpIdx() == MCOpIdx)
8509 return TargetOp.getStartLoc();
8510 }
8511 llvm_unreachable("No such MC operand!");
8512}
8513
8514SMLoc
8515AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
8516 const OperandVector &Operands) const {
8517 for (unsigned i = Operands.size() - 1; i > 0; --i) {
8518 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8519 if (Test(Op))
8520 return Op.getStartLoc();
8521 }
8522 return getInstLoc(Operands);
8523}
8524
8525SMLoc
8526AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8527 const OperandVector &Operands) const {
8528 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
8529 return getOperandLoc(Test, Operands);
8530}
8531
8532ParseStatus
8533AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
8534 if (!trySkipToken(AsmToken::LCurly))
8535 return ParseStatus::NoMatch;
8536
8537 bool First = true;
8538 while (!trySkipToken(AsmToken::RCurly)) {
8539 if (!First &&
8540 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
8541 return ParseStatus::Failure;
8542
8543 StringRef Id = getTokenStr();
8544 SMLoc IdLoc = getLoc();
8545 if (!skipToken(AsmToken::Identifier, "field name expected") ||
8546 !skipToken(AsmToken::Colon, "colon expected"))
8547 return ParseStatus::Failure;
8548
8549 const auto *I =
8550 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
8551 if (I == Fields.end())
8552 return Error(IdLoc, "unknown field");
8553 if ((*I)->IsDefined)
8554 return Error(IdLoc, "duplicate field");
8555
8556 // TODO: Support symbolic values.
8557 (*I)->Loc = getLoc();
8558 if (!parseExpr((*I)->Val))
8559 return ParseStatus::Failure;
8560 (*I)->IsDefined = true;
8561
8562 First = false;
8563 }
8564 return ParseStatus::Success;
8565}
8566
8567bool AMDGPUAsmParser::validateStructuredOpFields(
8569 return all_of(Fields, [this](const StructuredOpField *F) {
8570 return F->validate(*this);
8571 });
8572}
8573
8574//===----------------------------------------------------------------------===//
8575// swizzle
8576//===----------------------------------------------------------------------===//
8577
8579static unsigned
8580encodeBitmaskPerm(const unsigned AndMask,
8581 const unsigned OrMask,
8582 const unsigned XorMask) {
8583 using namespace llvm::AMDGPU::Swizzle;
8584
8585 return BITMASK_PERM_ENC |
8586 (AndMask << BITMASK_AND_SHIFT) |
8587 (OrMask << BITMASK_OR_SHIFT) |
8588 (XorMask << BITMASK_XOR_SHIFT);
8589}
8590
8591bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8592 const unsigned MaxVal,
8593 const Twine &ErrMsg, SMLoc &Loc) {
8594 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8595 return false;
8596 }
8597 Loc = getLoc();
8598 if (!parseExpr(Op)) {
8599 return false;
8600 }
8601 if (Op < MinVal || Op > MaxVal) {
8602 Error(Loc, ErrMsg);
8603 return false;
8604 }
8605
8606 return true;
8607}
8608
8609bool
8610AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8611 const unsigned MinVal,
8612 const unsigned MaxVal,
8613 const StringRef ErrMsg) {
8614 SMLoc Loc;
8615 for (unsigned i = 0; i < OpNum; ++i) {
8616 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
8617 return false;
8618 }
8619
8620 return true;
8621}
8622
8623bool
8624AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8625 using namespace llvm::AMDGPU::Swizzle;
8626
8627 int64_t Lane[LANE_NUM];
8628 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8629 "expected a 2-bit lane id")) {
8631 for (unsigned I = 0; I < LANE_NUM; ++I) {
8632 Imm |= Lane[I] << (LANE_SHIFT * I);
8633 }
8634 return true;
8635 }
8636 return false;
8637}
8638
8639bool
8640AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
8641 using namespace llvm::AMDGPU::Swizzle;
8642
8643 SMLoc Loc;
8644 int64_t GroupSize;
8645 int64_t LaneIdx;
8646
8647 if (!parseSwizzleOperand(GroupSize,
8648 2, 32,
8649 "group size must be in the interval [2,32]",
8650 Loc)) {
8651 return false;
8652 }
8653 if (!isPowerOf2_64(GroupSize)) {
8654 Error(Loc, "group size must be a power of two");
8655 return false;
8656 }
8657 if (parseSwizzleOperand(LaneIdx,
8658 0, GroupSize - 1,
8659 "lane id must be in the interval [0,group size - 1]",
8660 Loc)) {
8661 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8662 return true;
8663 }
8664 return false;
8665}
8666
8667bool
8668AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8669 using namespace llvm::AMDGPU::Swizzle;
8670
8671 SMLoc Loc;
8672 int64_t GroupSize;
8673
8674 if (!parseSwizzleOperand(GroupSize,
8675 2, 32,
8676 "group size must be in the interval [2,32]",
8677 Loc)) {
8678 return false;
8679 }
8680 if (!isPowerOf2_64(GroupSize)) {
8681 Error(Loc, "group size must be a power of two");
8682 return false;
8683 }
8684
8685 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8686 return true;
8687}
8688
8689bool
8690AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8691 using namespace llvm::AMDGPU::Swizzle;
8692
8693 SMLoc Loc;
8694 int64_t GroupSize;
8695
8696 if (!parseSwizzleOperand(GroupSize,
8697 1, 16,
8698 "group size must be in the interval [1,16]",
8699 Loc)) {
8700 return false;
8701 }
8702 if (!isPowerOf2_64(GroupSize)) {
8703 Error(Loc, "group size must be a power of two");
8704 return false;
8705 }
8706
8707 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8708 return true;
8709}
8710
8711bool
8712AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8713 using namespace llvm::AMDGPU::Swizzle;
8714
8715 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8716 return false;
8717 }
8718
8719 StringRef Ctl;
8720 SMLoc StrLoc = getLoc();
8721 if (!parseString(Ctl)) {
8722 return false;
8723 }
8724 if (Ctl.size() != BITMASK_WIDTH) {
8725 Error(StrLoc, "expected a 5-character mask");
8726 return false;
8727 }
8728
8729 unsigned AndMask = 0;
8730 unsigned OrMask = 0;
8731 unsigned XorMask = 0;
8732
8733 for (size_t i = 0; i < Ctl.size(); ++i) {
8734 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8735 switch(Ctl[i]) {
8736 default:
8737 Error(StrLoc, "invalid mask");
8738 return false;
8739 case '0':
8740 break;
8741 case '1':
8742 OrMask |= Mask;
8743 break;
8744 case 'p':
8745 AndMask |= Mask;
8746 break;
8747 case 'i':
8748 AndMask |= Mask;
8749 XorMask |= Mask;
8750 break;
8751 }
8752 }
8753
8754 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8755 return true;
8756}
8757
8758bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
8759 using namespace llvm::AMDGPU::Swizzle;
8760
8761 if (!AMDGPU::isGFX9Plus(getSTI())) {
8762 Error(getLoc(), "FFT mode swizzle not supported on this GPU");
8763 return false;
8764 }
8765
8766 int64_t Swizzle;
8767 SMLoc Loc;
8768 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
8769 "FFT swizzle must be in the interval [0," +
8770 Twine(FFT_SWIZZLE_MAX) + Twine(']'),
8771 Loc))
8772 return false;
8773
8774 Imm = FFT_MODE_ENC | Swizzle;
8775 return true;
8776}
8777
8778bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
8779 using namespace llvm::AMDGPU::Swizzle;
8780
8781 if (!AMDGPU::isGFX9Plus(getSTI())) {
8782 Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
8783 return false;
8784 }
8785
8786 SMLoc Loc;
8787 int64_t Direction;
8788
8789 if (!parseSwizzleOperand(Direction, 0, 1,
8790 "direction must be 0 (left) or 1 (right)", Loc))
8791 return false;
8792
8793 int64_t RotateSize;
8794 if (!parseSwizzleOperand(
8795 RotateSize, 0, ROTATE_MAX_SIZE,
8796 "number of threads to rotate must be in the interval [0," +
8797 Twine(ROTATE_MAX_SIZE) + Twine(']'),
8798 Loc))
8799 return false;
8800
8802 (RotateSize << ROTATE_SIZE_SHIFT);
8803 return true;
8804}
8805
8806bool
8807AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8808
8809 SMLoc OffsetLoc = getLoc();
8810
8811 if (!parseExpr(Imm, "a swizzle macro")) {
8812 return false;
8813 }
8814 if (!isUInt<16>(Imm)) {
8815 Error(OffsetLoc, "expected a 16-bit offset");
8816 return false;
8817 }
8818 return true;
8819}
8820
8821bool
8822AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8823 using namespace llvm::AMDGPU::Swizzle;
8824
8825 if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
8826
8827 SMLoc ModeLoc = getLoc();
8828 bool Ok = false;
8829
8830 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8831 Ok = parseSwizzleQuadPerm(Imm);
8832 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8833 Ok = parseSwizzleBitmaskPerm(Imm);
8834 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8835 Ok = parseSwizzleBroadcast(Imm);
8836 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8837 Ok = parseSwizzleSwap(Imm);
8838 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8839 Ok = parseSwizzleReverse(Imm);
8840 } else if (trySkipId(IdSymbolic[ID_FFT])) {
8841 Ok = parseSwizzleFFT(Imm);
8842 } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
8843 Ok = parseSwizzleRotate(Imm);
8844 } else {
8845 Error(ModeLoc, "expected a swizzle mode");
8846 }
8847
8848 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
8849 }
8850
8851 return false;
8852}
8853
8854ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8855 SMLoc S = getLoc();
8856 int64_t Imm = 0;
8857
8858 if (trySkipId("offset")) {
8859
8860 bool Ok = false;
8861 if (skipToken(AsmToken::Colon, "expected a colon")) {
8862 if (trySkipId("swizzle")) {
8863 Ok = parseSwizzleMacro(Imm);
8864 } else {
8865 Ok = parseSwizzleOffset(Imm);
8866 }
8867 }
8868
8869 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8870
8872 }
8873 return ParseStatus::NoMatch;
8874}
8875
8876bool
8877AMDGPUOperand::isSwizzle() const {
8878 return isImmTy(ImmTySwizzle);
8879}
8880
8881//===----------------------------------------------------------------------===//
8882// VGPR Index Mode
8883//===----------------------------------------------------------------------===//
8884
8885int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8886
8887 using namespace llvm::AMDGPU::VGPRIndexMode;
8888
8889 if (trySkipToken(AsmToken::RParen)) {
8890 return OFF;
8891 }
8892
8893 int64_t Imm = 0;
8894
8895 while (true) {
8896 unsigned Mode = 0;
8897 SMLoc S = getLoc();
8898
8899 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8900 if (trySkipId(IdSymbolic[ModeId])) {
8901 Mode = 1 << ModeId;
8902 break;
8903 }
8904 }
8905
8906 if (Mode == 0) {
8907 Error(S, (Imm == 0)?
8908 "expected a VGPR index mode or a closing parenthesis" :
8909 "expected a VGPR index mode");
8910 return UNDEF;
8911 }
8912
8913 if (Imm & Mode) {
8914 Error(S, "duplicate VGPR index mode");
8915 return UNDEF;
8916 }
8917 Imm |= Mode;
8918
8919 if (trySkipToken(AsmToken::RParen))
8920 break;
8921 if (!skipToken(AsmToken::Comma,
8922 "expected a comma or a closing parenthesis"))
8923 return UNDEF;
8924 }
8925
8926 return Imm;
8927}
8928
8929ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8930
8931 using namespace llvm::AMDGPU::VGPRIndexMode;
8932
8933 int64_t Imm = 0;
8934 SMLoc S = getLoc();
8935
8936 if (trySkipId("gpr_idx", AsmToken::LParen)) {
8937 Imm = parseGPRIdxMacro();
8938 if (Imm == UNDEF)
8939 return ParseStatus::Failure;
8940 } else {
8941 if (getParser().parseAbsoluteExpression(Imm))
8942 return ParseStatus::Failure;
8943 if (Imm < 0 || !isUInt<4>(Imm))
8944 return Error(S, "invalid immediate: only 4-bit values are legal");
8945 }
8946
8947 Operands.push_back(
8948 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8949 return ParseStatus::Success;
8950}
8951
8952bool AMDGPUOperand::isGPRIdxMode() const {
8953 return isImmTy(ImmTyGprIdxMode);
8954}
8955
8956//===----------------------------------------------------------------------===//
8957// sopp branch targets
8958//===----------------------------------------------------------------------===//
8959
8960ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
8961
8962 // Make sure we are not parsing something
8963 // that looks like a label or an expression but is not.
8964 // This will improve error messages.
8965 if (isRegister() || isModifier())
8966 return ParseStatus::NoMatch;
8967
8968 if (!parseExpr(Operands))
8969 return ParseStatus::Failure;
8970
8971 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
8972 assert(Opr.isImm() || Opr.isExpr());
8973 SMLoc Loc = Opr.getStartLoc();
8974
8975 // Currently we do not support arbitrary expressions as branch targets.
8976 // Only labels and absolute expressions are accepted.
8977 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8978 Error(Loc, "expected an absolute expression or a label");
8979 } else if (Opr.isImm() && !Opr.isS16Imm()) {
8980 Error(Loc, "expected a 16-bit signed jump offset");
8981 }
8982
8983 return ParseStatus::Success;
8984}
8985
8986//===----------------------------------------------------------------------===//
8987// Boolean holding registers
8988//===----------------------------------------------------------------------===//
8989
8990ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
8991 return parseReg(Operands);
8992}
8993
8994//===----------------------------------------------------------------------===//
8995// mubuf
8996//===----------------------------------------------------------------------===//
8997
8998void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
8999 const OperandVector &Operands,
9000 bool IsAtomic) {
9001 OptionalImmIndexMap OptionalIdx;
9002 unsigned FirstOperandIdx = 1;
9003 bool IsAtomicReturn = false;
9004
9005 if (IsAtomic) {
9006 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
9008 }
9009
9010 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
9011 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9012
9013 // Add the register arguments
9014 if (Op.isReg()) {
9015 Op.addRegOperands(Inst, 1);
9016 // Insert a tied src for atomic return dst.
9017 // This cannot be postponed as subsequent calls to
9018 // addImmOperands rely on correct number of MC operands.
9019 if (IsAtomicReturn && i == FirstOperandIdx)
9020 Op.addRegOperands(Inst, 1);
9021 continue;
9022 }
9023
9024 // Handle the case where soffset is an immediate
9025 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9026 Op.addImmOperands(Inst, 1);
9027 continue;
9028 }
9029
9030 // Handle tokens like 'offen' which are sometimes hard-coded into the
9031 // asm string. There are no MCInst operands for these.
9032 if (Op.isToken()) {
9033 continue;
9034 }
9035 assert(Op.isImm());
9036
9037 // Handle optional arguments
9038 OptionalIdx[Op.getImmTy()] = i;
9039 }
9040
9041 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
9042 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
9043 // Parse a dummy operand as a placeholder for the SWZ operand. This enforces
9044 // agreement between MCInstrDesc.getNumOperands and MCInst.getNumOperands.
9046}
9047
9048//===----------------------------------------------------------------------===//
9049// smrd
9050//===----------------------------------------------------------------------===//
9051
9052bool AMDGPUOperand::isSMRDOffset8() const {
9053 return isImmLiteral() && isUInt<8>(getImm());
9054}
9055
9056bool AMDGPUOperand::isSMEMOffset() const {
9057 // Offset range is checked later by validator.
9058 return isImmLiteral();
9059}
9060
9061bool AMDGPUOperand::isSMRDLiteralOffset() const {
9062 // 32-bit literals are only supported on CI and we only want to use them
9063 // when the offset is > 8-bits.
9064 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
9065}
9066
9067//===----------------------------------------------------------------------===//
9068// vop3
9069//===----------------------------------------------------------------------===//
9070
// Maps an omod "mul" factor (1, 2 or 4) onto its encoding (0, 1 or 2).
// Returns false, leaving Mul untouched, for any other value.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
  case 2:
  case 4:
    Mul >>= 1; // 1 -> 0, 2 -> 1, 4 -> 2
    return true;
  default:
    return false;
  }
}
9078
// Maps an omod "div" factor onto its encoding: 1 -> 0, 2 -> 3. Any other
// value is rejected and Div is left unchanged.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1:
    Div = 0;
    return true;
  case 2:
    Div = 3;
    return true;
  default:
    return false;
  }
}
9092
9093// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
9094// This is intentional and ensures compatibility with sp3.
9095// See bug 35397 for details.
9096bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9097 if (BoundCtrl == 0 || BoundCtrl == 1) {
9098 if (!isGFX11Plus())
9099 BoundCtrl = 1;
9100 return true;
9101 }
9102 return false;
9103}
9104
9105void AMDGPUAsmParser::onBeginOfFile() {
9106 if (!getParser().getStreamer().getTargetStreamer() ||
9107 getSTI().getTargetTriple().getArch() == Triple::r600)
9108 return;
9109
9110 if (!getTargetStreamer().getTargetID())
9111 getTargetStreamer().initializeTargetID(getSTI(),
9112 getSTI().getFeatureString());
9113
9114 if (isHsaAbi(getSTI()))
9115 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9116}
9117
9118/// Parse AMDGPU specific expressions.
9119///
9120/// expr ::= or(expr, ...) |
9121/// max(expr, ...)
9122///
9123bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
9124 using AGVK = AMDGPUMCExpr::VariantKind;
9125
9126 if (isToken(AsmToken::Identifier)) {
9127 StringRef TokenId = getTokenStr();
9128 AGVK VK = StringSwitch<AGVK>(TokenId)
9129 .Case("max", AGVK::AGVK_Max)
9130 .Case("or", AGVK::AGVK_Or)
9131 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
9132 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9133 .Case("alignto", AGVK::AGVK_AlignTo)
9134 .Case("occupancy", AGVK::AGVK_Occupancy)
9135 .Default(AGVK::AGVK_None);
9136
9137 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
9139 uint64_t CommaCount = 0;
9140 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
9141 lex(); // Eat '('
9142 while (true) {
9143 if (trySkipToken(AsmToken::RParen)) {
9144 if (Exprs.empty()) {
9145 Error(getToken().getLoc(),
9146 "empty " + Twine(TokenId) + " expression");
9147 return true;
9148 }
9149 if (CommaCount + 1 != Exprs.size()) {
9150 Error(getToken().getLoc(),
9151 "mismatch of commas in " + Twine(TokenId) + " expression");
9152 return true;
9153 }
9154 Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
9155 return false;
9156 }
9157 const MCExpr *Expr;
9158 if (getParser().parseExpression(Expr, EndLoc))
9159 return true;
9160 Exprs.push_back(Expr);
9161 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
9162 if (LastTokenWasComma)
9163 CommaCount++;
9164 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
9165 Error(getToken().getLoc(),
9166 "unexpected token in " + Twine(TokenId) + " expression");
9167 return true;
9168 }
9169 }
9170 }
9171 }
9172 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
9173}
9174
9175ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
9176 StringRef Name = getTokenStr();
9177 if (Name == "mul") {
9178 return parseIntWithPrefix("mul", Operands,
9179 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
9180 }
9181
9182 if (Name == "div") {
9183 return parseIntWithPrefix("div", Operands,
9184 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
9185 }
9186
9187 return ParseStatus::NoMatch;
9188}
9189
9190// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
9191// the number of src operands present, then copies that bit into src0_modifiers.
9192static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
9193 int Opc = Inst.getOpcode();
9194 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9195 if (OpSelIdx == -1)
9196 return;
9197
9198 int SrcNum;
9199 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9200 AMDGPU::OpName::src2};
9201 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
9202 ++SrcNum)
9203 ;
9204 assert(SrcNum > 0);
9205
9206 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9207
9208 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
9209 if (DstIdx == -1)
9210 return;
9211
9212 const MCOperand &DstOp = Inst.getOperand(DstIdx);
9213 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
9214 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9215 if (DstOp.isReg() &&
9216 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
9218 ModVal |= SISrcMods::DST_OP_SEL;
9219 } else {
9220 if ((OpSel & (1 << SrcNum)) != 0)
9221 ModVal |= SISrcMods::DST_OP_SEL;
9222 }
9223 Inst.getOperand(ModIdx).setImm(ModVal);
9224}
9225
9226void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
9227 const OperandVector &Operands) {
9228 cvtVOP3P(Inst, Operands);
9229 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9230}
9231
9232void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
9233 OptionalImmIndexMap &OptionalIdx) {
9234 cvtVOP3P(Inst, Operands, OptionalIdx);
9235 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9236}
9237
9238static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
9239 return
9240 // 1. This operand is input modifiers
9241 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
9242 // 2. This is not last operand
9243 && Desc.NumOperands > (OpNum + 1)
9244 // 3. Next operand is register class
9245 && Desc.operands()[OpNum + 1].RegClass != -1
9246 // 4. Next register is not tied to any other operand
9247 && Desc.getOperandConstraint(OpNum + 1,
9249}
9250
9251void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst, unsigned OpSel) {
9252 unsigned Opc = Inst.getOpcode();
9253 constexpr AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9254 AMDGPU::OpName::src2};
9255 constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9256 AMDGPU::OpName::src1_modifiers,
9257 AMDGPU::OpName::src2_modifiers};
9258 for (int J = 0; J < 3; ++J) {
9259 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9260 if (OpIdx == -1)
9261 // Some instructions, e.g. v_interp_p2_f16 in GFX9, have src0, src2, but
9262 // no src1. So continue instead of break.
9263 continue;
9264
9265 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9266 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9267
9268 if ((OpSel & (1 << J)) != 0)
9269 ModVal |= SISrcMods::OP_SEL_0;
9270 // op_sel[3] is encoded in src0_modifiers.
9271 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)
9272 ModVal |= SISrcMods::DST_OP_SEL;
9273
9274 Inst.getOperand(ModIdx).setImm(ModVal);
9275 }
9276}
9277
9278void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
9279{
9280 OptionalImmIndexMap OptionalIdx;
9281 unsigned Opc = Inst.getOpcode();
9282
9283 unsigned I = 1;
9284 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9285 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9286 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9287 }
9288
9289 for (unsigned E = Operands.size(); I != E; ++I) {
9290 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9292 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9293 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
9294 Op.isInterpAttrChan()) {
9295 Inst.addOperand(MCOperand::createImm(Op.getImm()));
9296 } else if (Op.isImmModifier()) {
9297 OptionalIdx[Op.getImmTy()] = I;
9298 } else {
9299 llvm_unreachable("unhandled operand type");
9300 }
9301 }
9302
9303 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
9304 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9305 AMDGPUOperand::ImmTyHigh);
9306
9307 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9308 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9309 AMDGPUOperand::ImmTyClamp);
9310
9311 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9312 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9313 AMDGPUOperand::ImmTyOModSI);
9314
9315 // Some v_interp instructions use op_sel[3] for dst.
9316 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9317 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9318 AMDGPUOperand::ImmTyOpSel);
9319 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9320 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9321
9322 cvtOpSelHelper(Inst, OpSel);
9323 }
9324}
9325
9326void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
9327{
9328 OptionalImmIndexMap OptionalIdx;
9329 unsigned Opc = Inst.getOpcode();
9330
9331 unsigned I = 1;
9332 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9333 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9334 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9335 }
9336
9337 for (unsigned E = Operands.size(); I != E; ++I) {
9338 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9340 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9341 } else if (Op.isImmModifier()) {
9342 OptionalIdx[Op.getImmTy()] = I;
9343 } else {
9344 llvm_unreachable("unhandled operand type");
9345 }
9346 }
9347
9348 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
9349
9350 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9351 if (OpSelIdx != -1)
9352 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9353
9354 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
9355
9356 if (OpSelIdx == -1)
9357 return;
9358
9359 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9360 cvtOpSelHelper(Inst, OpSel);
9361}
9362
9363void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9364 const OperandVector &Operands) {
9365 OptionalImmIndexMap OptionalIdx;
9366 unsigned Opc = Inst.getOpcode();
9367 unsigned I = 1;
9368 int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
9369
9370 const MCInstrDesc &Desc = MII.get(Opc);
9371
9372 for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
9373 static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);
9374
9375 for (unsigned E = Operands.size(); I != E; ++I) {
9376 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
9377 int NumOperands = Inst.getNumOperands();
9378 // The order of operands in MCInst and parsed operands are different.
9379 // Adding dummy cbsz and blgp operands at corresponding MCInst operand
9380 // indices for parsing scale values correctly.
9381 if (NumOperands == CbszOpIdx) {
9384 }
9385 if (isRegOrImmWithInputMods(Desc, NumOperands)) {
9386 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9387 } else if (Op.isImmModifier()) {
9388 OptionalIdx[Op.getImmTy()] = I;
9389 } else {
9390 Op.addRegOrImmOperands(Inst, 1);
9391 }
9392 }
9393
9394 // Insert CBSZ and BLGP operands for F8F6F4 variants
9395 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9396 if (CbszIdx != OptionalIdx.end()) {
9397 int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
9398 Inst.getOperand(CbszOpIdx).setImm(CbszVal);
9399 }
9400
9401 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
9402 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9403 if (BlgpIdx != OptionalIdx.end()) {
9404 int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
9405 Inst.getOperand(BlgpOpIdx).setImm(BlgpVal);
9406 }
9407
9408 // Add dummy src_modifiers
9411
9412 // Handle op_sel fields
9413
9414 unsigned OpSel = 0;
9415 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9416 if (OpselIdx != OptionalIdx.end()) {
9417 OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
9418 .getImm();
9419 }
9420
9421 unsigned OpSelHi = 0;
9422 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9423 if (OpselHiIdx != OptionalIdx.end()) {
9424 OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
9425 .getImm();
9426 }
9427 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9428 AMDGPU::OpName::src1_modifiers};
9429
9430 for (unsigned J = 0; J < 2; ++J) {
9431 unsigned ModVal = 0;
9432 if (OpSel & (1 << J))
9433 ModVal |= SISrcMods::OP_SEL_0;
9434 if (OpSelHi & (1 << J))
9435 ModVal |= SISrcMods::OP_SEL_1;
9436
9437 const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9438 Inst.getOperand(ModIdx).setImm(ModVal);
9439 }
9440}
9441
9442void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
9443 OptionalImmIndexMap &OptionalIdx) {
9444 unsigned Opc = Inst.getOpcode();
9445
9446 unsigned I = 1;
9447 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9448 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9449 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9450 }
9451
9452 for (unsigned E = Operands.size(); I != E; ++I) {
9453 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9455 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9456 } else if (Op.isImmModifier()) {
9457 OptionalIdx[Op.getImmTy()] = I;
9458 } else {
9459 Op.addRegOrImmOperands(Inst, 1);
9460 }
9461 }
9462
9463 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::scale_sel))
9464 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9465 AMDGPUOperand::ImmTyScaleSel);
9466
9467 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9468 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9469 AMDGPUOperand::ImmTyClamp);
9470
9471 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
9472 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
9473 Inst.addOperand(Inst.getOperand(0));
9474 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9475 AMDGPUOperand::ImmTyByteSel);
9476 }
9477
9478 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9479 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9480 AMDGPUOperand::ImmTyOModSI);
9481
9482 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
9483 // it has src2 register operand that is tied to dst operand
9484 // we don't allow modifiers for this operand in assembler so src2_modifiers
9485 // should be 0.
9486 if (isMAC(Opc)) {
9487 auto *it = Inst.begin();
9488 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
9489 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
9490 ++it;
9491 // Copy the operand to ensure it's not invalidated when Inst grows.
9492 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
9493 }
9494}
9495
9496void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
9497 OptionalImmIndexMap OptionalIdx;
9498 cvtVOP3(Inst, Operands, OptionalIdx);
9499}
9500
9501void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
9502 OptionalImmIndexMap &OptIdx) {
9503 const int Opc = Inst.getOpcode();
9504 const MCInstrDesc &Desc = MII.get(Opc);
9505
9506 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
9507
9508 if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
9509 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
9510 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9511 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9512 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9513 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
9514 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
9515 Inst.addOperand(Inst.getOperand(0));
9516 }
9517
9518 // Adding vdst_in operand is already covered for these DPP instructions in
9519 // cvtVOP3DPP.
9520 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
9521 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 ||
9522 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 ||
9523 Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 ||
9524 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 ||
9525 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 ||
9526 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 ||
9527 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 ||
9528 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 ||
9529 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
9530 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9531 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp_gfx1250 ||
9532 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp8_gfx1250 ||
9533 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9534 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9535 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp_gfx1250 ||
9536 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp_gfx1250 ||
9537 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp8_gfx1250 ||
9538 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp8_gfx1250 ||
9539 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_gfx1250 ||
9540 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_gfx1250 ||
9541 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp_gfx1250 ||
9542 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp_gfx1250 ||
9543 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp8_gfx1250 ||
9544 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp8_gfx1250 ||
9545 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_gfx1250 ||
9546 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_gfx1250)) {
9547 Inst.addOperand(Inst.getOperand(0));
9548 }
9549
9550 int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
9551 if (BitOp3Idx != -1) {
9552 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9553 }
9554
9555 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
9556 // instruction, and then figure out where to actually put the modifiers
9557
9558 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9559 if (OpSelIdx != -1) {
9560 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
9561 }
9562
9563 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
9564 if (OpSelHiIdx != -1) {
9565 int DefaultVal = IsPacked ? -1 : 0;
9566 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
9567 DefaultVal);
9568 }
9569
9570 int MatrixAFMTIdx =
9571 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
9572 if (MatrixAFMTIdx != -1) {
9573 addOptionalImmOperand(Inst, Operands, OptIdx,
9574 AMDGPUOperand::ImmTyMatrixAFMT, 0);
9575 }
9576
9577 int MatrixBFMTIdx =
9578 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
9579 if (MatrixBFMTIdx != -1) {
9580 addOptionalImmOperand(Inst, Operands, OptIdx,
9581 AMDGPUOperand::ImmTyMatrixBFMT, 0);
9582 }
9583
9584 int MatrixAScaleIdx =
9585 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale);
9586 if (MatrixAScaleIdx != -1) {
9587 addOptionalImmOperand(Inst, Operands, OptIdx,
9588 AMDGPUOperand::ImmTyMatrixAScale, 0);
9589 }
9590
9591 int MatrixBScaleIdx =
9592 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale);
9593 if (MatrixBScaleIdx != -1) {
9594 addOptionalImmOperand(Inst, Operands, OptIdx,
9595 AMDGPUOperand::ImmTyMatrixBScale, 0);
9596 }
9597
9598 int MatrixAScaleFmtIdx =
9599 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
9600 if (MatrixAScaleFmtIdx != -1) {
9601 addOptionalImmOperand(Inst, Operands, OptIdx,
9602 AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
9603 }
9604
9605 int MatrixBScaleFmtIdx =
9606 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
9607 if (MatrixBScaleFmtIdx != -1) {
9608 addOptionalImmOperand(Inst, Operands, OptIdx,
9609 AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
9610 }
9611
9612 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
9613 addOptionalImmOperand(Inst, Operands, OptIdx,
9614 AMDGPUOperand::ImmTyMatrixAReuse, 0);
9615
9616 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
9617 addOptionalImmOperand(Inst, Operands, OptIdx,
9618 AMDGPUOperand::ImmTyMatrixBReuse, 0);
9619
9620 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
9621 if (NegLoIdx != -1)
9622 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
9623
9624 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
9625 if (NegHiIdx != -1)
9626 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
9627
9628 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9629 AMDGPU::OpName::src2};
9630 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9631 AMDGPU::OpName::src1_modifiers,
9632 AMDGPU::OpName::src2_modifiers};
9633
9634 unsigned OpSel = 0;
9635 unsigned OpSelHi = 0;
9636 unsigned NegLo = 0;
9637 unsigned NegHi = 0;
9638
9639 if (OpSelIdx != -1)
9640 OpSel = Inst.getOperand(OpSelIdx).getImm();
9641
9642 if (OpSelHiIdx != -1)
9643 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
9644
9645 if (NegLoIdx != -1)
9646 NegLo = Inst.getOperand(NegLoIdx).getImm();
9647
9648 if (NegHiIdx != -1)
9649 NegHi = Inst.getOperand(NegHiIdx).getImm();
9650
9651 for (int J = 0; J < 3; ++J) {
9652 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9653 if (OpIdx == -1)
9654 break;
9655
9656 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9657
9658 if (ModIdx == -1)
9659 continue;
9660
9661 uint32_t ModVal = 0;
9662
9663 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
9664 if (SrcOp.isReg() && getMRI()
9665 ->getRegClass(AMDGPU::VGPR_16RegClassID)
9666 .contains(SrcOp.getReg())) {
9667 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
9668 if (VGPRSuffixIsHi)
9669 ModVal |= SISrcMods::OP_SEL_0;
9670 } else {
9671 if ((OpSel & (1 << J)) != 0)
9672 ModVal |= SISrcMods::OP_SEL_0;
9673 }
9674
9675 if ((OpSelHi & (1 << J)) != 0)
9676 ModVal |= SISrcMods::OP_SEL_1;
9677
9678 if ((NegLo & (1 << J)) != 0)
9679 ModVal |= SISrcMods::NEG;
9680
9681 if ((NegHi & (1 << J)) != 0)
9682 ModVal |= SISrcMods::NEG_HI;
9683
9684 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
9685 }
9686}
9687
9688void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
9689 OptionalImmIndexMap OptIdx;
9690 cvtVOP3(Inst, Operands, OptIdx);
9691 cvtVOP3P(Inst, Operands, OptIdx);
9692}
9693
9695 unsigned i, unsigned Opc,
9696 AMDGPU::OpName OpName) {
9697 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
9698 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9699 else
9700 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
9701}
9702
9703void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
9704 unsigned Opc = Inst.getOpcode();
9705
9706 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9707 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
9708 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
9709 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
9710 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
9711
9712 OptionalImmIndexMap OptIdx;
9713 for (unsigned i = 5; i < Operands.size(); ++i) {
9714 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9715 OptIdx[Op.getImmTy()] = i;
9716 }
9717
9718 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
9719 addOptionalImmOperand(Inst, Operands, OptIdx,
9720 AMDGPUOperand::ImmTyIndexKey8bit);
9721
9722 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
9723 addOptionalImmOperand(Inst, Operands, OptIdx,
9724 AMDGPUOperand::ImmTyIndexKey16bit);
9725
9726 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
9727 addOptionalImmOperand(Inst, Operands, OptIdx,
9728 AMDGPUOperand::ImmTyIndexKey32bit);
9729
9730 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9731 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
9732
9733 cvtVOP3P(Inst, Operands, OptIdx);
9734}
9735
//===----------------------------------------------------------------------===//
// VOPD
//===----------------------------------------------------------------------===//
9739
9740ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
9741 if (!hasVOPD(getSTI()))
9742 return ParseStatus::NoMatch;
9743
9744 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
9745 SMLoc S = getLoc();
9746 lex();
9747 lex();
9748 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
9749 SMLoc OpYLoc = getLoc();
9750 StringRef OpYName;
9751 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
9752 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
9753 return ParseStatus::Success;
9754 }
9755 return Error(OpYLoc, "expected a VOPDY instruction after ::");
9756 }
9757 return ParseStatus::NoMatch;
9758}
9759
9760// Create VOPD MCInst operands using parsed assembler operands.
9761void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
9762 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9763
9764 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
9765 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
9767 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9768 return;
9769 }
9770 if (Op.isReg()) {
9771 Op.addRegOperands(Inst, 1);
9772 return;
9773 }
9774 if (Op.isImm()) {
9775 Op.addImmOperands(Inst, 1);
9776 return;
9777 }
9778 llvm_unreachable("Unhandled operand type in cvtVOPD");
9779 };
9780
9781 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
9782
9783 // MCInst operands are ordered as follows:
9784 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
9785
9786 for (auto CompIdx : VOPD::COMPONENTS) {
9787 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
9788 }
9789
9790 for (auto CompIdx : VOPD::COMPONENTS) {
9791 const auto &CInfo = InstInfo[CompIdx];
9792 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
9793 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
9794 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
9795 if (CInfo.hasSrc2Acc())
9796 addOp(CInfo.getIndexOfDstInParsedOperands());
9797 }
9798
9799 int BitOp3Idx =
9800 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
9801 if (BitOp3Idx != -1) {
9802 OptionalImmIndexMap OptIdx;
9803 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
9804 if (Op.isImm())
9805 OptIdx[Op.getImmTy()] = Operands.size() - 1;
9806
9807 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9808 }
9809}
9810
//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//
9814
9815bool AMDGPUOperand::isDPP8() const {
9816 return isImmTy(ImmTyDPP8);
9817}
9818
9819bool AMDGPUOperand::isDPPCtrl() const {
9820 using namespace AMDGPU::DPP;
9821
9822 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
9823 if (result) {
9824 int64_t Imm = getImm();
9825 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
9826 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
9827 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
9828 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
9829 (Imm == DppCtrl::WAVE_SHL1) ||
9830 (Imm == DppCtrl::WAVE_ROL1) ||
9831 (Imm == DppCtrl::WAVE_SHR1) ||
9832 (Imm == DppCtrl::WAVE_ROR1) ||
9833 (Imm == DppCtrl::ROW_MIRROR) ||
9834 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
9835 (Imm == DppCtrl::BCAST15) ||
9836 (Imm == DppCtrl::BCAST31) ||
9837 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
9838 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
9839 }
9840 return false;
9841}
9842
//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//
9846
9847bool AMDGPUOperand::isBLGP() const {
9848 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
9849}
9850
9851bool AMDGPUOperand::isS16Imm() const {
9852 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
9853}
9854
9855bool AMDGPUOperand::isU16Imm() const {
9856 return isImmLiteral() && isUInt<16>(getImm());
9857}
9858
//===----------------------------------------------------------------------===//
// dim
//===----------------------------------------------------------------------===//
9862
9863bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
9864 // We want to allow "dim:1D" etc.,
9865 // but the initial 1 is tokenized as an integer.
9866 std::string Token;
9867 if (isToken(AsmToken::Integer)) {
9868 SMLoc Loc = getToken().getEndLoc();
9869 Token = std::string(getTokenStr());
9870 lex();
9871 if (getLoc() != Loc)
9872 return false;
9873 }
9874
9875 StringRef Suffix;
9876 if (!parseId(Suffix))
9877 return false;
9878 Token += Suffix;
9879
9880 StringRef DimId = Token;
9881 DimId.consume_front("SQ_RSRC_IMG_");
9882
9883 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
9884 if (!DimInfo)
9885 return false;
9886
9887 Encoding = DimInfo->Encoding;
9888 return true;
9889}
9890
9891ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
9892 if (!isGFX10Plus())
9893 return ParseStatus::NoMatch;
9894
9895 SMLoc S = getLoc();
9896
9897 if (!trySkipId("dim", AsmToken::Colon))
9898 return ParseStatus::NoMatch;
9899
9900 unsigned Encoding;
9901 SMLoc Loc = getLoc();
9902 if (!parseDimId(Encoding))
9903 return Error(Loc, "invalid dim value");
9904
9905 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
9906 AMDGPUOperand::ImmTyDim));
9907 return ParseStatus::Success;
9908}
9909
//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//
9913
9914ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9915 SMLoc S = getLoc();
9916
9917 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9918 return ParseStatus::NoMatch;
9919
9920 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
9921
9922 int64_t Sels[8];
9923
9924 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9925 return ParseStatus::Failure;
9926
9927 for (size_t i = 0; i < 8; ++i) {
9928 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9929 return ParseStatus::Failure;
9930
9931 SMLoc Loc = getLoc();
9932 if (getParser().parseAbsoluteExpression(Sels[i]))
9933 return ParseStatus::Failure;
9934 if (0 > Sels[i] || 7 < Sels[i])
9935 return Error(Loc, "expected a 3-bit value");
9936 }
9937
9938 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9939 return ParseStatus::Failure;
9940
9941 unsigned DPP8 = 0;
9942 for (size_t i = 0; i < 8; ++i)
9943 DPP8 |= (Sels[i] << (i * 3));
9944
9945 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9946 return ParseStatus::Success;
9947}
9948
9949bool
9950AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9951 const OperandVector &Operands) {
9952 if (Ctrl == "row_newbcast")
9953 return isGFX90A();
9954
9955 if (Ctrl == "row_share" ||
9956 Ctrl == "row_xmask")
9957 return isGFX10Plus();
9958
9959 if (Ctrl == "wave_shl" ||
9960 Ctrl == "wave_shr" ||
9961 Ctrl == "wave_rol" ||
9962 Ctrl == "wave_ror" ||
9963 Ctrl == "row_bcast")
9964 return isVI() || isGFX9();
9965
9966 return Ctrl == "row_mirror" ||
9967 Ctrl == "row_half_mirror" ||
9968 Ctrl == "quad_perm" ||
9969 Ctrl == "row_shl" ||
9970 Ctrl == "row_shr" ||
9971 Ctrl == "row_ror";
9972}
9973
9974int64_t
9975AMDGPUAsmParser::parseDPPCtrlPerm() {
9976 // quad_perm:[%d,%d,%d,%d]
9977
9978 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9979 return -1;
9980
9981 int64_t Val = 0;
9982 for (int i = 0; i < 4; ++i) {
9983 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9984 return -1;
9985
9986 int64_t Temp;
9987 SMLoc Loc = getLoc();
9988 if (getParser().parseAbsoluteExpression(Temp))
9989 return -1;
9990 if (Temp < 0 || Temp > 3) {
9991 Error(Loc, "expected a 2-bit value");
9992 return -1;
9993 }
9994
9995 Val += (Temp << i * 2);
9996 }
9997
9998 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9999 return -1;
10000
10001 return Val;
10002}
10003
10004int64_t
10005AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
10006 using namespace AMDGPU::DPP;
10007
10008 // sel:%d
10009
10010 int64_t Val;
10011 SMLoc Loc = getLoc();
10012
10013 if (getParser().parseAbsoluteExpression(Val))
10014 return -1;
10015
10016 struct DppCtrlCheck {
10017 int64_t Ctrl;
10018 int Lo;
10019 int Hi;
10020 };
10021
10022 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
10023 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
10024 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
10025 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
10026 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
10027 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
10028 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
10029 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
10030 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
10031 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
10032 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
10033 .Default({-1, 0, 0});
10034
10035 bool Valid;
10036 if (Check.Ctrl == -1) {
10037 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
10038 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
10039 } else {
10040 Valid = Check.Lo <= Val && Val <= Check.Hi;
10041 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
10042 }
10043
10044 if (!Valid) {
10045 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
10046 return -1;
10047 }
10048
10049 return Val;
10050}
10051
10052ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
10053 using namespace AMDGPU::DPP;
10054
10055 if (!isToken(AsmToken::Identifier) ||
10056 !isSupportedDPPCtrl(getTokenStr(), Operands))
10057 return ParseStatus::NoMatch;
10058
10059 SMLoc S = getLoc();
10060 int64_t Val = -1;
10061 StringRef Ctrl;
10062
10063 parseId(Ctrl);
10064
10065 if (Ctrl == "row_mirror") {
10066 Val = DppCtrl::ROW_MIRROR;
10067 } else if (Ctrl == "row_half_mirror") {
10068 Val = DppCtrl::ROW_HALF_MIRROR;
10069 } else {
10070 if (skipToken(AsmToken::Colon, "expected a colon")) {
10071 if (Ctrl == "quad_perm") {
10072 Val = parseDPPCtrlPerm();
10073 } else {
10074 Val = parseDPPCtrlSel(Ctrl);
10075 }
10076 }
10077 }
10078
10079 if (Val == -1)
10080 return ParseStatus::Failure;
10081
10082 Operands.push_back(
10083 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
10084 return ParseStatus::Success;
10085}
10086
// Convert the parsed operand list into an MCInst for a VOP3 instruction
// carrying a DPP or DPP8 modifier. Handles the irregular operand layouts of
// MAC (untied 'old', dummy src2_modifiers), vdst_in, and the gfx12 CVT_SR
// conversions, then appends the trailing optional immediates (clamp,
// byte_sel, omod and the DPP control fields).
void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
                                 bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  // MAC instructions are special because they have 'old'
  // operand which is not tied to dst (but assumed to be).
  // They also have dummy unused src2_modifiers.
  int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
  int Src2ModIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
  bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
               Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;

  // Operands[0] is the mnemonic token; copy the destination register(s)
  // first so MCInst operand indices line up with the descriptor.
  unsigned I = 1;
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
  bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
                        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
                        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
                        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;

  for (unsigned E = Operands.size(); I != E; ++I) {

    if (IsMAC) {
      int NumOperands = Inst.getNumOperands();
      if (OldIdx == NumOperands) {
        // Handle old operand
        constexpr int DST_IDX = 0;
        Inst.addOperand(Inst.getOperand(DST_IDX));
      } else if (Src2ModIdx == NumOperands) {
        // Add unused dummy src2_modifiers
        // NOTE(review): the statement that appends the dummy src2_modifiers
        // immediate appears to be missing from this copy — verify upstream.
      }
    }

    // vdst_in is tied to the destination; mirror operand 0 into its slot.
    if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
      Inst.addOperand(Inst.getOperand(0));
    }

    if (IsVOP3CvtSrDpp) {
      if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
        // NOTE(review): a statement appending a src2_modifiers immediate
        // appears to be missing from this copy — verify upstream.
        Inst.addOperand(MCOperand::createReg(MCRegister()));
      }
    }

    // NOTE(review): the continuation of this call (the constraint-kind
    // argument, presumably MCOI::TIED_TO) appears truncated in this copy —
    // verify against upstream before relying on this text.
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (IsDPP8 && Op.isDppFI()) {
      // fi is emitted after dpp8 below; stash it for now.
      Fi = Op.getImm();
    } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImm() &&
               Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      // Optional immediate: remember where it was so it can be emitted in
      // canonical order below.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyClamp);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
    if (VdstInIdx == static_cast<int>(Inst.getNumOperands()))
      Inst.addOperand(Inst.getOperand(0));
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyByteSel);
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);

  // Delegate op_sel handling to the matching VOP3/VOP3P converter.
  if (Desc.TSFlags & SIInstrFlags::VOP3P)
    cvtVOP3P(Inst, Operands, OptionalIdx);
  else if (Desc.TSFlags & SIInstrFlags::VOP3)
    cvtVOP3OpSel(Inst, Operands, OptionalIdx);
  else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
  }

  if (IsDPP8) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);

    if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyDppFI);
  }
}
10201
// Convert the parsed operand list into an MCInst for a VOP1/VOP2 instruction
// with a DPP or DPP8 modifier, then append the optional DPP immediates
// (row_mask, bank_mask, bound_ctrl, fi) in canonical order.
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  // Operands[0] is the mnemonic token; copy the destination register(s)
  // first so MCInst operand indices line up with the descriptor.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    // NOTE(review): the continuation of this call (the constraint-kind
    // argument, presumably MCOI::TIED_TO) appears truncated in this copy —
    // verify against upstream before relying on this text.
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDppFI()) {
        // fi is emitted after dpp8 below; stash it for now.
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
        // NOTE(review): an enclosing condition (presumably
        // isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) appears to be
        // missing from this copy — verify upstream.
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyDppFI);
    }
  }
}
10269
10270//===----------------------------------------------------------------------===//
10271// sdwa
10272//===----------------------------------------------------------------------===//
10273
10274ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
10275 StringRef Prefix,
10276 AMDGPUOperand::ImmTy Type) {
10277 return parseStringOrIntWithPrefix(
10278 Operands, Prefix,
10279 {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
10280 Type);
10281}
10282
10283ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
10284 return parseStringOrIntWithPrefix(
10285 Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
10286 AMDGPUOperand::ImmTySDWADstUnused);
10287}
10288
10289void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
10290 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
10291}
10292
10293void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
10294 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
10295}
10296
10297void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
10298 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
10299}
10300
10301void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
10302 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
10303}
10304
10305void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
10306 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
10307}
10308
// Convert the parsed operand list into an MCInst for an SDWA instruction of
// the given basic class (VOP1/VOP2/VOPC). SkipDstVcc/SkipSrcVcc drop the
// textual "vcc" tokens that VOP2b/VOP2e/VOPC spell out but that the MCInst
// encodes implicitly. Trailing optional immediates (clamp, omod, dst_sel,
// dst_unused, src0_sel, src1_sel) are appended per class afterwards.
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  // At most one vcc token is skipped per adjacent pair; see comment below.
  bool SkippedVcc = false;

  // Operands[0] is the mnemonic token; copy the destination register(s)
  // first so MCInst operand indices line up with the descriptor.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      }
      if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    // NOTE(review): an enclosing condition (presumably
    // isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) appears to be
    // missing from this copy — verify upstream.
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  const unsigned Opc = Inst.getOpcode();
  if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
      Opc != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyClamp, 0);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyOModSI, 0);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySDWADstUnused,
                              DstUnused::UNUSED_PRESERVE);

      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyClamp, 0);

      if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);

      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyClamp, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto *it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
10418
10419/// Force static initialization.
10420extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
10425
10426#define GET_MATCHER_IMPLEMENTATION
10427#define GET_MNEMONIC_SPELL_CHECKER
10428#define GET_MNEMONIC_CHECKER
10429#include "AMDGPUGenAsmMatcher.inc"
10430
10431ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
10432 unsigned MCK) {
10433 switch (MCK) {
10434 case MCK_addr64:
10435 return parseTokenOp("addr64", Operands);
10436 case MCK_done:
10437 return parseTokenOp("done", Operands);
10438 case MCK_idxen:
10439 return parseTokenOp("idxen", Operands);
10440 case MCK_lds:
10441 return parseTokenOp("lds", Operands);
10442 case MCK_offen:
10443 return parseTokenOp("offen", Operands);
10444 case MCK_off:
10445 return parseTokenOp("off", Operands);
10446 case MCK_row_95_en:
10447 return parseTokenOp("row_en", Operands);
10448 case MCK_gds:
10449 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
10450 case MCK_tfe:
10451 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
10452 }
10453 return tryCustomParseOperand(Operands, MCK);
10454}
10455
10456// This function should be defined after auto-generated include so that we have
10457// MatchClassKind enum defined
10458unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
10459 unsigned Kind) {
10460 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
10461 // But MatchInstructionImpl() expects to meet token and fails to validate
10462 // operand. This method checks if we are given immediate operand but expect to
10463 // get corresponding token.
10464 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
10465 switch (Kind) {
10466 case MCK_addr64:
10467 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10468 case MCK_gds:
10469 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10470 case MCK_lds:
10471 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10472 case MCK_idxen:
10473 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10474 case MCK_offen:
10475 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10476 case MCK_tfe:
10477 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10478 case MCK_SSrc_b32:
10479 // When operands have expression values, they will return true for isToken,
10480 // because it is not possible to distinguish between a token and an
10481 // expression at parse time. MatchInstructionImpl() will always try to
10482 // match an operand as a token, when isToken returns true, and when the
10483 // name of the expression is not a valid token, the match will fail,
10484 // so we need to handle it here.
10485 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10486 case MCK_SSrc_f32:
10487 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10488 case MCK_SOPPBrTarget:
10489 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10490 case MCK_VReg32OrOff:
10491 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10492 case MCK_InterpSlot:
10493 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10494 case MCK_InterpAttr:
10495 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10496 case MCK_InterpAttrChan:
10497 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10498 case MCK_SReg_64:
10499 case MCK_SReg_64_XEXEC:
10500 // Null is defined as a 32-bit register but
10501 // it should also be enabled with 64-bit operands or larger.
10502 // The following code enables it for SReg_64 and larger operands
10503 // used as source and destination. Remaining source
10504 // operands are handled in isInlinableImm.
10505 case MCK_SReg_96:
10506 case MCK_SReg_128:
10507 case MCK_SReg_256:
10508 case MCK_SReg_512:
10509 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10510 default:
10511 return Match_InvalidOperand;
10512 }
10513}
10514
10515//===----------------------------------------------------------------------===//
10516// endpgm
10517//===----------------------------------------------------------------------===//
10518
10519ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
10520 SMLoc S = getLoc();
10521 int64_t Imm = 0;
10522
10523 if (!parseExpr(Imm)) {
10524 // The operand is optional, if not present default to 0
10525 Imm = 0;
10526 }
10527
10528 if (!isUInt<16>(Imm))
10529 return Error(S, "expected a 16-bit value");
10530
10531 Operands.push_back(
10532 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
10533 return ParseStatus::Success;
10534}
10535
10536bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
10537
10538//===----------------------------------------------------------------------===//
10539// Split Barrier
10540//===----------------------------------------------------------------------===//
10541
10542bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
#define Success
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
SmallVector< int16_t, MAX_SRC_OPERANDS_NUM > OperandIndices
static bool checkWriteLane(const MCInst &Inst)
static bool getRegNum(StringRef Str, unsigned &Num)
static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, unsigned i, unsigned Opc, AMDGPU::OpName OpName)
static constexpr RegInfo RegularRegisters[]
static const RegInfo * getRegularRegInfo(StringRef Str)
static ArrayRef< unsigned > getAllVariants()
static OperandIndices getSrcOperandIndices(unsigned Opcode, bool AddMandatoryLiterals=false)
static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name, const MCRegisterInfo *MRI)
static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
static const fltSemantics * getFltSemantics(unsigned Size)
static bool isRegularReg(RegisterKind Kind)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser()
Force static initialization.
static bool ConvertOmodMul(int64_t &Mul)
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi)
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT)
constexpr uint64_t MIMGFlags
static bool AMDGPUCheckMnemonic(StringRef Mnemonic, const FeatureBitset &AvailableFeatures, unsigned VariantID)
static void applyMnemonicAliases(StringRef &Mnemonic, const FeatureBitset &Features, unsigned VariantID)
constexpr unsigned MAX_SRC_OPERANDS_NUM
#define EXPR_RESOLVE_OR_ERROR(RESOLVED)
static bool ConvertOmodDiv(int64_t &Div)
static bool IsRevOpcode(const unsigned Opcode)
static bool encodeCnt(const AMDGPU::IsaVersion ISA, int64_t &IntVal, int64_t CntVal, bool Saturate, unsigned(*encode)(const IsaVersion &Version, unsigned, unsigned), unsigned(*decode)(const IsaVersion &Version, unsigned))
static MCRegister getSpecialRegForName(StringRef RegName)
static void addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands, AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx, AMDGPUOperand::ImmTy ImmT, int64_t Default=0, std::optional< unsigned > InsertAt=std::nullopt)
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI)
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum)
static const fltSemantics * getOpFltSemantics(uint8_t OperandType)
static bool isInvalidVOPDY(const OperandVector &Operands, uint64_t InvalidOprIdx)
static std::string AMDGPUMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID=0)
static LLVM_READNONE unsigned encodeBitmaskPerm(const unsigned AndMask, const unsigned OrMask, const unsigned XorMask)
static bool isSafeTruncation(int64_t Val, unsigned Size)
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Provides AMDGPU specific target descriptions.
AMDGPU metadata definitions and in-memory representations.
AMDHSA kernel descriptor definitions.
static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value, raw_ostream &Err)
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
#define LLVM_READNONE
Definition Compiler.h:315
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
@ Default
#define Check(C,...)
static llvm::Expected< InlineInfo > decode(DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr)
Decode an InlineInfo in Data at the specified offset.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
Loop::LoopBounds::Direction Direction
Definition LoopInfo.cpp:231
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(PassOpts->AAPipeline)
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Interface definition for SIInstrInfo.
unsigned unsigned DefaultVal
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
This file implements the SmallBitVector class.
StringSet - A set-like wrapper for the StringMap.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
BinaryOperator * Mul
static const char * getRegisterName(MCRegister Reg)
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & BFloat()
Definition APFloat.h:295
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6053
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:131
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition MCAsmMacro.h:103
bool is(TokenKind K) const
Definition MCAsmMacro.h:75
Register getReg() const
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
void printExpr(raw_ostream &, const MCExpr &) const
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:353
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:428
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getNumOperands() const
Definition MCInst.h:212
SMLoc getLoc() const
Definition MCInst.h:208
void setLoc(SMLoc loc)
Definition MCInst.h:207
unsigned getOpcode() const
Definition MCInst.h:202
iterator insert(iterator I, const MCOperand &Op)
Definition MCInst.h:232
void addOperand(const MCOperand Op)
Definition MCInst.h:215
iterator begin()
Definition MCInst.h:227
size_t size() const
Definition MCInst.h:226
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
int16_t getOpRegClassID(const MCOperandInfo &OpInfo, unsigned HwModeId) const
Return the ID of the register class to use for OpInfo, for the active HwMode HwModeId.
Definition MCInstrInfo.h:80
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
void setImm(int64_t Val)
Definition MCInst.h:89
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
bool isImm() const
Definition MCInst.h:66
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
const MCExpr * getExpr() const
Definition MCInst.h:118
bool isExpr() const
Definition MCInst.h:69
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
constexpr bool isValid() const
Definition MCRegister.h:84
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
Generic base class for all target subtargets.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
void setRedefinable(bool Value)
Mark this symbol as redefinable.
Definition MCSymbol.h:210
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
MCTargetAsmParser - Generic interface to target specific assembly parsers.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
constexpr unsigned id() const
Definition Register.h:100
Represents a location in source code.
Definition SMLoc.h:22
static SMLoc getFromPointer(const char *Ptr)
Definition SMLoc.h:35
constexpr const char * getPointer() const
Definition SMLoc.h:33
constexpr bool isValid() const
Definition SMLoc.h:28
SMLoc Start
Definition SMLoc.h:49
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:854
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
Definition StringRef.h:657
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:573
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:611
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
bool consume_front(StringRef Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:637
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition StringRef.h:273
bool contains(StringRef key) const
Check if the set contains the given key.
Definition StringSet.h:60
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition StringSet.h:39
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
static constexpr CustomOperand Operands[]
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isInlineValue(MCRegister Reg)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY bool isLitExpr(const MCExpr *Expr)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool hasMAIInsts(const MCSubtargetInfo &STI)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
LLVM_READONLY int64_t getLitValue(const MCExpr *Expr)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:231
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:202
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:209
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:222
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:219
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:224
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:210
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:206
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:201
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:208
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:207
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:218
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:216
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:211
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:205
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:225
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:236
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:237
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:212
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:204
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:221
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:217
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:223
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:213
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:238
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:220
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:203
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:228
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1430
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ OPERAND_IMMEDIATE
Definition MCInstrDesc.h:61
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
@ Valid
The data is already valid.
Context & getContext() const
Definition BasicBlock.h:99
bool isNull(StringRef S)
Definition YAMLTraits.h:570
This is an optimization pass for GlobalISel generic memory operations.
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
Definition Error.h:1113
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
StringMapEntry< Value * > ValueName
Definition Value.h:56
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
Definition Alignment.h:206
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
static bool isMem(const MachineInstr &MI, unsigned Op)
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
void PrintError(const Twine &Msg)
Definition Error.cpp:104
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
FunctionAddr VTableAddr uintptr_t uintptr_t DataSize
Definition InstrProf.h:267
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:345
Op::Description Desc
Target & getTheR600Target()
The target for R600 GPUs.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
SmallVectorImpl< std::unique_ptr< MCParsedAsmOperand > > OperandVector
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:302
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1758
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
Definition DWP.h:27
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20
#define N
RegisterKind Kind
StringLiteral Name
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx, bool InitMCExpr=true)
Instruction set architecture version.
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...