AMDGPUAsmParser.cpp
1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
16#include "SIDefines.h"
17#include "SIInstrInfo.h"
22#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/StringSet.h"
25#include "llvm/ADT/Twine.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/MC/MCInstrDesc.h"
38#include "llvm/MC/MCSymbol.h"
46#include <optional>
47
48using namespace llvm;
49using namespace llvm::AMDGPU;
50using namespace llvm::amdhsa;
51
52namespace {
53
54class AMDGPUAsmParser;
55
56enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
57
58//===----------------------------------------------------------------------===//
59// Operand
60//===----------------------------------------------------------------------===//
61
62class AMDGPUOperand : public MCParsedAsmOperand {
63 enum KindTy {
64 Token,
65 Immediate,
66 Register,
67 Expression
68 } Kind;
69
70 SMLoc StartLoc, EndLoc;
71 const AMDGPUAsmParser *AsmParser;
72
73public:
74 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
75 : Kind(Kind_), AsmParser(AsmParser_) {}
76
77 using Ptr = std::unique_ptr<AMDGPUOperand>;
78
79 struct Modifiers {
80 bool Abs = false;
81 bool Neg = false;
82 bool Sext = false;
83 LitModifier Lit = LitModifier::None;
84
85 bool hasFPModifiers() const { return Abs || Neg; }
86 bool hasIntModifiers() const { return Sext; }
87 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
88
89 int64_t getFPModifiersOperand() const {
90 int64_t Operand = 0;
91 Operand |= Abs ? SISrcMods::ABS : 0u;
92 Operand |= Neg ? SISrcMods::NEG : 0u;
93 return Operand;
94 }
95
96 int64_t getIntModifiersOperand() const {
97 int64_t Operand = 0;
98 Operand |= Sext ? SISrcMods::SEXT : 0u;
99 return Operand;
100 }
101
102 int64_t getModifiersOperand() const {
103 assert(!(hasFPModifiers() && hasIntModifiers())
104 && "fp and int modifiers should not be used simultaneously");
105 if (hasFPModifiers())
106 return getFPModifiersOperand();
107 if (hasIntModifiers())
108 return getIntModifiersOperand();
109 return 0;
110 }
111
112 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
113 };
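// Illustration of how these flags combine into the src-modifier immediate
// (SISrcMods values come from SIDefines.h): a source written as "-|v0|" sets
// both Neg and Abs, so getModifiersOperand() returns SISrcMods::NEG |
// SISrcMods::ABS, while "sext(v0)" sets only Sext and yields SISrcMods::SEXT.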
114
115 enum ImmTy {
116 ImmTyNone,
117 ImmTyGDS,
118 ImmTyLDS,
119 ImmTyOffen,
120 ImmTyIdxen,
121 ImmTyAddr64,
122 ImmTyOffset,
123 ImmTyInstOffset,
124 ImmTyOffset0,
125 ImmTyOffset1,
126 ImmTySMEMOffsetMod,
127 ImmTyCPol,
128 ImmTyTFE,
129 ImmTyD16,
130 ImmTyClamp,
131 ImmTyOModSI,
132 ImmTySDWADstSel,
133 ImmTySDWASrc0Sel,
134 ImmTySDWASrc1Sel,
135 ImmTySDWADstUnused,
136 ImmTyDMask,
137 ImmTyDim,
138 ImmTyUNorm,
139 ImmTyDA,
140 ImmTyR128A16,
141 ImmTyA16,
142 ImmTyLWE,
143 ImmTyExpTgt,
144 ImmTyExpCompr,
145 ImmTyExpVM,
146 ImmTyFORMAT,
147 ImmTyHwreg,
148 ImmTyOff,
149 ImmTySendMsg,
150 ImmTyInterpSlot,
151 ImmTyInterpAttr,
152 ImmTyInterpAttrChan,
153 ImmTyOpSel,
154 ImmTyOpSelHi,
155 ImmTyNegLo,
156 ImmTyNegHi,
157 ImmTyIndexKey8bit,
158 ImmTyIndexKey16bit,
159 ImmTyIndexKey32bit,
160 ImmTyDPP8,
161 ImmTyDppCtrl,
162 ImmTyDppRowMask,
163 ImmTyDppBankMask,
164 ImmTyDppBoundCtrl,
165 ImmTyDppFI,
166 ImmTySwizzle,
167 ImmTyGprIdxMode,
168 ImmTyHigh,
169 ImmTyBLGP,
170 ImmTyCBSZ,
171 ImmTyABID,
172 ImmTyEndpgm,
173 ImmTyWaitVDST,
174 ImmTyWaitEXP,
175 ImmTyWaitVAVDst,
176 ImmTyWaitVMVSrc,
177 ImmTyBitOp3,
178 ImmTyMatrixAFMT,
179 ImmTyMatrixBFMT,
180 ImmTyMatrixAScale,
181 ImmTyMatrixBScale,
182 ImmTyMatrixAScaleFmt,
183 ImmTyMatrixBScaleFmt,
184 ImmTyMatrixAReuse,
185 ImmTyMatrixBReuse,
186 ImmTyScaleSel,
187 ImmTyByteSel,
188 };
189
190private:
191 struct TokOp {
192 const char *Data;
193 unsigned Length;
194 };
195
196 struct ImmOp {
197 int64_t Val;
198 ImmTy Type;
199 bool IsFPImm;
200 Modifiers Mods;
201 };
202
203 struct RegOp {
204 MCRegister RegNo;
205 Modifiers Mods;
206 };
207
208 union {
209 TokOp Tok;
210 ImmOp Imm;
211 RegOp Reg;
212 const MCExpr *Expr;
213 };
214
215 // The index of the associated MCInst operand.
216 mutable int MCOpIdx = -1;
217
218public:
219 bool isToken() const override { return Kind == Token; }
220
221 bool isSymbolRefExpr() const {
222 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
223 }
224
225 bool isImm() const override {
226 return Kind == Immediate;
227 }
228
229 bool isInlinableImm(MVT type) const;
230 bool isLiteralImm(MVT type) const;
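// Background for the two predicates above: an "inlinable" immediate is one the
// hardware encodes directly in the instruction word (small integers in the
// -16..64 range plus a few FP constants such as 0.5, 1.0, 2.0, 4.0 and their
// negations), while any other constant must be emitted as a separate literal
// dword. The exact set is subtarget-dependent, e.g. 1/(2*pi) requires
// FeatureInv2PiInlineImm.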
231
232 bool isRegKind() const {
233 return Kind == Register;
234 }
235
236 bool isReg() const override {
237 return isRegKind() && !hasModifiers();
238 }
239
240 bool isRegOrInline(unsigned RCID, MVT type) const {
241 return isRegClass(RCID) || isInlinableImm(type);
242 }
243
244 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
245 return isRegOrInline(RCID, type) || isLiteralImm(type);
246 }
247
248 bool isRegOrImmWithInt16InputMods() const {
249 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
250 }
251
252 template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
253 return isRegOrImmWithInputMods(
254 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
255 }
256
257 bool isRegOrImmWithInt32InputMods() const {
258 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
259 }
260
261 bool isRegOrInlineImmWithInt16InputMods() const {
262 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
263 }
264
265 template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
266 return isRegOrInline(
267 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
268 }
269
270 bool isRegOrInlineImmWithInt32InputMods() const {
271 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
272 }
273
274 bool isRegOrImmWithInt64InputMods() const {
275 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
276 }
277
278 bool isRegOrImmWithFP16InputMods() const {
279 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
280 }
281
282 template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
283 return isRegOrImmWithInputMods(
284 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
285 }
286
287 bool isRegOrImmWithFP32InputMods() const {
288 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
289 }
290
291 bool isRegOrImmWithFP64InputMods() const {
292 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
293 }
294
295 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
296 return isRegOrInline(
297 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
298 }
299
300 bool isRegOrInlineImmWithFP32InputMods() const {
301 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
302 }
303
304 bool isRegOrInlineImmWithFP64InputMods() const {
305 return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
306 }
307
308 bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }
309
310 bool isVRegWithFP32InputMods() const {
311 return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
312 }
313
314 bool isVRegWithFP64InputMods() const {
315 return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
316 }
317
318 bool isPackedFP16InputMods() const {
319 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
320 }
321
322 bool isPackedVGPRFP32InputMods() const {
323 return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
324 }
325
326 bool isVReg() const {
327 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
328 isRegClass(AMDGPU::VReg_64RegClassID) ||
329 isRegClass(AMDGPU::VReg_96RegClassID) ||
330 isRegClass(AMDGPU::VReg_128RegClassID) ||
331 isRegClass(AMDGPU::VReg_160RegClassID) ||
332 isRegClass(AMDGPU::VReg_192RegClassID) ||
333 isRegClass(AMDGPU::VReg_256RegClassID) ||
334 isRegClass(AMDGPU::VReg_512RegClassID) ||
335 isRegClass(AMDGPU::VReg_1024RegClassID);
336 }
337
338 bool isVReg32() const {
339 return isRegClass(AMDGPU::VGPR_32RegClassID);
340 }
341
342 bool isVReg32OrOff() const {
343 return isOff() || isVReg32();
344 }
345
346 bool isNull() const {
347 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
348 }
349
350 bool isAV_LdSt_32_Align2_RegOp() const {
351 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
352 isRegClass(AMDGPU::AGPR_32RegClassID);
353 }
354
355 bool isVRegWithInputMods() const;
356 template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
357 template <bool IsFake16> bool isT16VRegWithInputMods() const;
358
359 bool isSDWAOperand(MVT type) const;
360 bool isSDWAFP16Operand() const;
361 bool isSDWAFP32Operand() const;
362 bool isSDWAInt16Operand() const;
363 bool isSDWAInt32Operand() const;
364
365 bool isImmTy(ImmTy ImmT) const {
366 return isImm() && Imm.Type == ImmT;
367 }
368
369 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
370
371 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
372
373 bool isImmModifier() const {
374 return isImm() && Imm.Type != ImmTyNone;
375 }
376
377 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
378 bool isDim() const { return isImmTy(ImmTyDim); }
379 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
380 bool isOff() const { return isImmTy(ImmTyOff); }
381 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
382 bool isOffen() const { return isImmTy(ImmTyOffen); }
383 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
384 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
385 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
386 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
387 bool isGDS() const { return isImmTy(ImmTyGDS); }
388 bool isLDS() const { return isImmTy(ImmTyLDS); }
389 bool isCPol() const { return isImmTy(ImmTyCPol); }
390 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
391 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
392 bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
393 bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
394 bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
395 bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
396 bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
397 bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
398 bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
399 bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
400 bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
401 bool isTFE() const { return isImmTy(ImmTyTFE); }
402 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
403 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
404 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
405 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
406 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
407 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
408 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
409 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
410 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
411 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
412 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
413 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
414 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
415 bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
416
417 bool isRegOrImm() const {
418 return isReg() || isImm();
419 }
420
421 bool isRegClass(unsigned RCID) const;
422
423 bool isInlineValue() const;
424
425 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
426 return isRegOrInline(RCID, type) && !hasModifiers();
427 }
428
429 bool isSCSrcB16() const {
430 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
431 }
432
433 bool isSCSrcV2B16() const {
434 return isSCSrcB16();
435 }
436
437 bool isSCSrc_b32() const {
438 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
439 }
440
441 bool isSCSrc_b64() const {
442 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
443 }
444
445 bool isBoolReg() const;
446
447 bool isSCSrcF16() const {
448 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
449 }
450
451 bool isSCSrcV2F16() const {
452 return isSCSrcF16();
453 }
454
455 bool isSCSrcF32() const {
456 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
457 }
458
459 bool isSCSrcF64() const {
460 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
461 }
462
463 bool isSSrc_b32() const {
464 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
465 }
466
467 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
468
469 bool isSSrcV2B16() const {
470 llvm_unreachable("cannot happen");
471 return isSSrc_b16();
472 }
473
474 bool isSSrc_b64() const {
475 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
476 // See isVSrc64().
477 return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
478 (((const MCTargetAsmParser *)AsmParser)
479 ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
480 isExpr());
481 }
482
483 bool isSSrc_f32() const {
484 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
485 }
486
487 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
488
489 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
490
491 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
492
493 bool isSSrcV2F16() const {
494 llvm_unreachable("cannot happen");
495 return isSSrc_f16();
496 }
497
498 bool isSSrcV2FP32() const {
499 llvm_unreachable("cannot happen");
500 return isSSrc_f32();
501 }
502
503 bool isSCSrcV2FP32() const {
504 llvm_unreachable("cannot happen");
505 return isSCSrcF32();
506 }
507
508 bool isSSrcV2INT32() const {
509 llvm_unreachable("cannot happen");
510 return isSSrc_b32();
511 }
512
513 bool isSCSrcV2INT32() const {
514 llvm_unreachable("cannot happen");
515 return isSCSrc_b32();
516 }
517
518 bool isSSrcOrLds_b32() const {
519 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
520 isLiteralImm(MVT::i32) || isExpr();
521 }
522
523 bool isVCSrc_b32() const {
524 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
525 }
526
527 bool isVCSrc_b32_Lo256() const {
528 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
529 }
530
531 bool isVCSrc_b64_Lo256() const {
532 return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
533 }
534
535 bool isVCSrc_b64() const {
536 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
537 }
538
539 bool isVCSrcT_b16() const {
540 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
541 }
542
543 bool isVCSrcTB16_Lo128() const {
544 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
545 }
546
547 bool isVCSrcFake16B16_Lo128() const {
548 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
549 }
550
551 bool isVCSrc_b16() const {
552 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
553 }
554
555 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
556
557 bool isVCSrc_f32() const {
558 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
559 }
560
561 bool isVCSrc_f64() const {
562 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
563 }
564
565 bool isVCSrcTBF16() const {
566 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
567 }
568
569 bool isVCSrcT_f16() const {
570 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
571 }
572
573 bool isVCSrcT_bf16() const {
574 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
575 }
576
577 bool isVCSrcTBF16_Lo128() const {
578 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
579 }
580
581 bool isVCSrcTF16_Lo128() const {
582 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
583 }
584
585 bool isVCSrcFake16BF16_Lo128() const {
586 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
587 }
588
589 bool isVCSrcFake16F16_Lo128() const {
590 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
591 }
592
593 bool isVCSrc_bf16() const {
594 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
595 }
596
597 bool isVCSrc_f16() const {
598 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
599 }
600
601 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
602
603 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
604
605 bool isVSrc_b32() const {
606 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
607 }
608
609 bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }
610
611 bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
612
613 bool isVSrcT_b16_Lo128() const {
614 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
615 }
616
617 bool isVSrcFake16_b16_Lo128() const {
618 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
619 }
620
621 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
622
623 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
624
625 bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
626
627 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
628
629 bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
630
631 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
632
633 bool isVSrc_f32() const {
634 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
635 }
636
637 bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }
638
639 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
640
641 bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
642
643 bool isVSrcT_bf16_Lo128() const {
644 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
645 }
646
647 bool isVSrcT_f16_Lo128() const {
648 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
649 }
650
651 bool isVSrcFake16_bf16_Lo128() const {
652 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
653 }
654
655 bool isVSrcFake16_f16_Lo128() const {
656 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
657 }
658
659 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
660
661 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
662
663 bool isVSrc_v2bf16() const {
664 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
665 }
666
667 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
668
669 bool isVSrc_v2f16_splat() const { return isVSrc_v2f16(); }
670
671 bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
672
673 bool isVISrcB32() const {
674 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
675 }
676
677 bool isVISrcB16() const {
678 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
679 }
680
681 bool isVISrcV2B16() const {
682 return isVISrcB16();
683 }
684
685 bool isVISrcF32() const {
686 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
687 }
688
689 bool isVISrcF16() const {
690 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
691 }
692
693 bool isVISrcV2F16() const {
694 return isVISrcF16() || isVISrcB32();
695 }
696
697 bool isVISrc_64_bf16() const {
698 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
699 }
700
701 bool isVISrc_64_f16() const {
702 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
703 }
704
705 bool isVISrc_64_b32() const {
706 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
707 }
708
709 bool isVISrc_64B64() const {
710 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
711 }
712
713 bool isVISrc_64_f64() const {
714 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
715 }
716
717 bool isVISrc_64V2FP32() const {
718 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
719 }
720
721 bool isVISrc_64V2INT32() const {
722 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
723 }
724
725 bool isVISrc_256_b32() const {
726 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
727 }
728
729 bool isVISrc_256_f32() const {
730 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
731 }
732
733 bool isVISrc_256B64() const {
734 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
735 }
736
737 bool isVISrc_256_f64() const {
738 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
739 }
740
741 bool isVISrc_512_f64() const {
742 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
743 }
744
745 bool isVISrc_128B16() const {
746 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
747 }
748
749 bool isVISrc_128V2B16() const {
750 return isVISrc_128B16();
751 }
752
753 bool isVISrc_128_b32() const {
754 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
755 }
756
757 bool isVISrc_128_f32() const {
758 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
759 }
760
761 bool isVISrc_256V2FP32() const {
762 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
763 }
764
765 bool isVISrc_256V2INT32() const {
766 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
767 }
768
769 bool isVISrc_512_b32() const {
770 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
771 }
772
773 bool isVISrc_512B16() const {
774 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
775 }
776
777 bool isVISrc_512V2B16() const {
778 return isVISrc_512B16();
779 }
780
781 bool isVISrc_512_f32() const {
782 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
783 }
784
785 bool isVISrc_512F16() const {
786 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
787 }
788
789 bool isVISrc_512V2F16() const {
790 return isVISrc_512F16() || isVISrc_512_b32();
791 }
792
793 bool isVISrc_1024_b32() const {
794 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
795 }
796
797 bool isVISrc_1024B16() const {
798 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
799 }
800
801 bool isVISrc_1024V2B16() const {
802 return isVISrc_1024B16();
803 }
804
805 bool isVISrc_1024_f32() const {
806 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
807 }
808
809 bool isVISrc_1024F16() const {
810 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
811 }
812
813 bool isVISrc_1024V2F16() const {
814 return isVISrc_1024F16() || isVISrc_1024_b32();
815 }
816
817 bool isAISrcB32() const {
818 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
819 }
820
821 bool isAISrcB16() const {
822 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
823 }
824
825 bool isAISrcV2B16() const {
826 return isAISrcB16();
827 }
828
829 bool isAISrcF32() const {
830 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
831 }
832
833 bool isAISrcF16() const {
834 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
835 }
836
837 bool isAISrcV2F16() const {
838 return isAISrcF16() || isAISrcB32();
839 }
840
841 bool isAISrc_64B64() const {
842 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
843 }
844
845 bool isAISrc_64_f64() const {
846 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
847 }
848
849 bool isAISrc_128_b32() const {
850 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
851 }
852
853 bool isAISrc_128B16() const {
854 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
855 }
856
857 bool isAISrc_128V2B16() const {
858 return isAISrc_128B16();
859 }
860
861 bool isAISrc_128_f32() const {
862 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
863 }
864
865 bool isAISrc_128F16() const {
866 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
867 }
868
869 bool isAISrc_128V2F16() const {
870 return isAISrc_128F16() || isAISrc_128_b32();
871 }
872
873 bool isVISrc_128_bf16() const {
874 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
875 }
876
877 bool isVISrc_128_f16() const {
878 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
879 }
880
881 bool isVISrc_128V2F16() const {
882 return isVISrc_128_f16() || isVISrc_128_b32();
883 }
884
885 bool isAISrc_256B64() const {
886 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
887 }
888
889 bool isAISrc_256_f64() const {
890 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
891 }
892
893 bool isAISrc_512_b32() const {
894 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
895 }
896
897 bool isAISrc_512B16() const {
898 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
899 }
900
901 bool isAISrc_512V2B16() const {
902 return isAISrc_512B16();
903 }
904
905 bool isAISrc_512_f32() const {
906 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
907 }
908
909 bool isAISrc_512F16() const {
910 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
911 }
912
913 bool isAISrc_512V2F16() const {
914 return isAISrc_512F16() || isAISrc_512_b32();
915 }
916
917 bool isAISrc_1024_b32() const {
918 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
919 }
920
921 bool isAISrc_1024B16() const {
922 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
923 }
924
925 bool isAISrc_1024V2B16() const {
926 return isAISrc_1024B16();
927 }
928
929 bool isAISrc_1024_f32() const {
930 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
931 }
932
933 bool isAISrc_1024F16() const {
934 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
935 }
936
937 bool isAISrc_1024V2F16() const {
938 return isAISrc_1024F16() || isAISrc_1024_b32();
939 }
940
941 bool isKImmFP32() const {
942 return isLiteralImm(MVT::f32);
943 }
944
945 bool isKImmFP16() const {
946 return isLiteralImm(MVT::f16);
947 }
948
949 bool isKImmFP64() const { return isLiteralImm(MVT::f64); }
950
951 bool isMem() const override {
952 return false;
953 }
954
955 bool isExpr() const {
956 return Kind == Expression;
957 }
958
959 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
960
961 bool isSWaitCnt() const;
962 bool isDepCtr() const;
963 bool isSDelayALU() const;
964 bool isHwreg() const;
965 bool isSendMsg() const;
966 bool isSplitBarrier() const;
967 bool isSwizzle() const;
968 bool isSMRDOffset8() const;
969 bool isSMEMOffset() const;
970 bool isSMRDLiteralOffset() const;
971 bool isDPP8() const;
972 bool isDPPCtrl() const;
973 bool isBLGP() const;
974 bool isGPRIdxMode() const;
975 bool isS16Imm() const;
976 bool isU16Imm() const;
977 bool isEndpgm() const;
978
979 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
980 return [this, P]() { return P(*this); };
981 }
982
983 StringRef getToken() const {
984 assert(isToken());
985 return StringRef(Tok.Data, Tok.Length);
986 }
987
988 int64_t getImm() const {
989 assert(isImm());
990 return Imm.Val;
991 }
992
993 void setImm(int64_t Val) {
994 assert(isImm());
995 Imm.Val = Val;
996 }
997
998 ImmTy getImmTy() const {
999 assert(isImm());
1000 return Imm.Type;
1001 }
1002
1003 MCRegister getReg() const override {
1004 assert(isRegKind());
1005 return Reg.RegNo;
1006 }
1007
1008 SMLoc getStartLoc() const override {
1009 return StartLoc;
1010 }
1011
1012 SMLoc getEndLoc() const override {
1013 return EndLoc;
1014 }
1015
1016 SMRange getLocRange() const {
1017 return SMRange(StartLoc, EndLoc);
1018 }
1019
1020 int getMCOpIdx() const { return MCOpIdx; }
1021
1022 Modifiers getModifiers() const {
1023 assert(isRegKind() || isImmTy(ImmTyNone));
1024 return isRegKind() ? Reg.Mods : Imm.Mods;
1025 }
1026
1027 void setModifiers(Modifiers Mods) {
1028 assert(isRegKind() || isImmTy(ImmTyNone));
1029 if (isRegKind())
1030 Reg.Mods = Mods;
1031 else
1032 Imm.Mods = Mods;
1033 }
1034
1035 bool hasModifiers() const {
1036 return getModifiers().hasModifiers();
1037 }
1038
1039 bool hasFPModifiers() const {
1040 return getModifiers().hasFPModifiers();
1041 }
1042
1043 bool hasIntModifiers() const {
1044 return getModifiers().hasIntModifiers();
1045 }
1046
1047 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1048
1049 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1050
1051 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1052
1053 void addRegOperands(MCInst &Inst, unsigned N) const;
1054
1055 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1056 if (isRegKind())
1057 addRegOperands(Inst, N);
1058 else
1059 addImmOperands(Inst, N);
1060 }
1061
1062 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1063 Modifiers Mods = getModifiers();
1064 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1065 if (isRegKind()) {
1066 addRegOperands(Inst, N);
1067 } else {
1068 addImmOperands(Inst, N, false);
1069 }
1070 }
1071
1072 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1073 assert(!hasIntModifiers());
1074 addRegOrImmWithInputModsOperands(Inst, N);
1075 }
1076
1077 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1078 assert(!hasFPModifiers());
1079 addRegOrImmWithInputModsOperands(Inst, N);
1080 }
1081
1082 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1083 Modifiers Mods = getModifiers();
1084 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1085 assert(isRegKind());
1086 addRegOperands(Inst, N);
1087 }
1088
1089 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1090 assert(!hasIntModifiers());
1091 addRegWithInputModsOperands(Inst, N);
1092 }
1093
1094 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1095 assert(!hasFPModifiers());
1096 addRegWithInputModsOperands(Inst, N);
1097 }
1098
1099 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1100 // clang-format off
1101 switch (Type) {
1102 case ImmTyNone: OS << "None"; break;
1103 case ImmTyGDS: OS << "GDS"; break;
1104 case ImmTyLDS: OS << "LDS"; break;
1105 case ImmTyOffen: OS << "Offen"; break;
1106 case ImmTyIdxen: OS << "Idxen"; break;
1107 case ImmTyAddr64: OS << "Addr64"; break;
1108 case ImmTyOffset: OS << "Offset"; break;
1109 case ImmTyInstOffset: OS << "InstOffset"; break;
1110 case ImmTyOffset0: OS << "Offset0"; break;
1111 case ImmTyOffset1: OS << "Offset1"; break;
1112 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1113 case ImmTyCPol: OS << "CPol"; break;
1114 case ImmTyIndexKey8bit: OS << "index_key"; break;
1115 case ImmTyIndexKey16bit: OS << "index_key"; break;
1116 case ImmTyIndexKey32bit: OS << "index_key"; break;
1117 case ImmTyTFE: OS << "TFE"; break;
1118 case ImmTyD16: OS << "D16"; break;
1119 case ImmTyFORMAT: OS << "FORMAT"; break;
1120 case ImmTyClamp: OS << "Clamp"; break;
1121 case ImmTyOModSI: OS << "OModSI"; break;
1122 case ImmTyDPP8: OS << "DPP8"; break;
1123 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1124 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1125 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1126 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1127 case ImmTyDppFI: OS << "DppFI"; break;
1128 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1129 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1130 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1131 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1132 case ImmTyDMask: OS << "DMask"; break;
1133 case ImmTyDim: OS << "Dim"; break;
1134 case ImmTyUNorm: OS << "UNorm"; break;
1135 case ImmTyDA: OS << "DA"; break;
1136 case ImmTyR128A16: OS << "R128A16"; break;
1137 case ImmTyA16: OS << "A16"; break;
1138 case ImmTyLWE: OS << "LWE"; break;
1139 case ImmTyOff: OS << "Off"; break;
1140 case ImmTyExpTgt: OS << "ExpTgt"; break;
1141 case ImmTyExpCompr: OS << "ExpCompr"; break;
1142 case ImmTyExpVM: OS << "ExpVM"; break;
1143 case ImmTyHwreg: OS << "Hwreg"; break;
1144 case ImmTySendMsg: OS << "SendMsg"; break;
1145 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1146 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1147 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1148 case ImmTyOpSel: OS << "OpSel"; break;
1149 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1150 case ImmTyNegLo: OS << "NegLo"; break;
1151 case ImmTyNegHi: OS << "NegHi"; break;
1152 case ImmTySwizzle: OS << "Swizzle"; break;
1153 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1154 case ImmTyHigh: OS << "High"; break;
1155 case ImmTyBLGP: OS << "BLGP"; break;
1156 case ImmTyCBSZ: OS << "CBSZ"; break;
1157 case ImmTyABID: OS << "ABID"; break;
1158 case ImmTyEndpgm: OS << "Endpgm"; break;
1159 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1160 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1161 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1162 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1163 case ImmTyBitOp3: OS << "BitOp3"; break;
1164 case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
1165 case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
1166 case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
1167 case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
1168 case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
1169 case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
1170 case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
1171 case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
1172 case ImmTyScaleSel: OS << "ScaleSel" ; break;
1173 case ImmTyByteSel: OS << "ByteSel" ; break;
1174 }
1175 // clang-format on
1176 }
1177
1178 void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
1179 switch (Kind) {
1180 case Register:
1181 OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
1182 << " mods: " << Reg.Mods << '>';
1183 break;
1184 case Immediate:
1185 OS << '<' << getImm();
1186 if (getImmTy() != ImmTyNone) {
1187 OS << " type: "; printImmTy(OS, getImmTy());
1188 }
1189 OS << " mods: " << Imm.Mods << '>';
1190 break;
1191 case Token:
1192 OS << '\'' << getToken() << '\'';
1193 break;
1194 case Expression:
1195 OS << "<expr ";
1196 MAI.printExpr(OS, *Expr);
1197 OS << '>';
1198 break;
1199 }
1200 }
1201
1202 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1203 int64_t Val, SMLoc Loc,
1204 ImmTy Type = ImmTyNone,
1205 bool IsFPImm = false) {
1206 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1207 Op->Imm.Val = Val;
1208 Op->Imm.IsFPImm = IsFPImm;
1209 Op->Imm.Type = Type;
1210 Op->Imm.Mods = Modifiers();
1211 Op->StartLoc = Loc;
1212 Op->EndLoc = Loc;
1213 return Op;
1214 }
1215
1216 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1217 StringRef Str, SMLoc Loc,
1218 bool HasExplicitEncodingSize = true) {
1219 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1220 Res->Tok.Data = Str.data();
1221 Res->Tok.Length = Str.size();
1222 Res->StartLoc = Loc;
1223 Res->EndLoc = Loc;
1224 return Res;
1225 }
1226
1227 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1228 MCRegister Reg, SMLoc S, SMLoc E) {
1229 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1230 Op->Reg.RegNo = Reg;
1231 Op->Reg.Mods = Modifiers();
1232 Op->StartLoc = S;
1233 Op->EndLoc = E;
1234 return Op;
1235 }
1236
1237 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1238 const class MCExpr *Expr, SMLoc S) {
1239 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1240 Op->Expr = Expr;
1241 Op->StartLoc = S;
1242 Op->EndLoc = S;
1243 return Op;
1244 }
1245};
1246
1247raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1248 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1249 return OS;
1250}
1251
1252//===----------------------------------------------------------------------===//
1253// AsmParser
1254//===----------------------------------------------------------------------===//
1255
1256// TODO: define GET_SUBTARGET_FEATURE_NAME
1257#define GET_REGISTER_MATCHER
1258#include "AMDGPUGenAsmMatcher.inc"
1259#undef GET_REGISTER_MATCHER
1260#undef GET_SUBTARGET_FEATURE_NAME
1261
1262// Holds info related to the current kernel, e.g. count of SGPRs used.
1263// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1264// .amdgpu_hsa_kernel or at EOF.
1265class KernelScopeInfo {
1266 int SgprIndexUnusedMin = -1;
1267 int VgprIndexUnusedMin = -1;
1268 int AgprIndexUnusedMin = -1;
1269 MCContext *Ctx = nullptr;
1270 MCSubtargetInfo const *MSTI = nullptr;
1271
1272 void usesSgprAt(int i) {
1273 if (i >= SgprIndexUnusedMin) {
1274 SgprIndexUnusedMin = ++i;
1275 if (Ctx) {
1276 MCSymbol* const Sym =
1277 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1278 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1279 }
1280 }
1281 }
1282
1283 void usesVgprAt(int i) {
1284 if (i >= VgprIndexUnusedMin) {
1285 VgprIndexUnusedMin = ++i;
1286 if (Ctx) {
1287 MCSymbol* const Sym =
1288 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1289 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1290 VgprIndexUnusedMin);
1291 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1292 }
1293 }
1294 }
1295
1296 void usesAgprAt(int i) {
1297 // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
1298 if (!hasMAIInsts(*MSTI))
1299 return;
1300
1301 if (i >= AgprIndexUnusedMin) {
1302 AgprIndexUnusedMin = ++i;
1303 if (Ctx) {
1304 MCSymbol* const Sym =
1305 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1306 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1307
1308 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1309 MCSymbol* const vSym =
1310 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1311 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1312 VgprIndexUnusedMin);
1313 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1314 }
1315 }
1316 }
1317
1318public:
1319 KernelScopeInfo() = default;
1320
1321 void initialize(MCContext &Context) {
1322 Ctx = &Context;
1323 MSTI = Ctx->getSubtargetInfo();
1324
1325 usesSgprAt(SgprIndexUnusedMin = -1);
1326 usesVgprAt(VgprIndexUnusedMin = -1);
1327 if (hasMAIInsts(*MSTI)) {
1328 usesAgprAt(AgprIndexUnusedMin = -1);
1329 }
1330 }
1331
1332 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1333 unsigned RegWidth) {
1334 switch (RegKind) {
1335 case IS_SGPR:
1336 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1337 break;
1338 case IS_AGPR:
1339 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1340 break;
1341 case IS_VGPR:
1342 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1343 break;
1344 default:
1345 break;
1346 }
1347 }
1348};
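// Illustrative example of the bookkeeping above: after parsing an instruction
// such as "v_mov_b32 v7, s3" inside an .amdgpu_hsa_kernel scope, usesRegister()
// has been called with IS_VGPR index 7 and IS_SGPR index 3, so
// .kernel.vgpr_count resolves to at least 8 and .kernel.sgpr_count to at
// least 4.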
1349
1350class AMDGPUAsmParser : public MCTargetAsmParser {
1351 MCAsmParser &Parser;
1352
1353 unsigned ForcedEncodingSize = 0;
1354 bool ForcedDPP = false;
1355 bool ForcedSDWA = false;
1356 KernelScopeInfo KernelScope;
1357 const unsigned HwMode;
1358
1359 /// @name Auto-generated Match Functions
1360 /// {
1361
1362#define GET_ASSEMBLER_HEADER
1363#include "AMDGPUGenAsmMatcher.inc"
1364
1365 /// }
1366
1367 /// Get the size of a register operand, in bytes.
1368 unsigned getRegOperandSize(const MCInstrDesc &Desc, unsigned OpNo) const {
1369 assert(OpNo < Desc.NumOperands);
1370 int16_t RCID = MII.getOpRegClassID(Desc.operands()[OpNo], HwMode);
1371 return getRegBitWidth(RCID) / 8;
1372 }
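// For instance, an operand constrained to VReg_64 reports 8 here and a VGPR_32
// operand reports 4, since getRegBitWidth() returns the width in bits.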
1373
1374private:
1375 void createConstantSymbol(StringRef Id, int64_t Val);
1376
1377 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1378 bool OutOfRangeError(SMRange Range);
1379 /// Calculate VGPR/SGPR blocks required for given target, reserved
1380 /// registers, and user-specified NextFreeXGPR values.
1381 ///
1382 /// \param Features [in] Target features, used for bug corrections.
1383 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1384 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1385 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1386 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1387 /// descriptor field, if valid.
1388 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1389 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1390 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1391 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1392 /// \param VGPRBlocks [out] Result VGPR block count.
1393 /// \param SGPRBlocks [out] Result SGPR block count.
1394 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1395 const MCExpr *FlatScrUsed, bool XNACKUsed,
1396 std::optional<bool> EnableWavefrontSize32,
1397 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1398 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1399 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
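// Rough shape of the computation behind this declaration (a sketch only; the
// actual granule sizes and hardware-bug workarounds depend on Features):
//   VGPRBlocks ~ alignTo(max(1, NextFreeVGPR), VGPRGranule) / VGPRGranule - 1
//   SGPRBlocks ~ alignTo(max(1, NextFreeSGPR + Reserved), SGPRGranule) / SGPRGranule - 1
// where Reserved accounts for VCC, FLAT_SCRATCH and XNACK_MASK when reserved.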
1400 bool ParseDirectiveAMDGCNTarget();
1401 bool ParseDirectiveAMDHSACodeObjectVersion();
1402 bool ParseDirectiveAMDHSAKernel();
1403 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1404 bool ParseDirectiveAMDKernelCodeT();
1405 // TODO: Possibly make subtargetHasRegister const.
1406 bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
1407 bool ParseDirectiveAMDGPUHsaKernel();
1408
1409 bool ParseDirectiveISAVersion();
1410 bool ParseDirectiveHSAMetadata();
1411 bool ParseDirectivePALMetadataBegin();
1412 bool ParseDirectivePALMetadata();
1413 bool ParseDirectiveAMDGPULDS();
1414
1415 /// Common code to parse out a block of text (typically YAML) between start and
1416 /// end directives.
1417 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1418 const char *AssemblerDirectiveEnd,
1419 std::string &CollectString);
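// Example of the kind of block this helper consumes (directive names shown for
// illustration; they come from the HSA metadata streamer):
//   .amdgpu_metadata
//   amdhsa.version:
//     - 1
//     - 2
//   .end_amdgpu_metadata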
1420
1421 bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
1422 RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
1423 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1424 unsigned &RegNum, unsigned &RegWidth,
1425 bool RestoreOnFailure = false);
1426 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1427 unsigned &RegNum, unsigned &RegWidth,
1428 SmallVectorImpl<AsmToken> &Tokens);
1429 MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1430 unsigned &RegWidth,
1431 SmallVectorImpl<AsmToken> &Tokens);
1432 MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1433 unsigned &RegWidth,
1434 SmallVectorImpl<AsmToken> &Tokens);
1435 MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1436 unsigned &RegWidth,
1437 SmallVectorImpl<AsmToken> &Tokens);
1438 bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
1439 MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
1440 unsigned SubReg, unsigned RegWidth, SMLoc Loc);
1441
1442 bool isRegister();
1443 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1444 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1445 void initializeGprCountSymbol(RegisterKind RegKind);
1446 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1447 unsigned RegWidth);
1448 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1449 bool IsAtomic);
1450
1451public:
1452 enum OperandMode {
1453 OperandMode_Default,
1454 OperandMode_NSA,
1455 };
1456
1457 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1458
1459 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1460 const MCInstrInfo &MII, const MCTargetOptions &Options)
1461 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser),
1462 HwMode(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {
1464
1465 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1466
1467 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1468 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1469 createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1470 createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1471 createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1472 } else {
1473 createConstantSymbol(".option.machine_version_major", ISA.Major);
1474 createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1475 createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1476 }
1477 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1478 initializeGprCountSymbol(IS_VGPR);
1479 initializeGprCountSymbol(IS_SGPR);
1480 } else
1481 KernelScope.initialize(getContext());
1482
1483 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1484 createConstantSymbol(Symbol, Code);
1485
1486 createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1487 createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1488 createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1489 }
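// The constant symbols registered above can be referenced from assembly input,
// e.g.:
//   .if .amdgcn.gfx_generation_number >= 10
//   ...
//   .endif
// Note that the .amdgcn.* names are only created on the HSA ABI path; the
// .option.* names are created otherwise.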
1490
1491 bool hasMIMG_R128() const {
1492 return AMDGPU::hasMIMG_R128(getSTI());
1493 }
1494
1495 bool hasPackedD16() const {
1496 return AMDGPU::hasPackedD16(getSTI());
1497 }
1498
1499 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1500
1501 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1502
1503 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1504
1505 bool isSI() const {
1506 return AMDGPU::isSI(getSTI());
1507 }
1508
1509 bool isCI() const {
1510 return AMDGPU::isCI(getSTI());
1511 }
1512
1513 bool isVI() const {
1514 return AMDGPU::isVI(getSTI());
1515 }
1516
1517 bool isGFX9() const {
1518 return AMDGPU::isGFX9(getSTI());
1519 }
1520
1521 // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1522 bool isGFX90A() const {
1523 return AMDGPU::isGFX90A(getSTI());
1524 }
1525
1526 bool isGFX940() const {
1527 return AMDGPU::isGFX940(getSTI());
1528 }
1529
1530 bool isGFX9Plus() const {
1531 return AMDGPU::isGFX9Plus(getSTI());
1532 }
1533
1534 bool isGFX10() const {
1535 return AMDGPU::isGFX10(getSTI());
1536 }
1537
1538 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1539
1540 bool isGFX11() const {
1541 return AMDGPU::isGFX11(getSTI());
1542 }
1543
1544 bool isGFX11Plus() const {
1545 return AMDGPU::isGFX11Plus(getSTI());
1546 }
1547
1548 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1549
1550 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1551
1552 bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }
1553
1554 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1555
1556 bool isGFX10_BEncoding() const {
1557 return AMDGPU::isGFX10_BEncoding(getSTI());
1558 }
1559
1560 bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }
1561
1562 bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }
1563
1564 bool hasInv2PiInlineImm() const {
1565 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1566 }
1567
1568 bool has64BitLiterals() const {
1569 return getFeatureBits()[AMDGPU::Feature64BitLiterals];
1570 }
1571
1572 bool hasFlatOffsets() const {
1573 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1574 }
1575
1576 bool hasTrue16Insts() const {
1577 return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
1578 }
1579
1580 bool hasArchitectedFlatScratch() const {
1581 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1582 }
1583
1584 bool hasSGPR102_SGPR103() const {
1585 return !isVI() && !isGFX9();
1586 }
1587
1588 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1589
1590 bool hasIntClamp() const {
1591 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1592 }
1593
1594 bool hasPartialNSAEncoding() const {
1595 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1596 }
1597
1598 bool hasGloballyAddressableScratch() const {
1599 return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
1600 }
1601
1602 unsigned getNSAMaxSize(bool HasSampler = false) const {
1603 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1604 }
1605
1606 unsigned getMaxNumUserSGPRs() const {
1607 return AMDGPU::getMaxNumUserSGPRs(getSTI());
1608 }
1609
1610 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1611
1612 AMDGPUTargetStreamer &getTargetStreamer() {
1613 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1614 return static_cast<AMDGPUTargetStreamer &>(TS);
1615 }
1616
1617 MCContext &getContext() const {
1618 // We need this const_cast because for some reason getContext() is not const
1619 // in MCAsmParser.
1620 return const_cast<AMDGPUAsmParser *>(this)->MCTargetAsmParser::getContext();
1621 }
1622
1623 const MCRegisterInfo *getMRI() const {
1624 return getContext().getRegisterInfo();
1625 }
1626
1627 const MCInstrInfo *getMII() const {
1628 return &MII;
1629 }
1630
1631 // FIXME: This should not be used. Instead, use queries derived from
1632 // getAvailableFeatures().
1633 const FeatureBitset &getFeatureBits() const {
1634 return getSTI().getFeatureBits();
1635 }
1636
1637 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1638 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1639 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1640
1641 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1642 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1643 bool isForcedDPP() const { return ForcedDPP; }
1644 bool isForcedSDWA() const { return ForcedSDWA; }
1645 ArrayRef<unsigned> getMatchedVariants() const;
1646 StringRef getMatchedVariantName() const;
1647
1648 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1649 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1650 bool RestoreOnFailure);
1651 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1652 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1653 SMLoc &EndLoc) override;
1654 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1655 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1656 unsigned Kind) override;
1657 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1658 OperandVector &Operands, MCStreamer &Out,
1659 uint64_t &ErrorInfo,
1660 bool MatchingInlineAsm) override;
1661 bool ParseDirective(AsmToken DirectiveID) override;
1662 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1663 OperandMode Mode = OperandMode_Default);
1664 StringRef parseMnemonicSuffix(StringRef Name);
1665 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1666 SMLoc NameLoc, OperandVector &Operands) override;
1667 //bool ProcessInstruction(MCInst &Inst);
1668
1669 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1670
1671 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1672
1673 ParseStatus
1674 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1675 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1676 std::function<bool(int64_t &)> ConvertResult = nullptr);
1677
1678 ParseStatus parseOperandArrayWithPrefix(
1679 const char *Prefix, OperandVector &Operands,
1680 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1681 bool (*ConvertResult)(int64_t &) = nullptr);
1682
1683 ParseStatus
1684 parseNamedBit(StringRef Name, OperandVector &Operands,
1685 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1686 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1687 ParseStatus parseCPol(OperandVector &Operands);
1688 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1689 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
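// Cache-policy parsing accepts both the legacy single-bit mnemonics (glc, slc,
// dlc) and the newer th:/scope: forms, e.g. "th:TH_LOAD_NT scope:SCOPE_SYS" on
// GFX12 targets (example syntax only, not an exhaustive list).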
1690 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1691 SMLoc &StringLoc);
1692 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1693 StringRef Name,
1694 ArrayRef<const char *> Ids,
1695 int64_t &IntVal);
1696 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1697 StringRef Name,
1698 ArrayRef<const char *> Ids,
1699 AMDGPUOperand::ImmTy Type);
1700
1701 bool isModifier();
1702 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1703 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1704 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1705 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1706 bool parseSP3NegModifier();
1707 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1708 LitModifier Lit = LitModifier::None);
1709 ParseStatus parseReg(OperandVector &Operands);
1710 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1711 LitModifier Lit = LitModifier::None);
1712 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1713 bool AllowImm = true);
1714 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1715 bool AllowImm = true);
1716 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1717 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1718 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1719 ParseStatus tryParseIndexKey(OperandVector &Operands,
1720 AMDGPUOperand::ImmTy ImmTy);
1721 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1722 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1723 ParseStatus parseIndexKey32bit(OperandVector &Operands);
1724 ParseStatus tryParseMatrixFMT(OperandVector &Operands, StringRef Name,
1725 AMDGPUOperand::ImmTy Type);
1726 ParseStatus parseMatrixAFMT(OperandVector &Operands);
1727 ParseStatus parseMatrixBFMT(OperandVector &Operands);
1728 ParseStatus tryParseMatrixScale(OperandVector &Operands, StringRef Name,
1729 AMDGPUOperand::ImmTy Type);
1730 ParseStatus parseMatrixAScale(OperandVector &Operands);
1731 ParseStatus parseMatrixBScale(OperandVector &Operands);
1732 ParseStatus tryParseMatrixScaleFmt(OperandVector &Operands, StringRef Name,
1733 AMDGPUOperand::ImmTy Type);
1734 ParseStatus parseMatrixAScaleFmt(OperandVector &Operands);
1735 ParseStatus parseMatrixBScaleFmt(OperandVector &Operands);
1736
1737 ParseStatus parseDfmtNfmt(int64_t &Format);
1738 ParseStatus parseUfmt(int64_t &Format);
1739 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1740 int64_t &Format);
1741 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1742 int64_t &Format);
1743 ParseStatus parseFORMAT(OperandVector &Operands);
1744 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1745 ParseStatus parseNumericFormat(int64_t &Format);
1746 ParseStatus parseFlatOffset(OperandVector &Operands);
1747 ParseStatus parseR128A16(OperandVector &Operands);
1748 ParseStatus parseBLGP(OperandVector &Operands);
1749 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1750 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1751
1752 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1753
1754 bool parseCnt(int64_t &IntVal);
1755 ParseStatus parseSWaitCnt(OperandVector &Operands);
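// Typical operand handled here, for reference:
//   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)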
1756
1757 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1758 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1759 ParseStatus parseDepCtr(OperandVector &Operands);
1760
1761 bool parseDelay(int64_t &Delay);
1762 ParseStatus parseSDelayALU(OperandVector &Operands);
1763
1764 ParseStatus parseHwreg(OperandVector &Operands);
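// Accepts either a plain immediate or the symbolic form
// hwreg(<name>[, <offset>, <width>]), e.g.:
//   s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)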
1765
1766private:
1767 struct OperandInfoTy {
1768 SMLoc Loc;
1769 int64_t Val;
1770 bool IsSymbolic = false;
1771 bool IsDefined = false;
1772
1773 OperandInfoTy(int64_t Val) : Val(Val) {}
1774 };
1775
1776 struct StructuredOpField : OperandInfoTy {
1777 StringLiteral Id;
1778 StringLiteral Desc;
1779 unsigned Width;
1780 bool IsDefined = false;
1781
1782 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1783 int64_t Default)
1784 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1785 virtual ~StructuredOpField() = default;
1786
1787 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1788 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1789 return false;
1790 }
1791
1792 virtual bool validate(AMDGPUAsmParser &Parser) const {
1793 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1794 return Error(Parser, "not supported on this GPU");
1795 if (!isUIntN(Width, Val))
1796 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1797 return true;
1798 }
1799 };
1800
1801 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1802 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1803
1804 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1805 bool validateSendMsg(const OperandInfoTy &Msg,
1806 const OperandInfoTy &Op,
1807 const OperandInfoTy &Stream);
1808
1809 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1810 OperandInfoTy &Width);
1811
1812 static SMLoc getLaterLoc(SMLoc a, SMLoc b);
1813
1814 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1815 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1816 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1817
1818 SMLoc getOperandLoc(const OperandVector &Operands, int MCOpIdx) const;
1819 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1820 const OperandVector &Operands) const;
1821 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type,
1822 const OperandVector &Operands) const;
1823 SMLoc getInstLoc(const OperandVector &Operands) const;
1824
1825 bool validateInstruction(const MCInst &Inst, SMLoc IDLoc,
1826 const OperandVector &Operands);
1827 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1828 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1829 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1830 bool validateSOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1831 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1832 std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
1833 bool AsVOPD3);
1834 bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
1835 bool tryVOPD(const MCInst &Inst);
1836 bool tryVOPD3(const MCInst &Inst);
1837 bool tryAnotherVOPDEncoding(const MCInst &Inst);
1838
1839 bool validateIntClampSupported(const MCInst &Inst);
1840 bool validateMIMGAtomicDMask(const MCInst &Inst);
1841 bool validateMIMGGatherDMask(const MCInst &Inst);
1842 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1843 bool validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc);
1844 bool validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc);
1845 bool validateMIMGD16(const MCInst &Inst);
1846 bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1847 bool validateTensorR128(const MCInst &Inst);
1848 bool validateMIMGMSAA(const MCInst &Inst);
1849 bool validateOpSel(const MCInst &Inst);
1850 bool validateTrue16OpSel(const MCInst &Inst);
1851 bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
1852 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1853 bool validateVccOperand(MCRegister Reg) const;
1854 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1855 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1856 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1857 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1858 bool validateAGPRLdSt(const MCInst &Inst) const;
1859 bool validateVGPRAlign(const MCInst &Inst) const;
1860 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1861 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1862 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1863 bool validateDivScale(const MCInst &Inst);
1864 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1865 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1866 SMLoc IDLoc);
1867 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1868 const unsigned CPol);
1869 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1870 bool validateLdsDirect(const MCInst &Inst, const OperandVector &Operands);
1871 bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
1872 unsigned getConstantBusLimit(unsigned Opcode) const;
1873 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1874 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1875 MCRegister findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1876
1877 bool isSupportedMnemo(StringRef Mnemo,
1878 const FeatureBitset &FBS);
1879 bool isSupportedMnemo(StringRef Mnemo,
1880 const FeatureBitset &FBS,
1881 ArrayRef<unsigned> Variants);
1882 bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);
1883
1884 bool isId(const StringRef Id) const;
1885 bool isId(const AsmToken &Token, const StringRef Id) const;
1886 bool isToken(const AsmToken::TokenKind Kind) const;
1887 StringRef getId() const;
1888 bool trySkipId(const StringRef Id);
1889 bool trySkipId(const StringRef Pref, const StringRef Id);
1890 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1891 bool trySkipToken(const AsmToken::TokenKind Kind);
1892 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1893 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1894 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1895
1896 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1897 AsmToken::TokenKind getTokenKind() const;
1898 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1899 bool parseExpr(OperandVector &Operands);
1900 StringRef getTokenStr() const;
1901 AsmToken peekToken(bool ShouldSkipSpace = true);
1902 AsmToken getToken() const;
1903 SMLoc getLoc() const;
1904 void lex();
1905
1906public:
1907 void onBeginOfFile() override;
1908 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1909
1910 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1911
1912 ParseStatus parseExpTgt(OperandVector &Operands);
1913 ParseStatus parseSendMsg(OperandVector &Operands);
1914 ParseStatus parseInterpSlot(OperandVector &Operands);
1915 ParseStatus parseInterpAttr(OperandVector &Operands);
1916 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1917 ParseStatus parseBoolReg(OperandVector &Operands);
1918
1919 bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1920 const unsigned MaxVal, const Twine &ErrMsg,
1921 SMLoc &Loc);
1922 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1923 const unsigned MinVal,
1924 const unsigned MaxVal,
1925 const StringRef ErrMsg);
1926 ParseStatus parseSwizzle(OperandVector &Operands);
1927 bool parseSwizzleOffset(int64_t &Imm);
1928 bool parseSwizzleMacro(int64_t &Imm);
1929 bool parseSwizzleQuadPerm(int64_t &Imm);
1930 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1931 bool parseSwizzleBroadcast(int64_t &Imm);
1932 bool parseSwizzleSwap(int64_t &Imm);
1933 bool parseSwizzleReverse(int64_t &Imm);
1934 bool parseSwizzleFFT(int64_t &Imm);
1935 bool parseSwizzleRotate(int64_t &Imm);
1936
1937 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1938 int64_t parseGPRIdxMacro();
1939
1940 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1941 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1942
1943 ParseStatus parseOModSI(OperandVector &Operands);
1944
1945 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1946 OptionalImmIndexMap &OptionalIdx);
1947 void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
1948 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1949 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1950 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1951 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1952
1953 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1954 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1955 OptionalImmIndexMap &OptionalIdx);
1956 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1957 OptionalImmIndexMap &OptionalIdx);
1958
1959 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1960 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1961 void cvtOpSelHelper(MCInst &Inst, unsigned OpSel);
1962
1963 bool parseDimId(unsigned &Encoding);
1964 ParseStatus parseDim(OperandVector &Operands);
1965 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1966 ParseStatus parseDPP8(OperandVector &Operands);
1967 ParseStatus parseDPPCtrl(OperandVector &Operands);
1968 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1969 int64_t parseDPPCtrlSel(StringRef Ctrl);
1970 int64_t parseDPPCtrlPerm();
1971 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1972 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1973 cvtDPP(Inst, Operands, true);
1974 }
1975 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1976 bool IsDPP8 = false);
1977 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1978 cvtVOP3DPP(Inst, Operands, true);
1979 }
1980
1981 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1982 AMDGPUOperand::ImmTy Type);
1983 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1984 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1985 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1986 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1987 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1988 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1989 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1990 uint64_t BasicInstType,
1991 bool SkipDstVcc = false,
1992 bool SkipSrcVcc = false);
1993
1994 ParseStatus parseEndpgm(OperandVector &Operands);
1995
1996 ParseStatus parseVOPD(OperandVector &Operands);
1997};
1998
1999} // end anonymous namespace
2000
2001// May be called with an integer type of equivalent bit width.
2002static const fltSemantics *getFltSemantics(unsigned Size) {
2003 switch (Size) {
2004 case 4:
2005 return &APFloat::IEEEsingle();
2006 case 8:
2007 return &APFloat::IEEEdouble();
2008 case 2:
2009 return &APFloat::IEEEhalf();
2010 default:
2011 llvm_unreachable("unsupported fp type");
2012 }
2013}
2014
2015static const fltSemantics *getFltSemantics(MVT VT) {
2016 return getFltSemantics(VT.getSizeInBits() / 8);
2017}
2018
2019static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
2020 switch (OperandType) {
2021 // When floating-point immediate is used as operand of type i16, the 32-bit
2022 // representation of the constant truncated to the 16 LSBs should be used.
2037 return &APFloat::IEEEsingle();
2044 return &APFloat::IEEEdouble();
2052 return &APFloat::IEEEhalf();
2057 return &APFloat::BFloat();
2058 default:
2059 llvm_unreachable("unsupported fp type");
2060 }
2061}
2062
2063//===----------------------------------------------------------------------===//
2064// Operand
2065//===----------------------------------------------------------------------===//
2066
2067static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
2068 bool Lost;
2069
2070 // Convert the literal to the operand's floating-point type.
2071 APFloat::opStatus Status =
2072 FPLiteral.convert(*getFltSemantics(VT), APFloat::rmNearestTiesToEven,
2073 &Lost);
2074 // We allow precision loss but not overflow or underflow.
2075 if (Status != APFloat::opOK &&
2076 Lost &&
2077 ((Status & APFloat::opOverflow) != 0 ||
2078 (Status & APFloat::opUnderflow) != 0)) {
2079 return false;
2080 }
2081
2082 return true;
2083}
2084
2085static bool isSafeTruncation(int64_t Val, unsigned Size) {
2086 return isUIntN(Size, Val) || isIntN(Size, Val);
2087}
2088
2089static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2090 if (VT.getScalarType() == MVT::i16)
2091 return isInlinableLiteral32(Val, HasInv2Pi);
2092
2093 if (VT.getScalarType() == MVT::f16)
2094 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2095
2096 assert(VT.getScalarType() == MVT::bf16);
2097
2098 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2099}
2100
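// For reference, the inlinable-immediate checks below accept only values the
// hardware can encode directly in the operand field: small integers in
// [-16, 64] and the FP constants 0.0, +-0.5, +-1.0, +-2.0 and +-4.0 (plus
// 1/(2*pi) when hasInv2PiInlineImm() is true); anything else must be emitted
// as a separate literal.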
2101bool AMDGPUOperand::isInlinableImm(MVT type) const {
2102
2103 // This is a hack to enable named inline values like
2104 // shared_base with both 32-bit and 64-bit operands.
2105 // Note that these values are defined as
2106 // 32-bit operands only.
2107 if (isInlineValue()) {
2108 return true;
2109 }
2110
2111 if (!isImmTy(ImmTyNone)) {
2112 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2113 return false;
2114 }
2115
2116 if (getModifiers().Lit != LitModifier::None)
2117 return false;
2118
2119 // TODO: We should avoid using host floats here. It would be better to
2120 // check the float bit values, which is what a few other places do.
2121 // We've had bot failures before due to weird NaN support on MIPS hosts.
2122
2123 APInt Literal(64, Imm.Val);
2124
2125 if (Imm.IsFPImm) { // We got fp literal token
2126 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2127 return AMDGPU::isInlinableLiteral64(Imm.Val,
2128 AsmParser->hasInv2PiInlineImm());
2129 }
2130
2131 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2132 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2133 return false;
2134
2135 if (type.getScalarSizeInBits() == 16) {
2136 bool Lost = false;
2137 switch (type.getScalarType().SimpleTy) {
2138 default:
2139 llvm_unreachable("unknown 16-bit type");
2140 case MVT::bf16:
2141 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2142 &Lost);
2143 break;
2144 case MVT::f16:
2145 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2146 &Lost);
2147 break;
2148 case MVT::i16:
2149 FPLiteral.convert(APFloatBase::IEEEsingle(),
2150 APFloat::rmNearestTiesToEven, &Lost);
2151 break;
2152 }
2153 // We need to use the 32-bit representation here because when a
2154 // floating-point inline constant is used as an i16 operand, its 32-bit
2155 // representation will be used. We will need the 32-bit value to check
2156 // whether it is an FP inline constant.
2157 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2158 return isInlineableLiteralOp16(ImmVal, type,
2159 AsmParser->hasInv2PiInlineImm());
2160 }
2161
2162 // Check if single precision literal is inlinable
2163 return AMDGPU::isInlinableLiteral32(
2164 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2165 AsmParser->hasInv2PiInlineImm());
2166 }
2167
2168 // We got int literal token.
2169 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2170 return AMDGPU::isInlinableLiteral64(Imm.Val,
2171 AsmParser->hasInv2PiInlineImm());
2172 }
2173
2174 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2175 return false;
2176 }
2177
2178 if (type.getScalarSizeInBits() == 16) {
2179 return isInlineableLiteralOp16(
2180 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2181 type, AsmParser->hasInv2PiInlineImm());
2182 }
2183
2184 return AMDGPU::isInlinableLiteral32(
2185 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2186 AsmParser->hasInv2PiInlineImm());
2187}
2188
2189bool AMDGPUOperand::isLiteralImm(MVT type) const {
2190 // Check that this immediate can be added as literal
2191 if (!isImmTy(ImmTyNone)) {
2192 return false;
2193 }
2194
2195 bool Allow64Bit =
2196 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2197
2198 if (!Imm.IsFPImm) {
2199 // We got int literal token.
2200
2201 if (type == MVT::f64 && hasFPModifiers()) {
2202 // FP modifiers cannot be applied to int literals while preserving the
2203 // same semantics for VOP1/2/C and VOP3, because of integer truncation.
2204 // To avoid ambiguity, disable these cases.
2205 return false;
2206 }
2207
2208 unsigned Size = type.getSizeInBits();
2209 if (Size == 64) {
2210 if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Imm.Val, false))
2211 return true;
2212 Size = 32;
2213 }
2214
2215 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2216 // types.
2217 return isSafeTruncation(Imm.Val, Size);
2218 }
2219
2220 // We got fp literal token
2221 if (type == MVT::f64) { // Expected 64-bit fp operand
2222 // We would set the low 32 bits of the literal to zeroes, but we accept such literals.
2223 return true;
2224 }
2225
2226 if (type == MVT::i64) { // Expected 64-bit int operand
2227 // We don't allow fp literals in 64-bit integer instructions. It is
2228 // unclear how we should encode them.
2229 return false;
2230 }
2231
2232 // We allow fp literals with f16x2 operands assuming that the specified
2233 // literal goes into the lower half and the upper half is zero. We also
2234 // require that the literal may be losslessly converted to f16.
2235 //
2236 // For i16x2 operands, we assume that the specified literal is encoded as a
2237 // single-precision float. This is pretty odd, but it matches SP3 and what
2238 // happens in hardware.
2239 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2240 : (type == MVT::v2i16) ? MVT::f32
2241 : (type == MVT::v2f32) ? MVT::f32
2242 : type;
2243
2244 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2245 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2246}
2247
2248bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2249 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2250}
2251
2252bool AMDGPUOperand::isVRegWithInputMods() const {
2253 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2254 // GFX90A allows DPP on 64-bit operands.
2255 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2256 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2257}
2258
2259template <bool IsFake16>
2260bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2261 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2262 : AMDGPU::VGPR_16_Lo128RegClassID);
2263}
2264
2265template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2266 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2267 : AMDGPU::VGPR_16RegClassID);
2268}
2269
2270bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2271 if (AsmParser->isVI())
2272 return isVReg32();
2273 if (AsmParser->isGFX9Plus())
2274 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2275 return false;
2276}
2277
2278bool AMDGPUOperand::isSDWAFP16Operand() const {
2279 return isSDWAOperand(MVT::f16);
2280}
2281
2282bool AMDGPUOperand::isSDWAFP32Operand() const {
2283 return isSDWAOperand(MVT::f32);
2284}
2285
2286bool AMDGPUOperand::isSDWAInt16Operand() const {
2287 return isSDWAOperand(MVT::i16);
2288}
2289
2290bool AMDGPUOperand::isSDWAInt32Operand() const {
2291 return isSDWAOperand(MVT::i32);
2292}
2293
2294bool AMDGPUOperand::isBoolReg() const {
2295 return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
2296 (AsmParser->isWave32() && isSCSrc_b32()));
2297}
2298
2299uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2300{
2301 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2302 assert(Size == 2 || Size == 4 || Size == 8);
2303
2304 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2305
2306 if (Imm.Mods.Abs) {
2307 Val &= ~FpSignMask;
2308 }
2309 if (Imm.Mods.Neg) {
2310 Val ^= FpSignMask;
2311 }
2312
2313 return Val;
2314}
2315
2316void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2317 MCOpIdx = Inst.getNumOperands();
2318
2319 if (isExpr()) {
2321 return;
2322 }
2323
2324 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2325 Inst.getNumOperands())) {
2326 addLiteralImmOperand(Inst, Imm.Val,
2327 ApplyModifiers &
2328 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2329 } else {
2330 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2332 }
2333}
2334
2335void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2336 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2337 auto OpNum = Inst.getNumOperands();
2338 // Check that this operand accepts literals
2339 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2340
2341 if (ApplyModifiers) {
2342 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2343 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2344 Val = applyInputFPModifiers(Val, Size);
2345 }
2346
2347 APInt Literal(64, Val);
2348 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2349
2350 bool CanUse64BitLiterals =
2351 AsmParser->has64BitLiterals() &&
2352 !(InstDesc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));
2353 LitModifier Lit = getModifiers().Lit;
2354 MCContext &Ctx = AsmParser->getContext();
2355
2356 if (Imm.IsFPImm) { // We got fp literal token
2357 switch (OpTy) {
2363 if (Lit == LitModifier::None &&
2365 AsmParser->hasInv2PiInlineImm())) {
2366 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2367 return;
2368 }
2369
2370 // Non-inlineable
2371 if (AMDGPU::isSISrcFPOperand(InstDesc,
2372 OpNum)) { // Expected 64-bit fp operand
2373 bool HasMandatoryLiteral =
2374 AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm);
2375 // For fp operands we check if low 32 bits are zeros
2376 if (Literal.getLoBits(32) != 0 &&
2377 (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
2378 !HasMandatoryLiteral) {
2379 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
2380 Inst.getLoc(),
2381 "Can't encode literal as exact 64-bit floating-point operand. "
2382 "Low 32-bits will be set to zero");
2383 Val &= 0xffffffff00000000u;
2384 }
2385
2386 if ((OpTy == AMDGPU::OPERAND_REG_IMM_FP64 ||
2389 if (CanUse64BitLiterals && Lit == LitModifier::None &&
2390 (isInt<32>(Val) || isUInt<32>(Val))) {
2391 // The floating-point operand will be verbalized as an
2392 // integer one. If that integer happens to fit in 32 bits, on
2393 // re-assembly it will be interpreted as the high half of
2394 // the actual value, so we have to wrap it into lit64().
2395 Lit = LitModifier::Lit64;
2396 } else if (Lit == LitModifier::Lit) {
2397 // For FP64 operands lit() specifies the high half of the value.
2398 Val = Hi_32(Val);
2399 }
2400 }
2401 break;
2402 }
2403
2404 // We don't allow fp literals in 64-bit integer instructions. It is
2405 // unclear how we should encode them. This case should be checked earlier
2406 // in predicate methods (isLiteralImm())
2407 llvm_unreachable("fp literal in 64-bit integer instruction.");
2408
2410 if (CanUse64BitLiterals && Lit == LitModifier::None &&
2411 (isInt<32>(Val) || isUInt<32>(Val)))
2412 Lit = LitModifier::Lit64;
2413 break;
2414
2419 if (Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
2420 Literal == 0x3fc45f306725feed) {
2421 // This is 1/(2*pi), which is going to be truncated to bf16 with a
2422 // loss of precision. The constant represents the idiomatic fp32 value
2423 // of 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16
2424 // bits cleared. Prevent the rounding below.
2425 Inst.addOperand(MCOperand::createImm(0x3e22));
2426 return;
2427 }
2428 [[fallthrough]];
2429
2451 bool lost;
2452 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2453 // Convert literal to single precision
2454 FPLiteral.convert(*getOpFltSemantics(OpTy),
2455 APFloat::rmNearestTiesToEven, &lost);
2456 // We allow precision loss but not overflow or underflow. This should be
2457 // checked earlier in isLiteralImm().
2458
2459 Val = FPLiteral.bitcastToAPInt().getZExtValue();
2460 break;
2461 }
2462 default:
2463 llvm_unreachable("invalid operand size");
2464 }
2465
2466 if (Lit != LitModifier::None) {
2467 Inst.addOperand(
2469 } else {
2471 }
2472 return;
2473 }
2474
2475 // We got int literal token.
2476 // Only sign extend inline immediates.
2477 switch (OpTy) {
2492 break;
2493
2496 if (Lit == LitModifier::None &&
2497 AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2499 return;
2500 }
2501
2502 // When the 32 MSBs are not zero (i.e. the value cannot be safely
2503 // truncated to uint32_t), and either the target doesn't support 64-bit
2504 // literals or the lit modifier is explicitly used, we need to truncate
2505 // the value to the 32 LSBs.
2506 if (!AsmParser->has64BitLiterals() || Lit == LitModifier::Lit)
2507 Val = Lo_32(Val);
2508 break;
2509
2513 if (Lit == LitModifier::None &&
2514 AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2516 return;
2517 }
2518
2519 // If the target doesn't support 64-bit literals, we need to use the
2520 // constant as the high 32 MSBs of a double-precision floating point value.
2521 if (!AsmParser->has64BitLiterals()) {
2522 Val = static_cast<uint64_t>(Val) << 32;
2523 } else {
2524 // Now that the target does support 64-bit literals, there are two cases
2525 // where we still want to use the src_literal encoding:
2526 // 1) it is explicitly forced by the lit modifier;
2527 // 2) the value has a valid 32-bit representation (signed or unsigned)
2528 // and is not forced to 64 bits by the lit64 modifier.
2529 if (Lit == LitModifier::Lit ||
2530 (Lit != LitModifier::Lit64 && (isInt<32>(Val) || isUInt<32>(Val))))
2531 Val = static_cast<uint64_t>(Val) << 32;
2532 }
2533
2534 // For FP64 operands lit() specifies the high half of the value.
2535 if (Lit == LitModifier::Lit)
2536 Val = Hi_32(Val);
2537 break;
2538
2550 break;
2551
2553 if ((isInt<32>(Val) || isUInt<32>(Val)) && Lit != LitModifier::Lit64)
2554 Val <<= 32;
2555 break;
2556
2557 default:
2558 llvm_unreachable("invalid operand type");
2559 }
2560
2561 if (Lit != LitModifier::None) {
2562 Inst.addOperand(
2564 } else {
2566 }
2567}
2568
2569void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2570 MCOpIdx = Inst.getNumOperands();
2571 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2572}
2573
2574bool AMDGPUOperand::isInlineValue() const {
2575 return isRegKind() && ::isInlineValue(getReg());
2576}
2577
2578//===----------------------------------------------------------------------===//
2579// AsmParser
2580//===----------------------------------------------------------------------===//
2581
2582void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2583 // TODO: make those pre-defined variables read-only.
2584 // Currently there is no suitable machinery in the core llvm-mc for this.
2585 // MCSymbol::isRedefinable is intended for another purpose, and
2586 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
2587 MCContext &Ctx = getContext();
2588 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2590}
2591
2592static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2593 if (Is == IS_VGPR) {
2594 switch (RegWidth) {
2595 default: return -1;
2596 case 32:
2597 return AMDGPU::VGPR_32RegClassID;
2598 case 64:
2599 return AMDGPU::VReg_64RegClassID;
2600 case 96:
2601 return AMDGPU::VReg_96RegClassID;
2602 case 128:
2603 return AMDGPU::VReg_128RegClassID;
2604 case 160:
2605 return AMDGPU::VReg_160RegClassID;
2606 case 192:
2607 return AMDGPU::VReg_192RegClassID;
2608 case 224:
2609 return AMDGPU::VReg_224RegClassID;
2610 case 256:
2611 return AMDGPU::VReg_256RegClassID;
2612 case 288:
2613 return AMDGPU::VReg_288RegClassID;
2614 case 320:
2615 return AMDGPU::VReg_320RegClassID;
2616 case 352:
2617 return AMDGPU::VReg_352RegClassID;
2618 case 384:
2619 return AMDGPU::VReg_384RegClassID;
2620 case 512:
2621 return AMDGPU::VReg_512RegClassID;
2622 case 1024:
2623 return AMDGPU::VReg_1024RegClassID;
2624 }
2625 } else if (Is == IS_TTMP) {
2626 switch (RegWidth) {
2627 default: return -1;
2628 case 32:
2629 return AMDGPU::TTMP_32RegClassID;
2630 case 64:
2631 return AMDGPU::TTMP_64RegClassID;
2632 case 128:
2633 return AMDGPU::TTMP_128RegClassID;
2634 case 256:
2635 return AMDGPU::TTMP_256RegClassID;
2636 case 512:
2637 return AMDGPU::TTMP_512RegClassID;
2638 }
2639 } else if (Is == IS_SGPR) {
2640 switch (RegWidth) {
2641 default: return -1;
2642 case 32:
2643 return AMDGPU::SGPR_32RegClassID;
2644 case 64:
2645 return AMDGPU::SGPR_64RegClassID;
2646 case 96:
2647 return AMDGPU::SGPR_96RegClassID;
2648 case 128:
2649 return AMDGPU::SGPR_128RegClassID;
2650 case 160:
2651 return AMDGPU::SGPR_160RegClassID;
2652 case 192:
2653 return AMDGPU::SGPR_192RegClassID;
2654 case 224:
2655 return AMDGPU::SGPR_224RegClassID;
2656 case 256:
2657 return AMDGPU::SGPR_256RegClassID;
2658 case 288:
2659 return AMDGPU::SGPR_288RegClassID;
2660 case 320:
2661 return AMDGPU::SGPR_320RegClassID;
2662 case 352:
2663 return AMDGPU::SGPR_352RegClassID;
2664 case 384:
2665 return AMDGPU::SGPR_384RegClassID;
2666 case 512:
2667 return AMDGPU::SGPR_512RegClassID;
2668 }
2669 } else if (Is == IS_AGPR) {
2670 switch (RegWidth) {
2671 default: return -1;
2672 case 32:
2673 return AMDGPU::AGPR_32RegClassID;
2674 case 64:
2675 return AMDGPU::AReg_64RegClassID;
2676 case 96:
2677 return AMDGPU::AReg_96RegClassID;
2678 case 128:
2679 return AMDGPU::AReg_128RegClassID;
2680 case 160:
2681 return AMDGPU::AReg_160RegClassID;
2682 case 192:
2683 return AMDGPU::AReg_192RegClassID;
2684 case 224:
2685 return AMDGPU::AReg_224RegClassID;
2686 case 256:
2687 return AMDGPU::AReg_256RegClassID;
2688 case 288:
2689 return AMDGPU::AReg_288RegClassID;
2690 case 320:
2691 return AMDGPU::AReg_320RegClassID;
2692 case 352:
2693 return AMDGPU::AReg_352RegClassID;
2694 case 384:
2695 return AMDGPU::AReg_384RegClassID;
2696 case 512:
2697 return AMDGPU::AReg_512RegClassID;
2698 case 1024:
2699 return AMDGPU::AReg_1024RegClassID;
2700 }
2701 }
2702 return -1;
2703}
2704
2707 .Case("exec", AMDGPU::EXEC)
2708 .Case("vcc", AMDGPU::VCC)
2709 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2710 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2711 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2712 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2713 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2714 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2715 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2716 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2717 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2718 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2719 .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
2720 .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
2721 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2722 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2723 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2724 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2725 .Case("m0", AMDGPU::M0)
2726 .Case("vccz", AMDGPU::SRC_VCCZ)
2727 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2728 .Case("execz", AMDGPU::SRC_EXECZ)
2729 .Case("src_execz", AMDGPU::SRC_EXECZ)
2730 .Case("scc", AMDGPU::SRC_SCC)
2731 .Case("src_scc", AMDGPU::SRC_SCC)
2732 .Case("tba", AMDGPU::TBA)
2733 .Case("tma", AMDGPU::TMA)
2734 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2735 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2736 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2737 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2738 .Case("vcc_lo", AMDGPU::VCC_LO)
2739 .Case("vcc_hi", AMDGPU::VCC_HI)
2740 .Case("exec_lo", AMDGPU::EXEC_LO)
2741 .Case("exec_hi", AMDGPU::EXEC_HI)
2742 .Case("tma_lo", AMDGPU::TMA_LO)
2743 .Case("tma_hi", AMDGPU::TMA_HI)
2744 .Case("tba_lo", AMDGPU::TBA_LO)
2745 .Case("tba_hi", AMDGPU::TBA_HI)
2746 .Case("pc", AMDGPU::PC_REG)
2747 .Case("null", AMDGPU::SGPR_NULL)
2748 .Default(AMDGPU::NoRegister);
2749}
2750
2751bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2752 SMLoc &EndLoc, bool RestoreOnFailure) {
2753 auto R = parseRegister();
2754 if (!R) return true;
2755 assert(R->isReg());
2756 RegNo = R->getReg();
2757 StartLoc = R->getStartLoc();
2758 EndLoc = R->getEndLoc();
2759 return false;
2760}
2761
2762bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2763 SMLoc &EndLoc) {
2764 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2765}
2766
2767ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2768 SMLoc &EndLoc) {
2769 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2770 bool PendingErrors = getParser().hasPendingError();
2771 getParser().clearPendingErrors();
2772 if (PendingErrors)
2773 return ParseStatus::Failure;
2774 if (Result)
2775 return ParseStatus::NoMatch;
2776 return ParseStatus::Success;
2777}
2778
2779bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2780 RegisterKind RegKind,
2781 MCRegister Reg1, SMLoc Loc) {
2782 switch (RegKind) {
2783 case IS_SPECIAL:
2784 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2785 Reg = AMDGPU::EXEC;
2786 RegWidth = 64;
2787 return true;
2788 }
2789 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2790 Reg = AMDGPU::FLAT_SCR;
2791 RegWidth = 64;
2792 return true;
2793 }
2794 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2795 Reg = AMDGPU::XNACK_MASK;
2796 RegWidth = 64;
2797 return true;
2798 }
2799 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2800 Reg = AMDGPU::VCC;
2801 RegWidth = 64;
2802 return true;
2803 }
2804 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2805 Reg = AMDGPU::TBA;
2806 RegWidth = 64;
2807 return true;
2808 }
2809 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2810 Reg = AMDGPU::TMA;
2811 RegWidth = 64;
2812 return true;
2813 }
2814 Error(Loc, "register does not fit in the list");
2815 return false;
2816 case IS_VGPR:
2817 case IS_SGPR:
2818 case IS_AGPR:
2819 case IS_TTMP:
2820 if (Reg1 != Reg + RegWidth / 32) {
2821 Error(Loc, "registers in a list must have consecutive indices");
2822 return false;
2823 }
2824 RegWidth += 32;
2825 return true;
2826 default:
2827 llvm_unreachable("unexpected register kind");
2828 }
2829}
2830
2831struct RegInfo {
2832 StringLiteral Name;
2833 RegisterKind Kind;
2834};
2835
2836static constexpr RegInfo RegularRegisters[] = {
2837 {{"v"}, IS_VGPR},
2838 {{"s"}, IS_SGPR},
2839 {{"ttmp"}, IS_TTMP},
2840 {{"acc"}, IS_AGPR},
2841 {{"a"}, IS_AGPR},
2842};
2843
2844static bool isRegularReg(RegisterKind Kind) {
2845 return Kind == IS_VGPR ||
2846 Kind == IS_SGPR ||
2847 Kind == IS_TTMP ||
2848 Kind == IS_AGPR;
2849}
2850
2851static const RegInfo *getRegularRegInfo(StringRef Str) {
2852 for (const RegInfo &Reg : RegularRegisters)
2853 if (Str.starts_with(Reg.Name))
2854 return &Reg;
2855 return nullptr;
2856}
2857
2858static bool getRegNum(StringRef Str, unsigned& Num) {
2859 return !Str.getAsInteger(10, Num);
2860}
2861
2862bool
2863AMDGPUAsmParser::isRegister(const AsmToken &Token,
2864 const AsmToken &NextToken) const {
2865
2866 // A list of consecutive registers: [s0,s1,s2,s3]
2867 if (Token.is(AsmToken::LBrac))
2868 return true;
2869
2870 if (!Token.is(AsmToken::Identifier))
2871 return false;
2872
2873 // A single register like s0 or a range of registers like s[0:1]
2874
2875 StringRef Str = Token.getString();
2876 const RegInfo *Reg = getRegularRegInfo(Str);
2877 if (Reg) {
2878 StringRef RegName = Reg->Name;
2879 StringRef RegSuffix = Str.substr(RegName.size());
2880 if (!RegSuffix.empty()) {
2881 RegSuffix.consume_back(".l");
2882 RegSuffix.consume_back(".h");
2883 unsigned Num;
2884 // A single register with an index: rXX
2885 if (getRegNum(RegSuffix, Num))
2886 return true;
2887 } else {
2888 // A range of registers: r[XX:YY].
2889 if (NextToken.is(AsmToken::LBrac))
2890 return true;
2891 }
2892 }
2893
2894 return getSpecialRegForName(Str).isValid();
2895}
2896
2897bool
2898AMDGPUAsmParser::isRegister()
2899{
2900 return isRegister(getToken(), peekToken());
2901}
2902
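// Note on the alignment check below: SGPR and TTMP tuples must start at an
// index aligned to the tuple size rounded up to a power of two, capped at
// four dwords. For example, s[2:3] is accepted while s[1:2] is rejected with
// "invalid register alignment"; VGPR and AGPR indices are not aligned here
// (GFX90A tuple alignment is checked separately in validateVGPRAlign).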
2903MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2904 unsigned SubReg, unsigned RegWidth,
2905 SMLoc Loc) {
2906 assert(isRegularReg(RegKind));
2907
2908 unsigned AlignSize = 1;
2909 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2910 // SGPR and TTMP registers must be aligned.
2911 // Max required alignment is 4 dwords.
2912 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2913 }
2914
2915 if (RegNum % AlignSize != 0) {
2916 Error(Loc, "invalid register alignment");
2917 return MCRegister();
2918 }
2919
2920 unsigned RegIdx = RegNum / AlignSize;
2921 int RCID = getRegClass(RegKind, RegWidth);
2922 if (RCID == -1) {
2923 Error(Loc, "invalid or unsupported register size");
2924 return MCRegister();
2925 }
2926
2927 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2928 const MCRegisterClass RC = TRI->getRegClass(RCID);
2929 if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
2930 Error(Loc, "register index is out of range");
2931 return AMDGPU::NoRegister;
2932 }
2933
2934 if (RegKind == IS_VGPR && !isGFX1250() && RegIdx + RegWidth / 32 > 256) {
2935 Error(Loc, "register index is out of range");
2936 return MCRegister();
2937 }
2938
2939 MCRegister Reg = RC.getRegister(RegIdx);
2940
2941 if (SubReg) {
2942 Reg = TRI->getSubReg(Reg, SubReg);
2943
2944 // Currently all regular registers have their .l and .h subregisters, so
2945 // we should never need to generate an error here.
2946 assert(Reg && "Invalid subregister!");
2947 }
2948
2949 return Reg;
2950}
2951
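// For reference: a range such as "[0:3]" yields Num = 0 and RegWidth = 128,
// while a single index "[5]" (optionally followed by ".l" or ".h" to select
// a 16-bit half) yields Num = 5 and RegWidth = 32.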
2952bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
2953 unsigned &SubReg) {
2954 int64_t RegLo, RegHi;
2955 if (!skipToken(AsmToken::LBrac, "missing register index"))
2956 return false;
2957
2958 SMLoc FirstIdxLoc = getLoc();
2959 SMLoc SecondIdxLoc;
2960
2961 if (!parseExpr(RegLo))
2962 return false;
2963
2964 if (trySkipToken(AsmToken::Colon)) {
2965 SecondIdxLoc = getLoc();
2966 if (!parseExpr(RegHi))
2967 return false;
2968 } else {
2969 RegHi = RegLo;
2970 }
2971
2972 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2973 return false;
2974
2975 if (!isUInt<32>(RegLo)) {
2976 Error(FirstIdxLoc, "invalid register index");
2977 return false;
2978 }
2979
2980 if (!isUInt<32>(RegHi)) {
2981 Error(SecondIdxLoc, "invalid register index");
2982 return false;
2983 }
2984
2985 if (RegLo > RegHi) {
2986 Error(FirstIdxLoc, "first register index should not exceed second index");
2987 return false;
2988 }
2989
2990 if (RegHi == RegLo) {
2991 StringRef RegSuffix = getTokenStr();
2992 if (RegSuffix == ".l") {
2993 SubReg = AMDGPU::lo16;
2994 lex();
2995 } else if (RegSuffix == ".h") {
2996 SubReg = AMDGPU::hi16;
2997 lex();
2998 }
2999 }
3000
3001 Num = static_cast<unsigned>(RegLo);
3002 RegWidth = 32 * ((RegHi - RegLo) + 1);
3003
3004 return true;
3005}
3006
3007MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
3008 unsigned &RegNum,
3009 unsigned &RegWidth,
3010 SmallVectorImpl<AsmToken> &Tokens) {
3011 assert(isToken(AsmToken::Identifier));
3012 MCRegister Reg = getSpecialRegForName(getTokenStr());
3013 if (Reg) {
3014 RegNum = 0;
3015 RegWidth = 32;
3016 RegKind = IS_SPECIAL;
3017 Tokens.push_back(getToken());
3018 lex(); // skip register name
3019 }
3020 return Reg;
3021}
3022
3023MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3024 unsigned &RegNum,
3025 unsigned &RegWidth,
3026 SmallVectorImpl<AsmToken> &Tokens) {
3027 assert(isToken(AsmToken::Identifier));
3028 StringRef RegName = getTokenStr();
3029 auto Loc = getLoc();
3030
3031 const RegInfo *RI = getRegularRegInfo(RegName);
3032 if (!RI) {
3033 Error(Loc, "invalid register name");
3034 return MCRegister();
3035 }
3036
3037 Tokens.push_back(getToken());
3038 lex(); // skip register name
3039
3040 RegKind = RI->Kind;
3041 StringRef RegSuffix = RegName.substr(RI->Name.size());
3042 unsigned SubReg = NoSubRegister;
3043 if (!RegSuffix.empty()) {
3044 if (RegSuffix.consume_back(".l"))
3045 SubReg = AMDGPU::lo16;
3046 else if (RegSuffix.consume_back(".h"))
3047 SubReg = AMDGPU::hi16;
3048
3049 // Single 32-bit register: vXX.
3050 if (!getRegNum(RegSuffix, RegNum)) {
3051 Error(Loc, "invalid register index");
3052 return MCRegister();
3053 }
3054 RegWidth = 32;
3055 } else {
3056 // Range of registers: v[XX:YY]. ":YY" is optional.
3057 if (!ParseRegRange(RegNum, RegWidth, SubReg))
3058 return MCRegister();
3059 }
3060
3061 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
3062}
3063
3064MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3065 unsigned &RegNum, unsigned &RegWidth,
3066 SmallVectorImpl<AsmToken> &Tokens) {
3067 MCRegister Reg;
3068 auto ListLoc = getLoc();
3069
3070 if (!skipToken(AsmToken::LBrac,
3071 "expected a register or a list of registers")) {
3072 return MCRegister();
3073 }
3074
3075 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
3076
3077 auto Loc = getLoc();
3078 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
3079 return MCRegister();
3080 if (RegWidth != 32) {
3081 Error(Loc, "expected a single 32-bit register");
3082 return MCRegister();
3083 }
3084
3085 for (; trySkipToken(AsmToken::Comma); ) {
3086 RegisterKind NextRegKind;
3087 MCRegister NextReg;
3088 unsigned NextRegNum, NextRegWidth;
3089 Loc = getLoc();
3090
3091 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3092 NextRegNum, NextRegWidth,
3093 Tokens)) {
3094 return MCRegister();
3095 }
3096 if (NextRegWidth != 32) {
3097 Error(Loc, "expected a single 32-bit register");
3098 return MCRegister();
3099 }
3100 if (NextRegKind != RegKind) {
3101 Error(Loc, "registers in a list must be of the same kind");
3102 return MCRegister();
3103 }
3104 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
3105 return MCRegister();
3106 }
3107
3108 if (!skipToken(AsmToken::RBrac,
3109 "expected a comma or a closing square bracket")) {
3110 return MCRegister();
3111 }
3112
3113 if (isRegularReg(RegKind))
3114 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3115
3116 return Reg;
3117}
3118
3119bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3120 MCRegister &Reg, unsigned &RegNum,
3121 unsigned &RegWidth,
3122 SmallVectorImpl<AsmToken> &Tokens) {
3123 auto Loc = getLoc();
3124 Reg = MCRegister();
3125
3126 if (isToken(AsmToken::Identifier)) {
3127 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3128 if (!Reg)
3129 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3130 } else {
3131 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3132 }
3133
3134 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3135 if (!Reg) {
3136 assert(Parser.hasPendingError());
3137 return false;
3138 }
3139
3140 if (!subtargetHasRegister(*TRI, Reg)) {
3141 if (Reg == AMDGPU::SGPR_NULL) {
3142 Error(Loc, "'null' operand is not supported on this GPU");
3143 } else {
3145 " register not available on this GPU");
3146 }
3147 return false;
3148 }
3149
3150 return true;
3151}
3152
3153bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3154 MCRegister &Reg, unsigned &RegNum,
3155 unsigned &RegWidth,
3156 bool RestoreOnFailure /*=false*/) {
3157 Reg = MCRegister();
3158
3159 SmallVector<AsmToken, 1> Tokens;
3160 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3161 if (RestoreOnFailure) {
3162 while (!Tokens.empty()) {
3163 getLexer().UnLex(Tokens.pop_back_val());
3164 }
3165 }
3166 return true;
3167 }
3168 return false;
3169}
3170
3171std::optional<StringRef>
3172AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3173 switch (RegKind) {
3174 case IS_VGPR:
3175 return StringRef(".amdgcn.next_free_vgpr");
3176 case IS_SGPR:
3177 return StringRef(".amdgcn.next_free_sgpr");
3178 default:
3179 return std::nullopt;
3180 }
3181}
3182
3183void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3184 auto SymbolName = getGprCountSymbolName(RegKind);
3185 assert(SymbolName && "initializing invalid register kind");
3186 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3188 Sym->setRedefinable(true);
3189}
3190
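// Example: a use of v[8:11] gives NewMax = 8 + 4 - 1 = 11 below, so
// .amdgcn.next_free_vgpr is raised whenever its current value does not
// already cover v11.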
3191bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3192 unsigned DwordRegIndex,
3193 unsigned RegWidth) {
3194 // Symbols are only defined for GCN targets
3195 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3196 return true;
3197
3198 auto SymbolName = getGprCountSymbolName(RegKind);
3199 if (!SymbolName)
3200 return true;
3201 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3202
3203 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3204 int64_t OldCount;
3205
3206 if (!Sym->isVariable())
3207 return !Error(getLoc(),
3208 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3209 if (!Sym->getVariableValue()->evaluateAsAbsolute(OldCount))
3210 return !Error(
3211 getLoc(),
3212 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3213
3214 if (OldCount <= NewMax)
3216
3217 return true;
3218}
3219
3220std::unique_ptr<AMDGPUOperand>
3221AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3222 const auto &Tok = getToken();
3223 SMLoc StartLoc = Tok.getLoc();
3224 SMLoc EndLoc = Tok.getEndLoc();
3225 RegisterKind RegKind;
3226 MCRegister Reg;
3227 unsigned RegNum, RegWidth;
3228
3229 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3230 return nullptr;
3231 }
3232 if (isHsaAbi(getSTI())) {
3233 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3234 return nullptr;
3235 } else
3236 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3237 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3238}
3239
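// Accepted immediate forms include floating-point literals with an optional
// sign (e.g. "-1.0", "0.5"), integer literals and integer expressions, plus
// the explicit wrappers "lit(...)" and "lit64(...)", which force the value
// to be encoded as a literal rather than an inline constant.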
3240ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3241 bool HasSP3AbsModifier, LitModifier Lit) {
3242 // TODO: add syntactic sugar for 1/(2*PI)
3243
3244 if (isRegister() || isModifier())
3245 return ParseStatus::NoMatch;
3246
3247 if (Lit == LitModifier::None) {
3248 if (trySkipId("lit"))
3249 Lit = LitModifier::Lit;
3250 else if (trySkipId("lit64"))
3251 Lit = LitModifier::Lit64;
3252
3253 if (Lit != LitModifier::None) {
3254 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3255 return ParseStatus::Failure;
3256 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, Lit);
3257 if (S.isSuccess() &&
3258 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3259 return ParseStatus::Failure;
3260 return S;
3261 }
3262 }
3263
3264 const auto& Tok = getToken();
3265 const auto& NextTok = peekToken();
3266 bool IsReal = Tok.is(AsmToken::Real);
3267 SMLoc S = getLoc();
3268 bool Negate = false;
3269
3270 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3271 lex();
3272 IsReal = true;
3273 Negate = true;
3274 }
3275
3276 AMDGPUOperand::Modifiers Mods;
3277 Mods.Lit = Lit;
3278
3279 if (IsReal) {
3280 // Floating-point expressions are not supported.
3281 // Can only allow floating-point literals with an
3282 // optional sign.
3283
3284 StringRef Num = getTokenStr();
3285 lex();
3286
3287 APFloat RealVal(APFloat::IEEEdouble());
3288 auto roundMode = APFloat::rmNearestTiesToEven;
3289 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3290 return ParseStatus::Failure;
3291 if (Negate)
3292 RealVal.changeSign();
3293
3294 Operands.push_back(
3295 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3296 AMDGPUOperand::ImmTyNone, true));
3297 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3298 Op.setModifiers(Mods);
3299
3300 return ParseStatus::Success;
3301
3302 } else {
3303 int64_t IntVal;
3304 const MCExpr *Expr;
3305 SMLoc S = getLoc();
3306
3307 if (HasSP3AbsModifier) {
3308 // This is a workaround for handling expressions
3309 // as arguments of SP3 'abs' modifier, for example:
3310 // |1.0|
3311 // |-1|
3312 // |1+x|
3313 // This syntax is not compatible with syntax of standard
3314 // MC expressions (due to the trailing '|').
3315 SMLoc EndLoc;
3316 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3317 return ParseStatus::Failure;
3318 } else {
3319 if (Parser.parseExpression(Expr))
3320 return ParseStatus::Failure;
3321 }
3322
3323 if (Expr->evaluateAsAbsolute(IntVal)) {
3324 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3325 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3326 Op.setModifiers(Mods);
3327 } else {
3328 if (Lit != LitModifier::None)
3329 return ParseStatus::NoMatch;
3330 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3331 }
3332
3333 return ParseStatus::Success;
3334 }
3335
3336 return ParseStatus::NoMatch;
3337}
3338
3339ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3340 if (!isRegister())
3341 return ParseStatus::NoMatch;
3342
3343 if (auto R = parseRegister()) {
3344 assert(R->isReg());
3345 Operands.push_back(std::move(R));
3346 return ParseStatus::Success;
3347 }
3348 return ParseStatus::Failure;
3349}
3350
3351ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3352 bool HasSP3AbsMod, LitModifier Lit) {
3353 ParseStatus Res = parseReg(Operands);
3354 if (!Res.isNoMatch())
3355 return Res;
3356 if (isModifier())
3357 return ParseStatus::NoMatch;
3358 return parseImm(Operands, HasSP3AbsMod, Lit);
3359}
3360
3361bool
3362AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3363 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3364 const auto &str = Token.getString();
3365 return str == "abs" || str == "neg" || str == "sext";
3366 }
3367 return false;
3368}
3369
3370bool
3371AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3372 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3373}
3374
3375bool
3376AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3377 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3378}
3379
3380bool
3381AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3382 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3383}
3384
3385// Check if this is an operand modifier or an opcode modifier
3386// which may look like an expression but is not. We should
3387// avoid parsing these modifiers as expressions. Currently
3388// recognized sequences are:
3389// |...|
3390// abs(...)
3391// neg(...)
3392// sext(...)
3393// -reg
3394// -|...|
3395// -abs(...)
3396// name:...
3397//
3398bool
3399AMDGPUAsmParser::isModifier() {
3400
3401 AsmToken Tok = getToken();
3402 AsmToken NextToken[2];
3403 peekTokens(NextToken);
3404
3405 return isOperandModifier(Tok, NextToken[0]) ||
3406 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3407 isOpcodeModifierWithVal(Tok, NextToken[0]);
3408}
3409
3410// Check if the current token is an SP3 'neg' modifier.
3411// Currently this modifier is allowed in the following context:
3412//
3413// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3414// 2. Before an 'abs' modifier: -abs(...)
3415// 3. Before an SP3 'abs' modifier: -|...|
3416//
3417// In all other cases "-" is handled as a part
3418// of an expression that follows the sign.
3419//
3420// Note: When "-" is followed by an integer literal N,
3421// this is interpreted as integer negation rather
3422// than a floating-point NEG modifier applied to N.
3423// Besides being counter-intuitive, such use of the floating-point
3424// NEG modifier would have resulted in different meanings
3425// of integer literals used with VOP1/2/C and VOP3,
3426// for example:
3427// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3428// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3429// Negative fp literals with a preceding "-" are
3430// handled likewise for uniformity.
3431//
3432bool
3433AMDGPUAsmParser::parseSP3NegModifier() {
3434
3435 AsmToken NextToken[2];
3436 peekTokens(NextToken);
3437
3438 if (isToken(AsmToken::Minus) &&
3439 (isRegister(NextToken[0], NextToken[1]) ||
3440 NextToken[0].is(AsmToken::Pipe) ||
3441 isId(NextToken[0], "abs"))) {
3442 lex();
3443 return true;
3444 }
3445
3446 return false;
3447}
3448
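// Examples of operands accepted below: "v0", "-v0", "abs(v1)", "-abs(v1)",
// "|v1|", "-|v1|", "neg(2.0)", "lit(1.0)". Combining the named and SP3 forms
// of the same modifier, e.g. "abs(|v1|)", is rejected with
// "expected register or immediate".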
3449ParseStatus
3450AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3451 bool AllowImm) {
3452 bool Neg, SP3Neg;
3453 bool Abs, SP3Abs;
3454 SMLoc Loc;
3455
3456 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3457 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3458 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3459
3460 SP3Neg = parseSP3NegModifier();
3461
3462 Loc = getLoc();
3463 Neg = trySkipId("neg");
3464 if (Neg && SP3Neg)
3465 return Error(Loc, "expected register or immediate");
3466 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3467 return ParseStatus::Failure;
3468
3469 Abs = trySkipId("abs");
3470 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3471 return ParseStatus::Failure;
3472
3473 LitModifier Lit = LitModifier::None;
3474 if (trySkipId("lit")) {
3475 Lit = LitModifier::Lit;
3476 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3477 return ParseStatus::Failure;
3478 } else if (trySkipId("lit64")) {
3479 Lit = LitModifier::Lit64;
3480 if (!skipToken(AsmToken::LParen, "expected left paren after lit64"))
3481 return ParseStatus::Failure;
3482 if (!has64BitLiterals())
3483 return Error(Loc, "lit64 is not supported on this GPU");
3484 }
3485
3486 Loc = getLoc();
3487 SP3Abs = trySkipToken(AsmToken::Pipe);
3488 if (Abs && SP3Abs)
3489 return Error(Loc, "expected register or immediate");
3490
3491 ParseStatus Res;
3492 if (AllowImm) {
3493 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3494 } else {
3495 Res = parseReg(Operands);
3496 }
3497 if (!Res.isSuccess())
3498 return (SP3Neg || Neg || SP3Abs || Abs || Lit != LitModifier::None)
3499 ? ParseStatus::Failure
3500 : Res;
3501
3502 if (Lit != LitModifier::None && !Operands.back()->isImm())
3503 Error(Loc, "expected immediate with lit modifier");
3504
3505 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3506 return ParseStatus::Failure;
3507 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3508 return ParseStatus::Failure;
3509 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3510 return ParseStatus::Failure;
3511 if (Lit != LitModifier::None &&
3512 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3513 return ParseStatus::Failure;
3514
3515 AMDGPUOperand::Modifiers Mods;
3516 Mods.Abs = Abs || SP3Abs;
3517 Mods.Neg = Neg || SP3Neg;
3518 Mods.Lit = Lit;
3519
3520 if (Mods.hasFPModifiers() || Lit != LitModifier::None) {
3521 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3522 if (Op.isExpr())
3523 return Error(Op.getStartLoc(), "expected an absolute expression");
3524 Op.setModifiers(Mods);
3525 }
3526 return ParseStatus::Success;
3527}
3528
3529ParseStatus
3530AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3531 bool AllowImm) {
3532 bool Sext = trySkipId("sext");
3533 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3534 return ParseStatus::Failure;
3535
3536 ParseStatus Res;
3537 if (AllowImm) {
3538 Res = parseRegOrImm(Operands);
3539 } else {
3540 Res = parseReg(Operands);
3541 }
3542 if (!Res.isSuccess())
3543 return Sext ? ParseStatus::Failure : Res;
3544
3545 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3546 return ParseStatus::Failure;
3547
3548 AMDGPUOperand::Modifiers Mods;
3549 Mods.Sext = Sext;
3550
3551 if (Mods.hasIntModifiers()) {
3552 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3553 if (Op.isExpr())
3554 return Error(Op.getStartLoc(), "expected an absolute expression");
3555 Op.setModifiers(Mods);
3556 }
3557
3558 return ParseStatus::Success;
3559}
3560
3561ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3562 return parseRegOrImmWithFPInputMods(Operands, false);
3563}
3564
3565ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3566 return parseRegOrImmWithIntInputMods(Operands, false);
3567}
3568
3569ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3570 auto Loc = getLoc();
3571 if (trySkipId("off")) {
3572 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3573 AMDGPUOperand::ImmTyOff, false));
3574 return ParseStatus::Success;
3575 }
3576
3577 if (!isRegister())
3578 return ParseStatus::NoMatch;
3579
3580 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3581 if (Reg) {
3582 Operands.push_back(std::move(Reg));
3583 return ParseStatus::Success;
3584 }
3585
3586 return ParseStatus::Failure;
3587}
3588
3589unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3590 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3591
3592 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3593 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3594 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3595 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3596 return Match_InvalidOperand;
3597
3598 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3599 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3600 // v_mac_f32/16 allow only dst_sel == DWORD;
3601 auto OpNum =
3602 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3603 const auto &Op = Inst.getOperand(OpNum);
3604 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3605 return Match_InvalidOperand;
3606 }
3607 }
3608
3609 // Asm can first try to match VOPD or VOPD3. By failing early here with
3610 // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3611 // Checking later during validateInstruction does not give a chance to retry
3612 // parsing as a different encoding.
3613 if (tryAnotherVOPDEncoding(Inst))
3614 return Match_InvalidOperand;
3615
3616 return Match_Success;
3617}
3618
3619 static ArrayRef<unsigned> getAllVariants() {
3620   static const unsigned Variants[] = {
3621     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3622     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3623     AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3624   };
3625 
3626   return ArrayRef(Variants);
3627 }
3628 
3629// What asm variants we should check
3630ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3631 if (isForcedDPP() && isForcedVOP3()) {
3632 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3633 return ArrayRef(Variants);
3634 }
3635 if (getForcedEncodingSize() == 32) {
3636 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3637 return ArrayRef(Variants);
3638 }
3639
3640 if (isForcedVOP3()) {
3641 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3642 return ArrayRef(Variants);
3643 }
3644
3645 if (isForcedSDWA()) {
3646     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3647                                         AMDGPUAsmVariants::SDWA9};
3648     return ArrayRef(Variants);
3649 }
3650
3651 if (isForcedDPP()) {
3652 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3653 return ArrayRef(Variants);
3654 }
3655
3656 return getAllVariants();
3657}
3658
3659StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3660 if (isForcedDPP() && isForcedVOP3())
3661 return "e64_dpp";
3662
3663 if (getForcedEncodingSize() == 32)
3664 return "e32";
3665
3666 if (isForcedVOP3())
3667 return "e64";
3668
3669 if (isForcedSDWA())
3670 return "sdwa";
3671
3672 if (isForcedDPP())
3673 return "dpp";
3674
3675 return "";
3676}
3677
3678MCRegister
3679AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3680 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3681 for (MCPhysReg Reg : Desc.implicit_uses()) {
3682 switch (Reg) {
3683 case AMDGPU::FLAT_SCR:
3684 case AMDGPU::VCC:
3685 case AMDGPU::VCC_LO:
3686 case AMDGPU::VCC_HI:
3687 case AMDGPU::M0:
3688 return Reg;
3689 default:
3690 break;
3691 }
3692 }
3693 return MCRegister();
3694}
3695
3696// NB: This code is correct only when used to check constant
3697 // bus limitations because GFX7 supports no f16 inline constants.
3698// Note that there are no cases when a GFX7 opcode violates
3699// constant bus limitations due to the use of an f16 constant.
3700bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3701 unsigned OpIdx) const {
3702 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3703
3706 return false;
3707 }
3708
3709 const MCOperand &MO = Inst.getOperand(OpIdx);
3710
3711 int64_t Val = MO.isImm() ? MO.getImm() : getLitValue(MO.getExpr());
3712 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3713
3714 switch (OpSize) { // expected operand size
3715 case 8:
3716 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3717 case 4:
3718 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3719   case 2: {
3720     const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3721     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3722         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16)
3723       return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3724 
3725     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3726         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3727       return AMDGPU::isInlinableLiteralV2I16(Val);
3728 
3729     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3730         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3731       return AMDGPU::isInlinableLiteralV2F16(Val);
3732 
3733     if (OperandType == AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16)
3734       return false;
3735 
3736     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2BF16 ||
3737         OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16)
3738       return AMDGPU::isInlinableLiteralV2BF16(Val);
3739 
3740     if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 ||
3741         OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16)
3742       return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3743 
3744     if (OperandType == AMDGPU::OPERAND_REG_IMM_BF16 ||
3745         OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16)
3746       return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3747 
3748     if (OperandType == AMDGPU::OPERAND_KIMM16)
3749       return false;
3750 
3751     llvm_unreachable("invalid operand type");
3752   }
3753 default:
3754 llvm_unreachable("invalid operand size");
3755 }
3756}
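// Illustrative examples (assuming the usual AMDGPU inline-constant set, not
// taken from the original source): for a 32-bit FP operand, 1.0 (0x3f800000),
// -4.0 and 0.5 are inline constants, while 0x3f800001 must be emitted as a
// literal; for integer operands, only values in [-16, 64] are inlinable.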
3757
3758unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3759 if (!isGFX10Plus())
3760 return 1;
3761
3762 switch (Opcode) {
3763 // 64-bit shift instructions can use only one scalar value input
3764 case AMDGPU::V_LSHLREV_B64_e64:
3765 case AMDGPU::V_LSHLREV_B64_gfx10:
3766 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3767 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3768 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3769 case AMDGPU::V_LSHRREV_B64_e64:
3770 case AMDGPU::V_LSHRREV_B64_gfx10:
3771 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3772 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3773 case AMDGPU::V_ASHRREV_I64_e64:
3774 case AMDGPU::V_ASHRREV_I64_gfx10:
3775 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3776 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3777 case AMDGPU::V_LSHL_B64_e64:
3778 case AMDGPU::V_LSHR_B64_e64:
3779 case AMDGPU::V_ASHR_I64_e64:
3780 return 1;
3781 default:
3782 return 2;
3783 }
3784}
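// Illustrative example of the limits above (hedged, not from the original
// source): on GFX10+ most VALU instructions may read two scalar values, so
// "v_add_f32_e64 v0, s1, s2" is accepted, while the 64-bit shifts listed
// above keep the limit of one, so "v_lshlrev_b64 v[0:1], s4, s[2:3]" would be
// rejected by validateConstantBusLimitations().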
3785
3786 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3787 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3788 
3789 // Get regular operand indices in the same order as specified
3790 // in the instruction (but append mandatory literals to the end).
3791 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3792                                            bool AddMandatoryLiterals = false) {
3793
3794 int16_t ImmIdx =
3795 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3796
3797 if (isVOPD(Opcode)) {
3798 int16_t ImmXIdx =
3799 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
3800
3801 return {getNamedOperandIdx(Opcode, OpName::src0X),
3802 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3803 getNamedOperandIdx(Opcode, OpName::vsrc2X),
3804 getNamedOperandIdx(Opcode, OpName::src0Y),
3805 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3806 getNamedOperandIdx(Opcode, OpName::vsrc2Y),
3807 ImmXIdx,
3808 ImmIdx};
3809 }
3810
3811 return {getNamedOperandIdx(Opcode, OpName::src0),
3812 getNamedOperandIdx(Opcode, OpName::src1),
3813 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3814}
3815
3816bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3817 const MCOperand &MO = Inst.getOperand(OpIdx);
3818 if (MO.isImm())
3819 return !isInlineConstant(Inst, OpIdx);
3820 if (MO.isReg()) {
3821 auto Reg = MO.getReg();
3822 if (!Reg)
3823 return false;
3824 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3825 auto PReg = mc2PseudoReg(Reg);
3826 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3827 }
3828 return true;
3829}
3830
3831// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3832// Writelane is special in that it can use SGPR and M0 (which would normally
3833// count as using the constant bus twice - but in this case it is allowed since
3834// the lane selector doesn't count as a use of the constant bus). However, it is
3835// still required to abide by the 1 SGPR rule.
3836static bool checkWriteLane(const MCInst &Inst) {
3837 const unsigned Opcode = Inst.getOpcode();
3838 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3839 return false;
3840 const MCOperand &LaneSelOp = Inst.getOperand(2);
3841 if (!LaneSelOp.isReg())
3842 return false;
3843 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3844 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3845}
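// Illustrative example (hedged, hypothetical operands):
//   v_writelane_b32 v0, s1, m0
// reads both an SGPR and M0, which would naively look like two constant bus
// uses, but is accepted here because the M0 lane selector is not counted.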
3846
3847bool AMDGPUAsmParser::validateConstantBusLimitations(
3848 const MCInst &Inst, const OperandVector &Operands) {
3849 const unsigned Opcode = Inst.getOpcode();
3850 const MCInstrDesc &Desc = MII.get(Opcode);
3851 MCRegister LastSGPR;
3852 unsigned ConstantBusUseCount = 0;
3853 unsigned NumLiterals = 0;
3854 unsigned LiteralSize;
3855
3856   if (!(Desc.TSFlags &
3857         (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3858          SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
3859       !isVOPD(Opcode))
3860 return true;
3861
3862 if (checkWriteLane(Inst))
3863 return true;
3864
3865 // Check special imm operands (used by madmk, etc)
3866 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3867 ++NumLiterals;
3868 LiteralSize = 4;
3869 }
3870
3871 SmallDenseSet<MCRegister> SGPRsUsed;
3872 MCRegister SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3873 if (SGPRUsed) {
3874 SGPRsUsed.insert(SGPRUsed);
3875 ++ConstantBusUseCount;
3876 }
3877
3878 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3879
3880 unsigned ConstantBusLimit = getConstantBusLimit(Opcode);
3881
3882 for (int OpIdx : OpIndices) {
3883 if (OpIdx == -1)
3884 continue;
3885
3886 const MCOperand &MO = Inst.getOperand(OpIdx);
3887 if (usesConstantBus(Inst, OpIdx)) {
3888 if (MO.isReg()) {
3889 LastSGPR = mc2PseudoReg(MO.getReg());
3890         // Pairs of registers with a partial intersection like these:
3891 // s0, s[0:1]
3892 // flat_scratch_lo, flat_scratch
3893 // flat_scratch_lo, flat_scratch_hi
3894 // are theoretically valid but they are disabled anyway.
3895 // Note that this code mimics SIInstrInfo::verifyInstruction
3896 if (SGPRsUsed.insert(LastSGPR).second) {
3897 ++ConstantBusUseCount;
3898 }
3899 } else { // Expression or a literal
3900
3901 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3902 continue; // special operand like VINTERP attr_chan
3903
3904 // An instruction may use only one literal.
3905 // This has been validated on the previous step.
3906 // See validateVOPLiteral.
3907 // This literal may be used as more than one operand.
3908 // If all these operands are of the same size,
3909 // this literal counts as one scalar value.
3910 // Otherwise it counts as 2 scalar values.
3911 // See "GFX10 Shader Programming", section 3.6.2.3.
3912
3913         unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3914         if (Size < 4)
3915 Size = 4;
3916
3917 if (NumLiterals == 0) {
3918 NumLiterals = 1;
3919 LiteralSize = Size;
3920 } else if (LiteralSize != Size) {
3921 NumLiterals = 2;
3922 }
3923 }
3924 }
3925
3926 if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
3927 Error(getOperandLoc(Operands, OpIdx),
3928 "invalid operand (violates constant bus restrictions)");
3929 return false;
3930 }
3931 }
3932 return true;
3933}
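// Illustrative example of the literal-size rule above (hedged, hypothetical
// GFX10+ VOP3 encoding with the VOP3Literal feature): if the same 32-bit
// literal feeds two operands of the same size it counts as a single scalar
// value, but if the operands have different sizes (e.g. one 16-bit and one
// 32-bit use of the same encoding) the literal counts as two scalar values
// and may push the instruction over the constant bus limit.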
3934
3935std::optional<unsigned>
3936AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
3937
3938 const unsigned Opcode = Inst.getOpcode();
3939 if (!isVOPD(Opcode))
3940 return {};
3941
3942 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3943
3944 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3945 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3946 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3947 ? Opr.getReg()
3948 : MCRegister();
3949 };
3950
3951   // On GFX12+, if both OpX and OpY are V_MOV_B32, then OpY uses the SRC2
3952 // source-cache.
3953 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
3954 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
3955 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
3956 bool AllowSameVGPR = isGFX1250();
3957
3958 if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
3959 for (auto OpName : {OpName::src0X, OpName::src0Y}) {
3960 int I = getNamedOperandIdx(Opcode, OpName);
3961 const MCOperand &Op = Inst.getOperand(I);
3962 if (!Op.isImm())
3963 continue;
3964 int64_t Imm = Op.getImm();
3965 if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
3966 !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
3967 return (unsigned)I;
3968 }
3969
3970 for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
3971 OpName::vsrc2Y, OpName::imm}) {
3972 int I = getNamedOperandIdx(Opcode, OpName);
3973 if (I == -1)
3974 continue;
3975 const MCOperand &Op = Inst.getOperand(I);
3976 if (Op.isImm())
3977 return (unsigned)I;
3978 }
3979 }
3980
3981 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3982 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
3983 getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
3984
3985 return InvalidCompOprIdx;
3986}
3987
3988bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
3989 const OperandVector &Operands) {
3990
3991 unsigned Opcode = Inst.getOpcode();
3992 bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
3993
3994 if (AsVOPD3) {
3995 for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
3996 AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
3997 if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
3998 (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
3999 Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
4000 }
4001 }
4002
4003 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
4004 if (!InvalidCompOprIdx.has_value())
4005 return true;
4006
4007 auto CompOprIdx = *InvalidCompOprIdx;
4008 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
4009 auto ParsedIdx =
4010 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
4011 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
4012 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
4013
4014 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
4015 if (CompOprIdx == VOPD::Component::DST) {
4016 if (AsVOPD3)
4017 Error(Loc, "dst registers must be distinct");
4018 else
4019 Error(Loc, "one dst register must be even and the other odd");
4020 } else {
4021 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
4022 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
4023 " operands must use different VGPR banks");
4024 }
4025
4026 return false;
4027}
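// Illustrative examples of the diagnostics above (hedged, hypothetical GFX11+
// VOPD syntax): "v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3" satisfies
// the checks (one even and one odd dst, src0 operands in different VGPR
// banks), whereas using v0 and v2 as the two dst registers would trigger
// "one dst register must be even and the other odd".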
4028
4029// \returns true if \p Inst does not satisfy VOPD constraints, but can be
4030// potentially used as VOPD3 with the same operands.
4031bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
4032 // First check if it fits VOPD
4033 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
4034 if (!InvalidCompOprIdx.has_value())
4035 return false;
4036
4037 // Then if it fits VOPD3
4038 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
4039 if (InvalidCompOprIdx.has_value()) {
4040 // If failed operand is dst it is better to show error about VOPD3
4041 // instruction as it has more capabilities and error message will be
4042 // more informative. If the dst is not legal for VOPD3, then it is not
4043 // legal for VOPD either.
4044 if (*InvalidCompOprIdx == VOPD::Component::DST)
4045 return true;
4046
4047 // Otherwise prefer VOPD as we may find ourselves in an awkward situation
4048 // with a conflict in tied implicit src2 of fmac and no asm operand to
4049     // point to.
4050 return false;
4051 }
4052 return true;
4053}
4054
4055 // \returns true if a VOPD3 instruction can also be represented as a shorter
4056// VOPD encoding.
4057bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
4058 const unsigned Opcode = Inst.getOpcode();
4059 const auto &II = getVOPDInstInfo(Opcode, &MII);
4060 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
4061 if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
4062 !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
4063 return false;
4064
4065 // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
4066 // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
4067 // be parsed as VOPD which does not accept src2.
4068 if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
4069 II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
4070 return false;
4071
4072 // If any modifiers are set this cannot be VOPD.
4073 for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4074 OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4075 OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4076 int I = getNamedOperandIdx(Opcode, OpName);
4077 if (I == -1)
4078 continue;
4079 if (Inst.getOperand(I).getImm())
4080 return false;
4081 }
4082
4083 return !tryVOPD3(Inst);
4084}
4085
4086 // VOPD3 has more relaxed register constraints than VOPD. We prefer the shorter
4087 // VOPD form but switch to VOPD3 otherwise.
4088bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4089 const unsigned Opcode = Inst.getOpcode();
4090 if (!isGFX1250() || !isVOPD(Opcode))
4091 return false;
4092
4093 if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4094 return tryVOPD(Inst);
4095 return tryVOPD3(Inst);
4096}
4097
4098bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
4099
4100 const unsigned Opc = Inst.getOpcode();
4101 const MCInstrDesc &Desc = MII.get(Opc);
4102
4103 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
4104 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
4105 assert(ClampIdx != -1);
4106 return Inst.getOperand(ClampIdx).getImm() == 0;
4107 }
4108
4109 return true;
4110}
4111
4112 constexpr uint64_t MIMGFlags =
4113     SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
4114 
4115bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc) {
4116
4117 const unsigned Opc = Inst.getOpcode();
4118 const MCInstrDesc &Desc = MII.get(Opc);
4119
4120 if ((Desc.TSFlags & MIMGFlags) == 0)
4121 return true;
4122
4123 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
4124 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4125 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
4126
4127 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
4128 return true;
4129
4130 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
4131 return true;
4132
4133 unsigned VDataSize = getRegOperandSize(Desc, VDataIdx);
4134 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
4135 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4136 if (DMask == 0)
4137 DMask = 1;
4138
4139 bool IsPackedD16 = false;
4140 unsigned DataSize =
4141 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
4142 if (hasPackedD16()) {
4143 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4144 IsPackedD16 = D16Idx >= 0;
4145 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
4146 DataSize = (DataSize + 1) / 2;
4147 }
4148
4149 if ((VDataSize / 4) == DataSize + TFESize)
4150 return true;
4151
4152 StringRef Modifiers;
4153 if (isGFX90A())
4154 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
4155 else
4156 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
4157
4158 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
4159 return false;
4160}
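// Worked example of the size check above (hedged, hypothetical operands): an
// image_load with dmask:0x7 returns popcount(7) = 3 dwords, so vdata must be
// a 3-register tuple such as v[0:2]; with tfe that becomes 4 registers, and
// with packed d16 (where supported) the 3 components pack into
// (3 + 1) / 2 = 2 registers.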
4161
4162bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc) {
4163 const unsigned Opc = Inst.getOpcode();
4164 const MCInstrDesc &Desc = MII.get(Opc);
4165
4166 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
4167 return true;
4168
4169 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4170
4171   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4172       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4173 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
4174 AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
4175 ? AMDGPU::OpName::srsrc
4176 : AMDGPU::OpName::rsrc;
4177 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
4178 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4179 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
4180
4181 assert(VAddr0Idx != -1);
4182 assert(SrsrcIdx != -1);
4183 assert(SrsrcIdx > VAddr0Idx);
4184
4185 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
4186 if (BaseOpcode->BVH) {
4187 if (IsA16 == BaseOpcode->A16)
4188 return true;
4189 Error(IDLoc, "image address size does not match a16");
4190 return false;
4191 }
4192
4193 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4194 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4195 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4196 unsigned ActualAddrSize =
4197 IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(Desc, VAddr0Idx) / 4;
4198
4199 unsigned ExpectedAddrSize =
4200 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
4201
4202 if (IsNSA) {
4203 if (hasPartialNSAEncoding() &&
4204         ExpectedAddrSize >
4205             getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
4206 int VAddrLastIdx = SrsrcIdx - 1;
4207 unsigned VAddrLastSize = getRegOperandSize(Desc, VAddrLastIdx) / 4;
4208
4209 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4210 }
4211 } else {
4212 if (ExpectedAddrSize > 12)
4213 ExpectedAddrSize = 16;
4214
4215 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
4216 // This provides backward compatibility for assembly created
4217 // before 160b/192b/224b types were directly supported.
4218 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4219 return true;
4220 }
4221
4222 if (ActualAddrSize == ExpectedAddrSize)
4223 return true;
4224
4225 Error(IDLoc, "image address size does not match dim and a16");
4226 return false;
4227}
4228
4229bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4230
4231 const unsigned Opc = Inst.getOpcode();
4232 const MCInstrDesc &Desc = MII.get(Opc);
4233
4234 if ((Desc.TSFlags & MIMGFlags) == 0)
4235 return true;
4236 if (!Desc.mayLoad() || !Desc.mayStore())
4237 return true; // Not atomic
4238
4239 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4240 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4241
4242 // This is an incomplete check because image_atomic_cmpswap
4243 // may only use 0x3 and 0xf while other atomic operations
4244 // may use 0x1 and 0x3. However these limitations are
4245 // verified when we check that dmask matches dst size.
4246 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4247}
4248
4249bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4250
4251 const unsigned Opc = Inst.getOpcode();
4252 const MCInstrDesc &Desc = MII.get(Opc);
4253
4254 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4255 return true;
4256
4257 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4258 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4259
4260 // GATHER4 instructions use dmask in a different fashion compared to
4261 // other MIMG instructions. The only useful DMASK values are
4262 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4263 // (red,red,red,red) etc.) The ISA document doesn't mention
4264 // this.
4265 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4266}
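// Illustrative example (hedged, hypothetical operands):
// "image_gather4 ... dmask:0x4" gathers the blue channel and is accepted,
// while dmask:0x5 (two channels) would fail this check.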
4267
4268bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4269 const OperandVector &Operands) {
4270 if (!isGFX10Plus())
4271 return true;
4272
4273 const unsigned Opc = Inst.getOpcode();
4274 const MCInstrDesc &Desc = MII.get(Opc);
4275
4276 if ((Desc.TSFlags & MIMGFlags) == 0)
4277 return true;
4278
4279   // image_bvh_intersect_ray instructions do not have dim
4280   if (AMDGPU::getMIMGBaseOpcodeInfo(AMDGPU::getMIMGInfo(Opc)->BaseOpcode)->BVH)
4281     return true;
4282
4283 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4284 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4285 if (Op.isDim())
4286 return true;
4287 }
4288 return false;
4289}
4290
4291bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4292 const unsigned Opc = Inst.getOpcode();
4293 const MCInstrDesc &Desc = MII.get(Opc);
4294
4295 if ((Desc.TSFlags & MIMGFlags) == 0)
4296 return true;
4297
4298 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4299   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4300       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4301
4302 if (!BaseOpcode->MSAA)
4303 return true;
4304
4305 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4306 assert(DimIdx != -1);
4307
4308 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4309 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4310
4311 return DimInfo->MSAA;
4312}
4313
4314static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4315{
4316 switch (Opcode) {
4317 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4318 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4319 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4320 return true;
4321 default:
4322 return false;
4323 }
4324}
4325
4326 // movrels* opcodes should only allow VGPRs as src0.
4327// This is specified in .td description for vop1/vop3,
4328// but sdwa is handled differently. See isSDWAOperand.
4329bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4330 const OperandVector &Operands) {
4331
4332 const unsigned Opc = Inst.getOpcode();
4333 const MCInstrDesc &Desc = MII.get(Opc);
4334
4335 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4336 return true;
4337
4338 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4339 assert(Src0Idx != -1);
4340
4341 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4342 if (Src0.isReg()) {
4343 auto Reg = mc2PseudoReg(Src0.getReg());
4344 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4345 if (!isSGPR(Reg, TRI))
4346 return true;
4347 }
4348
4349 Error(getOperandLoc(Operands, Src0Idx), "source operand must be a VGPR");
4350 return false;
4351}
4352
4353bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4354 const OperandVector &Operands) {
4355
4356 const unsigned Opc = Inst.getOpcode();
4357
4358 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4359 return true;
4360
4361 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4362 assert(Src0Idx != -1);
4363
4364 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4365 if (!Src0.isReg())
4366 return true;
4367
4368 auto Reg = mc2PseudoReg(Src0.getReg());
4369 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4370 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4371 Error(getOperandLoc(Operands, Src0Idx),
4372 "source operand must be either a VGPR or an inline constant");
4373 return false;
4374 }
4375
4376 return true;
4377}
4378
4379bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4380 const OperandVector &Operands) {
4381 unsigned Opcode = Inst.getOpcode();
4382 const MCInstrDesc &Desc = MII.get(Opcode);
4383
4384 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4385 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4386 return true;
4387
4388 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4389 if (Src2Idx == -1)
4390 return true;
4391
4392 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4393 Error(getOperandLoc(Operands, Src2Idx),
4394 "inline constants are not allowed for this operand");
4395 return false;
4396 }
4397
4398 return true;
4399}
4400
4401bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4402 const OperandVector &Operands) {
4403 const unsigned Opc = Inst.getOpcode();
4404 const MCInstrDesc &Desc = MII.get(Opc);
4405
4406 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4407 return true;
4408
4409 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4410 if (BlgpIdx != -1) {
4411 if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
4412 int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
4413
4414 unsigned CBSZ = Inst.getOperand(CbszIdx).getImm();
4415 unsigned BLGP = Inst.getOperand(BlgpIdx).getImm();
4416
4417 // Validate the correct register size was used for the floating point
4418 // format operands
4419
4420 bool Success = true;
4421 if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) {
4422 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4423 Error(getOperandLoc(Operands, Src0Idx),
4424 "wrong register tuple size for cbsz value " + Twine(CBSZ));
4425 Success = false;
4426 }
4427
4428 if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(BLGP)) {
4429 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4430 Error(getOperandLoc(Operands, Src1Idx),
4431 "wrong register tuple size for blgp value " + Twine(BLGP));
4432 Success = false;
4433 }
4434
4435 return Success;
4436 }
4437 }
4438
4439 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4440 if (Src2Idx == -1)
4441 return true;
4442
4443 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4444 if (!Src2.isReg())
4445 return true;
4446
4447 MCRegister Src2Reg = Src2.getReg();
4448 MCRegister DstReg = Inst.getOperand(0).getReg();
4449 if (Src2Reg == DstReg)
4450 return true;
4451
4452 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4453 if (TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[0], HwMode))
4454 .getSizeInBits() <= 128)
4455 return true;
4456
4457 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4458 Error(getOperandLoc(Operands, Src2Idx),
4459 "source 2 operand must not partially overlap with dst");
4460 return false;
4461 }
4462
4463 return true;
4464}
4465
4466bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4467 switch (Inst.getOpcode()) {
4468 default:
4469 return true;
4470 case V_DIV_SCALE_F32_gfx6_gfx7:
4471 case V_DIV_SCALE_F32_vi:
4472 case V_DIV_SCALE_F32_gfx10:
4473 case V_DIV_SCALE_F64_gfx6_gfx7:
4474 case V_DIV_SCALE_F64_vi:
4475 case V_DIV_SCALE_F64_gfx10:
4476 break;
4477 }
4478
4479 // TODO: Check that src0 = src1 or src2.
4480
4481 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4482                     AMDGPU::OpName::src1_modifiers,
4483 AMDGPU::OpName::src2_modifiers}) {
4484 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4485             .getImm() &
4486         SISrcMods::ABS) {
4487       return false;
4488 }
4489 }
4490
4491 return true;
4492}
4493
4494bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4495
4496 const unsigned Opc = Inst.getOpcode();
4497 const MCInstrDesc &Desc = MII.get(Opc);
4498
4499 if ((Desc.TSFlags & MIMGFlags) == 0)
4500 return true;
4501
4502 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4503 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4504 if (isCI() || isSI())
4505 return false;
4506 }
4507
4508 return true;
4509}
4510
4511bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4512 const unsigned Opc = Inst.getOpcode();
4513 const MCInstrDesc &Desc = MII.get(Opc);
4514
4515 if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0)
4516 return true;
4517
4518 int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
4519
4520 return R128Idx < 0 || !Inst.getOperand(R128Idx).getImm();
4521}
4522
4523static bool IsRevOpcode(const unsigned Opcode)
4524{
4525 switch (Opcode) {
4526 case AMDGPU::V_SUBREV_F32_e32:
4527 case AMDGPU::V_SUBREV_F32_e64:
4528 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4529 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4530 case AMDGPU::V_SUBREV_F32_e32_vi:
4531 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4532 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4533 case AMDGPU::V_SUBREV_F32_e64_vi:
4534
4535 case AMDGPU::V_SUBREV_CO_U32_e32:
4536 case AMDGPU::V_SUBREV_CO_U32_e64:
4537 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4538 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4539
4540 case AMDGPU::V_SUBBREV_U32_e32:
4541 case AMDGPU::V_SUBBREV_U32_e64:
4542 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4543 case AMDGPU::V_SUBBREV_U32_e32_vi:
4544 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4545 case AMDGPU::V_SUBBREV_U32_e64_vi:
4546
4547 case AMDGPU::V_SUBREV_U32_e32:
4548 case AMDGPU::V_SUBREV_U32_e64:
4549 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4550 case AMDGPU::V_SUBREV_U32_e32_vi:
4551 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4552 case AMDGPU::V_SUBREV_U32_e64_vi:
4553
4554 case AMDGPU::V_SUBREV_F16_e32:
4555 case AMDGPU::V_SUBREV_F16_e64:
4556 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4557 case AMDGPU::V_SUBREV_F16_e32_vi:
4558 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4559 case AMDGPU::V_SUBREV_F16_e64_vi:
4560
4561 case AMDGPU::V_SUBREV_U16_e32:
4562 case AMDGPU::V_SUBREV_U16_e64:
4563 case AMDGPU::V_SUBREV_U16_e32_vi:
4564 case AMDGPU::V_SUBREV_U16_e64_vi:
4565
4566 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4567 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4568 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4569
4570 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4571 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4572
4573 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4574 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4575
4576 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4577 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4578
4579 case AMDGPU::V_LSHRREV_B32_e32:
4580 case AMDGPU::V_LSHRREV_B32_e64:
4581 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4582 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4583 case AMDGPU::V_LSHRREV_B32_e32_vi:
4584 case AMDGPU::V_LSHRREV_B32_e64_vi:
4585 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4586 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4587
4588 case AMDGPU::V_ASHRREV_I32_e32:
4589 case AMDGPU::V_ASHRREV_I32_e64:
4590 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4591 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4592 case AMDGPU::V_ASHRREV_I32_e32_vi:
4593 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4594 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4595 case AMDGPU::V_ASHRREV_I32_e64_vi:
4596
4597 case AMDGPU::V_LSHLREV_B32_e32:
4598 case AMDGPU::V_LSHLREV_B32_e64:
4599 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4600 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4601 case AMDGPU::V_LSHLREV_B32_e32_vi:
4602 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4603 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4604 case AMDGPU::V_LSHLREV_B32_e64_vi:
4605
4606 case AMDGPU::V_LSHLREV_B16_e32:
4607 case AMDGPU::V_LSHLREV_B16_e64:
4608 case AMDGPU::V_LSHLREV_B16_e32_vi:
4609 case AMDGPU::V_LSHLREV_B16_e64_vi:
4610 case AMDGPU::V_LSHLREV_B16_gfx10:
4611
4612 case AMDGPU::V_LSHRREV_B16_e32:
4613 case AMDGPU::V_LSHRREV_B16_e64:
4614 case AMDGPU::V_LSHRREV_B16_e32_vi:
4615 case AMDGPU::V_LSHRREV_B16_e64_vi:
4616 case AMDGPU::V_LSHRREV_B16_gfx10:
4617
4618 case AMDGPU::V_ASHRREV_I16_e32:
4619 case AMDGPU::V_ASHRREV_I16_e64:
4620 case AMDGPU::V_ASHRREV_I16_e32_vi:
4621 case AMDGPU::V_ASHRREV_I16_e64_vi:
4622 case AMDGPU::V_ASHRREV_I16_gfx10:
4623
4624 case AMDGPU::V_LSHLREV_B64_e64:
4625 case AMDGPU::V_LSHLREV_B64_gfx10:
4626 case AMDGPU::V_LSHLREV_B64_vi:
4627
4628 case AMDGPU::V_LSHRREV_B64_e64:
4629 case AMDGPU::V_LSHRREV_B64_gfx10:
4630 case AMDGPU::V_LSHRREV_B64_vi:
4631
4632 case AMDGPU::V_ASHRREV_I64_e64:
4633 case AMDGPU::V_ASHRREV_I64_gfx10:
4634 case AMDGPU::V_ASHRREV_I64_vi:
4635
4636 case AMDGPU::V_PK_LSHLREV_B16:
4637 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4638 case AMDGPU::V_PK_LSHLREV_B16_vi:
4639
4640 case AMDGPU::V_PK_LSHRREV_B16:
4641 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4642 case AMDGPU::V_PK_LSHRREV_B16_vi:
4643 case AMDGPU::V_PK_ASHRREV_I16:
4644 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4645 case AMDGPU::V_PK_ASHRREV_I16_vi:
4646 return true;
4647 default:
4648 return false;
4649 }
4650}
4651
4652bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst,
4653 const OperandVector &Operands) {
4654 using namespace SIInstrFlags;
4655 const unsigned Opcode = Inst.getOpcode();
4656 const MCInstrDesc &Desc = MII.get(Opcode);
4657
4658 // lds_direct register is defined so that it can be used
4659 // with 9-bit operands only. Ignore encodings which do not accept these.
4660 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4661 if ((Desc.TSFlags & Enc) == 0)
4662 return true;
4663
4664 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4665 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4666 if (SrcIdx == -1)
4667 break;
4668 const auto &Src = Inst.getOperand(SrcIdx);
4669 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4670
4671 if (isGFX90A() || isGFX11Plus()) {
4672 Error(getOperandLoc(Operands, SrcIdx),
4673 "lds_direct is not supported on this GPU");
4674 return false;
4675 }
4676
4677 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) {
4678 Error(getOperandLoc(Operands, SrcIdx),
4679 "lds_direct cannot be used with this instruction");
4680 return false;
4681 }
4682
4683 if (SrcName != OpName::src0) {
4684 Error(getOperandLoc(Operands, SrcIdx),
4685 "lds_direct may be used as src0 only");
4686 return false;
4687 }
4688 }
4689 }
4690
4691 return true;
4692}
4693
4694SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4695 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4696 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4697 if (Op.isFlatOffset())
4698 return Op.getStartLoc();
4699 }
4700 return getLoc();
4701}
4702
4703bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4704 const OperandVector &Operands) {
4705 auto Opcode = Inst.getOpcode();
4706 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4707 if (OpNum == -1)
4708 return true;
4709
4710 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4711 if ((TSFlags & SIInstrFlags::FLAT))
4712 return validateFlatOffset(Inst, Operands);
4713
4714 if ((TSFlags & SIInstrFlags::SMRD))
4715 return validateSMEMOffset(Inst, Operands);
4716
4717 const auto &Op = Inst.getOperand(OpNum);
4718   // GFX12+ buffer ops: InstOffset is a signed 24-bit field, but must not be negative.
4719 if (isGFX12Plus() &&
4720 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4721 const unsigned OffsetSize = 24;
4722 if (!isUIntN(OffsetSize - 1, Op.getImm())) {
4723 Error(getFlatOffsetLoc(Operands),
4724 Twine("expected a ") + Twine(OffsetSize - 1) +
4725 "-bit unsigned offset for buffer ops");
4726 return false;
4727 }
4728 } else {
4729 const unsigned OffsetSize = 16;
4730 if (!isUIntN(OffsetSize, Op.getImm())) {
4731 Error(getFlatOffsetLoc(Operands),
4732 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4733 return false;
4734 }
4735 }
4736 return true;
4737}
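// Illustrative example of the buffer-offset rule above (hedged, hypothetical
// GFX12+ MUBUF syntax): "buffer_load_b32 v0, v1, s[0:3], s4 offen
// offset:8388607" uses the maximum 23-bit unsigned offset and is accepted,
// while a negative offset or offset:8388608 would be rejected.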
4738
4739bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4740 const OperandVector &Operands) {
4741 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4742 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4743 return true;
4744
4745 auto Opcode = Inst.getOpcode();
4746 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4747 assert(OpNum != -1);
4748
4749 const auto &Op = Inst.getOperand(OpNum);
4750 if (!hasFlatOffsets() && Op.getImm() != 0) {
4751 Error(getFlatOffsetLoc(Operands),
4752 "flat offset modifier is not supported on this GPU");
4753 return false;
4754 }
4755
4756   // For pre-GFX12 FLAT instructions the offset must be non-negative;
4757 // MSB is ignored and forced to zero.
4758 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4759   bool AllowNegative =
4760       (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4761       isGFX12Plus();
4762 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4763 Error(getFlatOffsetLoc(Operands),
4764 Twine("expected a ") +
4765 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4766 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4767 return false;
4768 }
4769
4770 return true;
4771}
4772
4773SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4774 // Start with second operand because SMEM Offset cannot be dst or src0.
4775 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4776 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4777 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4778 return Op.getStartLoc();
4779 }
4780 return getLoc();
4781}
4782
4783bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4784 const OperandVector &Operands) {
4785 if (isCI() || isSI())
4786 return true;
4787
4788 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4789 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4790 return true;
4791
4792 auto Opcode = Inst.getOpcode();
4793 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4794 if (OpNum == -1)
4795 return true;
4796
4797 const auto &Op = Inst.getOperand(OpNum);
4798 if (!Op.isImm())
4799 return true;
4800
4801 uint64_t Offset = Op.getImm();
4802 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4803   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4804       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4805     return true;
4806
4807 Error(getSMEMOffsetLoc(Operands),
4808 isGFX12Plus() && IsBuffer
4809 ? "expected a 23-bit unsigned offset for buffer ops"
4810 : isGFX12Plus() ? "expected a 24-bit signed offset"
4811 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4812 : "expected a 21-bit signed offset");
4813
4814 return false;
4815}
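// Illustrative summary of the ranges checked above (per the diagnostics
// emitted here): GFX12+ buffer SMEM accepts a 23-bit unsigned offset, other
// GFX12+ SMEM a 24-bit signed offset, VI and buffer forms a 20-bit unsigned
// offset, and the remaining targets a 21-bit signed offset; e.g. an offset of
// -0x100000 is only encodable where signed offsets of that width are allowed.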
4816
4817bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
4818 const OperandVector &Operands) {
4819 unsigned Opcode = Inst.getOpcode();
4820 const MCInstrDesc &Desc = MII.get(Opcode);
4821 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4822 return true;
4823
4824 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4825 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4826
4827 const int OpIndices[] = { Src0Idx, Src1Idx };
4828
4829 unsigned NumExprs = 0;
4830 unsigned NumLiterals = 0;
4831 int64_t LiteralValue;
4832
4833 for (int OpIdx : OpIndices) {
4834 if (OpIdx == -1) break;
4835
4836 const MCOperand &MO = Inst.getOperand(OpIdx);
4837     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4838     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4839       bool IsLit = false;
4840 std::optional<int64_t> Imm;
4841 if (MO.isImm()) {
4842 Imm = MO.getImm();
4843 } else if (MO.isExpr()) {
4844 if (isLitExpr(MO.getExpr())) {
4845 IsLit = true;
4846 Imm = getLitValue(MO.getExpr());
4847 }
4848 } else {
4849 continue;
4850 }
4851
4852 if (!Imm.has_value()) {
4853 ++NumExprs;
4854 } else if (!isInlineConstant(Inst, OpIdx)) {
4855 auto OpType = static_cast<AMDGPU::OperandType>(
4856 Desc.operands()[OpIdx].OperandType);
4857 int64_t Value = encode32BitLiteral(*Imm, OpType, IsLit);
4858 if (NumLiterals == 0 || LiteralValue != Value) {
4859         LiteralValue = Value;
4860         ++NumLiterals;
4861 }
4862 }
4863 }
4864 }
4865
4866 if (NumLiterals + NumExprs <= 1)
4867 return true;
4868
4869 Error(getOperandLoc(Operands, Src1Idx),
4870 "only one unique literal operand is allowed");
4871 return false;
4872}
4873
4874bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4875 const unsigned Opc = Inst.getOpcode();
4876 if (isPermlane16(Opc)) {
4877 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4878 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4879
4880 if (OpSel & ~3)
4881 return false;
4882 }
4883
4884 uint64_t TSFlags = MII.get(Opc).TSFlags;
4885
4886 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4887 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4888 if (OpSelIdx != -1) {
4889 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4890 return false;
4891 }
4892 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4893 if (OpSelHiIdx != -1) {
4894 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4895 return false;
4896 }
4897 }
4898
4899 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4900 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4901 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4902 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4903 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4904 if (OpSel & 3)
4905 return false;
4906 }
4907
4908 // Packed math FP32 instructions typically accept SGPRs or VGPRs as source
4909 // operands. On gfx12+, if a source operand uses SGPRs, the HW can only read
4910 // the first SGPR and use it for both the low and high operations.
4911 if (isPackedFP32Inst(Opc) && isGFX12Plus()) {
4912 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4913 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4914 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4915 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4916
4917 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4918 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4919 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4920 unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
4921
4922 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4923
4924 auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
4925 unsigned Mask = 1U << Index;
4926 return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
4927 };
4928
4929 if (Src0.isReg() && isSGPR(Src0.getReg(), TRI) &&
4930 !VerifyOneSGPR(/*Index=*/0))
4931 return false;
4932 if (Src1.isReg() && isSGPR(Src1.getReg(), TRI) &&
4933 !VerifyOneSGPR(/*Index=*/1))
4934 return false;
4935
4936 int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4937 if (Src2Idx != -1) {
4938 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4939 if (Src2.isReg() && isSGPR(Src2.getReg(), TRI) &&
4940 !VerifyOneSGPR(/*Index=*/2))
4941 return false;
4942 }
4943 }
4944
4945 return true;
4946}
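// Illustrative example of the packed-FP32 rule above (hedged, hypothetical
// GFX12+ syntax): "v_pk_mul_f32 v[0:1], s[2:3], v[4:5]" is only accepted when
// the op_sel/op_sel_hi bits for the SGPR source are left clear, since the
// hardware reads just the first SGPR for both halves of the operation.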
4947
4948bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
4949 if (!hasTrue16Insts())
4950 return true;
4951 const MCRegisterInfo *MRI = getMRI();
4952 const unsigned Opc = Inst.getOpcode();
4953 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4954 if (OpSelIdx == -1)
4955 return true;
4956 unsigned OpSelOpValue = Inst.getOperand(OpSelIdx).getImm();
4957 // If the value is 0 we could have a default OpSel Operand, so conservatively
4958 // allow it.
4959 if (OpSelOpValue == 0)
4960 return true;
4961 unsigned OpCount = 0;
4962 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
4963 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
4964 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), OpName);
4965 if (OpIdx == -1)
4966 continue;
4967 const MCOperand &Op = Inst.getOperand(OpIdx);
4968 if (Op.isReg() &&
4969 MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(Op.getReg())) {
4970 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Op.getReg(), *MRI);
4971 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
4972 if (OpSelOpIsHi != VGPRSuffixIsHi)
4973 return false;
4974 }
4975 ++OpCount;
4976 }
4977
4978 return true;
4979}
4980
4981bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
4982 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4983
4984 const unsigned Opc = Inst.getOpcode();
4985 uint64_t TSFlags = MII.get(Opc).TSFlags;
4986
4987 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4988 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4989 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4990 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4991 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4992 !(TSFlags & SIInstrFlags::IsSWMMAC))
4993 return true;
4994
4995 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4996 if (NegIdx == -1)
4997 return true;
4998
4999 unsigned Neg = Inst.getOperand(NegIdx).getImm();
5000
5001   // Some instructions have a neg_lo or neg_hi operand, but the neg modifier is
5002   // allowed on some src operands and not on others.
5003   // Conveniently, such instructions don't have a src_modifiers operand
5004   // for the src operands that don't allow neg, because they also don't allow opsel.
5005
5006 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
5007 AMDGPU::OpName::src1_modifiers,
5008 AMDGPU::OpName::src2_modifiers};
5009
5010 for (unsigned i = 0; i < 3; ++i) {
5011 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
5012 if (Neg & (1 << i))
5013 return false;
5014 }
5015 }
5016
5017 return true;
5018}
5019
5020bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
5021 const OperandVector &Operands) {
5022 const unsigned Opc = Inst.getOpcode();
5023 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
5024 if (DppCtrlIdx >= 0) {
5025 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
5026
5027 if (!AMDGPU::isLegalDPALU_DPPControl(getSTI(), DppCtrl) &&
5028 AMDGPU::isDPALU_DPP(MII.get(Opc), MII, getSTI())) {
5029 // DP ALU DPP is supported for row_newbcast only on GFX9* and row_share
5030 // only on GFX12.
5031 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
5032 Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
5033 : "DP ALU dpp only supports row_newbcast");
5034 return false;
5035 }
5036 }
5037
5038 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
5039 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5040
5041 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
5042 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
5043 if (Src1Idx >= 0) {
5044 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
5045 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5046 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
5047 Error(getOperandLoc(Operands, Src1Idx),
5048 "invalid operand for instruction");
5049 return false;
5050 }
5051 if (Src1.isImm()) {
5052 Error(getInstLoc(Operands),
5053 "src1 immediate operand invalid for instruction");
5054 return false;
5055 }
5056 }
5057 }
5058
5059 return true;
5060}
5061
5062// Check if VCC register matches wavefront size
5063bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
5064 return (Reg == AMDGPU::VCC && isWave64()) ||
5065 (Reg == AMDGPU::VCC_LO && isWave32());
5066}
5067
5068// One unique literal can be used. VOP3 literal is only allowed in GFX10+
5069bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
5070 const OperandVector &Operands) {
5071 unsigned Opcode = Inst.getOpcode();
5072 const MCInstrDesc &Desc = MII.get(Opcode);
5073 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
5074 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
5075 !HasMandatoryLiteral && !isVOPD(Opcode))
5076 return true;
5077
5078 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
5079
5080 std::optional<unsigned> LiteralOpIdx;
5081 std::optional<uint64_t> LiteralValue;
5082
5083 for (int OpIdx : OpIndices) {
5084 if (OpIdx == -1)
5085 continue;
5086
5087 const MCOperand &MO = Inst.getOperand(OpIdx);
5088 if (!MO.isImm() && !MO.isExpr())
5089 continue;
5090 if (!isSISrcOperand(Desc, OpIdx))
5091 continue;
5092
5093 std::optional<int64_t> Imm;
5094 if (MO.isImm())
5095 Imm = MO.getImm();
5096 else if (MO.isExpr() && isLitExpr(MO.getExpr()))
5097 Imm = getLitValue(MO.getExpr());
5098
5099 bool IsAnotherLiteral = false;
5100 if (!Imm.has_value()) {
5101       // Literal value not known, so we conservatively assume it's different.
5102 IsAnotherLiteral = true;
5103 } else if (!isInlineConstant(Inst, OpIdx)) {
5104 uint64_t Value = *Imm;
5105 bool IsForcedFP64 =
5106 Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
5108 HasMandatoryLiteral);
5109 bool IsFP64 = (IsForcedFP64 || AMDGPU::isSISrcFPOperand(Desc, OpIdx)) &&
5110 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
5111 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
5112
5113 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) &&
5114 !IsForcedFP64 && (!has64BitLiterals() || Desc.getSize() != 4)) {
5115 Error(getOperandLoc(Operands, OpIdx),
5116 "invalid operand for instruction");
5117 return false;
5118 }
5119
5120 if (IsFP64 && IsValid32Op && !IsForcedFP64)
5121 Value = Hi_32(Value);
5122
5123       IsAnotherLiteral = !LiteralValue || *LiteralValue != Value;
5124       LiteralValue = Value;
5125 }
5126
5127 if (IsAnotherLiteral && !HasMandatoryLiteral &&
5128 !getFeatureBits()[FeatureVOP3Literal]) {
5129 Error(getOperandLoc(Operands, OpIdx),
5130 "literal operands are not supported");
5131 return false;
5132 }
5133
5134 if (LiteralOpIdx && IsAnotherLiteral) {
5135 Error(getLaterLoc(getOperandLoc(Operands, OpIdx),
5136 getOperandLoc(Operands, *LiteralOpIdx)),
5137 "only one unique literal operand is allowed");
5138 return false;
5139 }
5140
5141 if (IsAnotherLiteral)
5142 LiteralOpIdx = OpIdx;
5143 }
5144
5145 return true;
5146}
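// Illustrative examples (hedged, assuming a GFX10+ target with the
// VOP3Literal feature): "v_fma_f32 v0, 0x40490fdb, v1, 0x40490fdb" reuses one
// literal value and passes, while "v_fma_f32 v0, 0x40490fdb, v1, 0x3f800000"
// carries two distinct literals and is rejected with "only one unique literal
// operand is allowed".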
5147
5148// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
5149static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
5150 const MCRegisterInfo *MRI) {
5151 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name);
5152 if (OpIdx < 0)
5153 return -1;
5154
5155 const MCOperand &Op = Inst.getOperand(OpIdx);
5156 if (!Op.isReg())
5157 return -1;
5158
5159 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5160 auto Reg = Sub ? Sub : Op.getReg();
5161 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5162 return AGPR32.contains(Reg) ? 1 : 0;
5163}
5164
5165bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
5166 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5167   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
5168                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5169                   SIInstrFlags::DS)) == 0)
5170 return true;
5171
5172 AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
5173 ? AMDGPU::OpName::data0
5174 : AMDGPU::OpName::vdata;
5175
5176 const MCRegisterInfo *MRI = getMRI();
5177 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
5178 int DataAreg = IsAGPROperand(Inst, DataName, MRI);
5179
5180 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
5181 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
5182 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5183 return false;
5184 }
5185
5186 auto FB = getFeatureBits();
5187 if (FB[AMDGPU::FeatureGFX90AInsts]) {
5188 if (DataAreg < 0 || DstAreg < 0)
5189 return true;
5190 return DstAreg == DataAreg;
5191 }
5192
5193 return DstAreg < 1 && DataAreg < 1;
5194}
5195
5196bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
5197 auto FB = getFeatureBits();
5198 if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
5199 return true;
5200
5201 unsigned Opc = Inst.getOpcode();
5202 const MCRegisterInfo *MRI = getMRI();
5203   // DS_READ_B96_TR_B6 is the only DS instruction in GFX950 that allows an
5204   // unaligned VGPR. All others only allow even-aligned VGPRs.
5205 if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5206 return true;
5207
5208 if (FB[AMDGPU::FeatureGFX1250Insts]) {
5209 switch (Opc) {
5210 default:
5211 break;
5212 case AMDGPU::DS_LOAD_TR6_B96:
5213 case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5214       // DS_LOAD_TR6_B96 is the only DS instruction in GFX1250 that allows
5215       // an unaligned VGPR. All others only allow even-aligned VGPRs.
5216 return true;
5217 case AMDGPU::GLOBAL_LOAD_TR6_B96:
5218 case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5219       // GLOBAL_LOAD_TR6_B96 is the only GLOBAL instruction in GFX1250 that
5220       // allows an unaligned VGPR for vdst, but other operands still only allow
5221       // even-aligned VGPRs.
5222 int VAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
5223 if (VAddrIdx != -1) {
5224 const MCOperand &Op = Inst.getOperand(VAddrIdx);
5225 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5226 if ((Sub - AMDGPU::VGPR0) & 1)
5227 return false;
5228 }
5229 return true;
5230 }
5231 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5232 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
5233 return true;
5234 }
5235 }
5236
5237 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5238 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5239 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
5240 const MCOperand &Op = Inst.getOperand(I);
5241 if (!Op.isReg())
5242 continue;
5243
5244 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5245 if (!Sub)
5246 continue;
5247
5248 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
5249 return false;
5250 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
5251 return false;
5252 }
5253
5254 return true;
5255}
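// Illustrative example (hedged; targets with FeatureRequiresAlignedVGPRs,
// e.g. gfx90a): a 64-bit tuple such as v[2:3] satisfies the even-alignment
// rule checked above, while v[3:4] would be rejected.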
5256
5257SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
5258 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5259 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5260 if (Op.isBLGP())
5261 return Op.getStartLoc();
5262 }
5263 return SMLoc();
5264}
5265
5266bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
5267 const OperandVector &Operands) {
5268 unsigned Opc = Inst.getOpcode();
5269 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
5270 if (BlgpIdx == -1)
5271 return true;
5272 SMLoc BLGPLoc = getBLGPLoc(Operands);
5273 if (!BLGPLoc.isValid())
5274 return true;
5275 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
5276 auto FB = getFeatureBits();
5277 bool UsesNeg = false;
5278 if (FB[AMDGPU::FeatureGFX940Insts]) {
5279 switch (Opc) {
5280 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5281 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5282 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5283 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5284 UsesNeg = true;
5285 }
5286 }
5287
5288 if (IsNeg == UsesNeg)
5289 return true;
5290
5291 Error(BLGPLoc,
5292 UsesNeg ? "invalid modifier: blgp is not supported"
5293 : "invalid modifier: neg is not supported");
5294
5295 return false;
5296}
5297
5298bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
5299 const OperandVector &Operands) {
5300 if (!isGFX11Plus())
5301 return true;
5302
5303 unsigned Opc = Inst.getOpcode();
5304 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5305 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5306 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5307 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5308 return true;
5309
5310 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
5311 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
5312 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
5313 if (Reg == AMDGPU::SGPR_NULL)
5314 return true;
5315
5316 Error(getOperandLoc(Operands, Src0Idx), "src0 must be null");
5317 return false;
5318}
5319
5320bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
5321 const OperandVector &Operands) {
5322 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5323 if ((TSFlags & SIInstrFlags::DS) == 0)
5324 return true;
5325 if (TSFlags & SIInstrFlags::GWS)
5326 return validateGWS(Inst, Operands);
5327 // Only validate GDS for non-GWS instructions.
5328 if (hasGDS())
5329 return true;
5330 int GDSIdx =
5331 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
5332 if (GDSIdx < 0)
5333 return true;
5334 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
5335 if (GDS) {
5336 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5337 Error(S, "gds modifier is not supported on this GPU");
5338 return false;
5339 }
5340 return true;
5341}
5342
5343// gfx90a has an undocumented limitation:
5344// DS_GWS opcodes must use even aligned registers.
5345bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
5346 const OperandVector &Operands) {
5347 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5348 return true;
5349
5350 int Opc = Inst.getOpcode();
5351 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5352 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5353 return true;
5354
5355 const MCRegisterInfo *MRI = getMRI();
5356 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5357 int Data0Pos =
5358 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
5359 assert(Data0Pos != -1);
5360 auto Reg = Inst.getOperand(Data0Pos).getReg();
5361 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5362 if (RegIdx & 1) {
5363 Error(getOperandLoc(Operands, Data0Pos), "vgpr must be even aligned");
5364 return false;
5365 }
5366
5367 return true;
5368}
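// Illustrative example (assumed spelling, not from the original source): on
// gfx90a, "ds_gws_init v2 gds" uses an even-numbered data register and
// passes this check, whereas "ds_gws_init v1 gds" would produce the
// "vgpr must be even aligned" error.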
5369
5370bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
5371 const OperandVector &Operands,
5372 SMLoc IDLoc) {
5373 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
5374 AMDGPU::OpName::cpol);
5375 if (CPolPos == -1)
5376 return true;
5377
5378 unsigned CPol = Inst.getOperand(CPolPos).getImm();
5379
5380 if (!isGFX1250()) {
5381 if (CPol & CPol::SCAL) {
5382 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5383 StringRef CStr(S.getPointer());
5384 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5385 Error(S, "scale_offset is not supported on this GPU");
5386 }
5387 if (CPol & CPol::NV) {
5388 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5389 StringRef CStr(S.getPointer());
5390 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
5391 Error(S, "nv is not supported on this GPU");
5392 }
5393 }
5394
5395 if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Inst.getOpcode())) {
5396 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5397 StringRef CStr(S.getPointer());
5398 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5399 Error(S, "scale_offset is not supported for this instruction");
5400 }
5401
5402 if (isGFX12Plus())
5403 return validateTHAndScopeBits(Inst, Operands, CPol);
5404
5405 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5406 if (TSFlags & SIInstrFlags::SMRD) {
5407 if (CPol && (isSI() || isCI())) {
5408 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5409 Error(S, "cache policy is not supported for SMRD instructions");
5410 return false;
5411 }
5412 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
5413 Error(IDLoc, "invalid cache policy for SMEM instruction");
5414 return false;
5415 }
5416 }
5417
5418 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
5419 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
5420 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5421 SIInstrFlags::FLAT;
5422 if (!(TSFlags & AllowSCCModifier)) {
5423 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5424 StringRef CStr(S.getPointer());
5425 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
5426 Error(S,
5427 "scc modifier is not supported for this instruction on this GPU");
5428 return false;
5429 }
5430 }
5431
5432 if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
5433 return true;
5434
5435 if (TSFlags & SIInstrFlags::IsAtomicRet) {
5436 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
5437 Error(IDLoc, isGFX940() ? "instruction must use sc0"
5438 : "instruction must use glc");
5439 return false;
5440 }
5441 } else {
5442 if (CPol & CPol::GLC) {
5443 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5444 StringRef CStr(S.getPointer());
5445 S = SMLoc::getFromPointer(
5446 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
5447 Error(S, isGFX940() ? "instruction must not use sc0"
5448 : "instruction must not use glc");
5449 return false;
5450 }
5451 }
5452
5453 return true;
5454}
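// Illustrative summary (not part of the original source): before GFX12 the
// rule enforced above is that atomics which return a value must carry "glc"
// (or "sc0" on gfx940), while non-returning atomics must not carry it; e.g.
// (spelling assumed) "flat_atomic_add v0, v[1:2], v3 glc" is the returning
// form and a bare "flat_atomic_add v[1:2], v3" the non-returning one.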
5455
5456bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
5457 const OperandVector &Operands,
5458 const unsigned CPol) {
5459 const unsigned TH = CPol & AMDGPU::CPol::TH;
5460 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
5461
5462 const unsigned Opcode = Inst.getOpcode();
5463 const MCInstrDesc &TID = MII.get(Opcode);
5464
5465 auto PrintError = [&](StringRef Msg) {
5466 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5467 Error(S, Msg);
5468 return false;
5469 };
5470
5471 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5472 ((TH & AMDGPU::CPol::TH_ATOMIC_RETURN) !=
5473 AMDGPU::CPol::TH_ATOMIC_RETURN))
5474 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5475
5476 if (TH == 0)
5477 return true;
5478
5479 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5480 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5481 (TH == AMDGPU::CPol::TH_NT_HT)))
5482 return PrintError("invalid th value for SMEM instruction");
5483
5484 if (TH == AMDGPU::CPol::TH_BYPASS) {
5485 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5486 CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
5487 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5488 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
5489 return PrintError("scope and th combination is not valid");
5490 }
5491
5492 unsigned THType = AMDGPU::getTemporalHintType(TID);
5493 if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
5494 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5495 return PrintError("invalid th value for atomic instructions");
5496 } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
5497 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5498 return PrintError("invalid th value for store instructions");
5499 } else {
5500 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5501 return PrintError("invalid th value for load instructions");
5502 }
5503
5504 return true;
5505}
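// Illustrative note (assumed spellings, not from the original source): on
// GFX12+ a returning atomic is expected to specify "th:TH_ATOMIC_RETURN",
// SMEM loads reject the mixed NT/RT hints, the BYPASS hint is only accepted
// with a matching scope, and the remaining hints must match the instruction
// kind (atomic/store/load) as checked above.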
5506
5507bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5508 const OperandVector &Operands) {
5509 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5510 if (Desc.mayStore() &&
5511 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5512 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5513 if (Loc != getInstLoc(Operands)) {
5514 Error(Loc, "TFE modifier has no meaning for store instructions");
5515 return false;
5516 }
5517 }
5518
5519 return true;
5520}
5521
5522bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
5523 const OperandVector &Operands) {
5524 unsigned Opc = Inst.getOpcode();
5525 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5526 const MCInstrDesc &Desc = MII.get(Opc);
5527
5528 auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) -> bool {
5529 int FmtIdx = AMDGPU::getNamedOperandIdx(Opc, FmtOp);
5530 if (FmtIdx == -1)
5531 return true;
5532 unsigned Fmt = Inst.getOperand(FmtIdx).getImm();
5533 int SrcIdx = AMDGPU::getNamedOperandIdx(Opc, SrcOp);
5534 unsigned RegSize =
5535 TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[SrcIdx], HwMode))
5536 .getSizeInBits();
5537
5538 if (RegSize == AMDGPU::wmmaScaleF8F6F4FormatToNumRegs(Fmt) * 32)
5539 return true;
5540
5541 static const char *FmtNames[] = {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
5542 "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
5543 "MATRIX_FMT_FP4"};
5544
5545 Error(getOperandLoc(Operands, SrcIdx),
5546 "wrong register tuple size for " + Twine(FmtNames[Fmt]));
5547 return false;
5548 };
5549
5550 return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
5551 validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
5552}
5553
5554bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, SMLoc IDLoc,
5555 const OperandVector &Operands) {
5556 if (!validateLdsDirect(Inst, Operands))
5557 return false;
5558 if (!validateTrue16OpSel(Inst)) {
5559 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5560 "op_sel operand conflicts with 16-bit operand suffix");
5561 return false;
5562 }
5563 if (!validateSOPLiteral(Inst, Operands))
5564 return false;
5565 if (!validateVOPLiteral(Inst, Operands)) {
5566 return false;
5567 }
5568 if (!validateConstantBusLimitations(Inst, Operands)) {
5569 return false;
5570 }
5571 if (!validateVOPD(Inst, Operands)) {
5572 return false;
5573 }
5574 if (!validateIntClampSupported(Inst)) {
5575 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5576 "integer clamping is not supported on this GPU");
5577 return false;
5578 }
5579 if (!validateOpSel(Inst)) {
5580 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5581 "invalid op_sel operand");
5582 return false;
5583 }
5584 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5585 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5586 "invalid neg_lo operand");
5587 return false;
5588 }
5589 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5590 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5591 "invalid neg_hi operand");
5592 return false;
5593 }
5594 if (!validateDPP(Inst, Operands)) {
5595 return false;
5596 }
5597 // For MUBUF/MTBUF d16 is part of the opcode, so there is nothing to validate.
5598 if (!validateMIMGD16(Inst)) {
5599 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5600 "d16 modifier is not supported on this GPU");
5601 return false;
5602 }
5603 if (!validateMIMGDim(Inst, Operands)) {
5604 Error(IDLoc, "missing dim operand");
5605 return false;
5606 }
5607 if (!validateTensorR128(Inst)) {
5608 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5609 "instruction must set modifier r128=0");
5610 return false;
5611 }
5612 if (!validateMIMGMSAA(Inst)) {
5613 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5614 "invalid dim; must be MSAA type");
5615 return false;
5616 }
5617 if (!validateMIMGDataSize(Inst, IDLoc)) {
5618 return false;
5619 }
5620 if (!validateMIMGAddrSize(Inst, IDLoc))
5621 return false;
5622 if (!validateMIMGAtomicDMask(Inst)) {
5623 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5624 "invalid atomic image dmask");
5625 return false;
5626 }
5627 if (!validateMIMGGatherDMask(Inst)) {
5628 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5629 "invalid image_gather dmask: only one bit must be set");
5630 return false;
5631 }
5632 if (!validateMovrels(Inst, Operands)) {
5633 return false;
5634 }
5635 if (!validateOffset(Inst, Operands)) {
5636 return false;
5637 }
5638 if (!validateMAIAccWrite(Inst, Operands)) {
5639 return false;
5640 }
5641 if (!validateMAISrc2(Inst, Operands)) {
5642 return false;
5643 }
5644 if (!validateMFMA(Inst, Operands)) {
5645 return false;
5646 }
5647 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5648 return false;
5649 }
5650
5651 if (!validateAGPRLdSt(Inst)) {
5652 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5653 ? "invalid register class: data and dst should be all VGPR or AGPR"
5654 : "invalid register class: agpr loads and stores not supported on this GPU"
5655 );
5656 return false;
5657 }
5658 if (!validateVGPRAlign(Inst)) {
5659 Error(IDLoc,
5660 "invalid register class: vgpr tuples must be 64 bit aligned");
5661 return false;
5662 }
5663 if (!validateDS(Inst, Operands)) {
5664 return false;
5665 }
5666
5667 if (!validateBLGP(Inst, Operands)) {
5668 return false;
5669 }
5670
5671 if (!validateDivScale(Inst)) {
5672 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5673 return false;
5674 }
5675 if (!validateWaitCnt(Inst, Operands)) {
5676 return false;
5677 }
5678 if (!validateTFE(Inst, Operands)) {
5679 return false;
5680 }
5681 if (!validateWMMA(Inst, Operands)) {
5682 return false;
5683 }
5684
5685 return true;
5686}
5687
5688 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5689 const FeatureBitset &FBS,
5690 unsigned VariantID = 0);
5691
5692static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5693 const FeatureBitset &AvailableFeatures,
5694 unsigned VariantID);
5695
5696bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5697 const FeatureBitset &FBS) {
5698 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5699}
5700
5701bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5702 const FeatureBitset &FBS,
5703 ArrayRef<unsigned> Variants) {
5704 for (auto Variant : Variants) {
5705 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5706 return true;
5707 }
5708
5709 return false;
5710}
5711
5712bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5713 SMLoc IDLoc) {
5714 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5715
5716 // Check if requested instruction variant is supported.
5717 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5718 return false;
5719
5720 // This instruction is not supported.
5721 // Clear any other pending errors because they are no longer relevant.
5722 getParser().clearPendingErrors();
5723
5724 // Requested instruction variant is not supported.
5725 // Check if any other variants are supported.
5726 StringRef VariantName = getMatchedVariantName();
5727 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5728 return Error(IDLoc,
5729 Twine(VariantName,
5730 " variant of this instruction is not supported"));
5731 }
5732
5733 // Check if this instruction may be used with a different wavesize.
5734 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5735 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5736 // FIXME: Use getAvailableFeatures, and do not manually recompute
5737 FeatureBitset FeaturesWS32 = getFeatureBits();
5738 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5739 .flip(AMDGPU::FeatureWavefrontSize32);
5740 FeatureBitset AvailableFeaturesWS32 =
5741 ComputeAvailableFeatures(FeaturesWS32);
5742
5743 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5744 return Error(IDLoc, "instruction requires wavesize=32");
5745 }
5746
5747 // Finally check if this instruction is supported on any other GPU.
5748 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5749 return Error(IDLoc, "instruction not supported on this GPU");
5750 }
5751
5752 // Instruction not supported on any GPU. Probably a typo.
5753 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5754 return Error(IDLoc, "invalid instruction" + Suggestion);
5755}
5756
5757static bool isInvalidVOPDY(const OperandVector &Operands,
5758 uint64_t InvalidOprIdx) {
5759 assert(InvalidOprIdx < Operands.size());
5760 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5761 if (Op.isToken() && InvalidOprIdx > 1) {
5762 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5763 return PrevOp.isToken() && PrevOp.getToken() == "::";
5764 }
5765 return false;
5766}
5767
5768bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5769 OperandVector &Operands,
5770 MCStreamer &Out,
5771 uint64_t &ErrorInfo,
5772 bool MatchingInlineAsm) {
5773 MCInst Inst;
5774 Inst.setLoc(IDLoc);
5775 unsigned Result = Match_Success;
5776 for (auto Variant : getMatchedVariants()) {
5777 uint64_t EI;
5778 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5779 Variant);
5780 // We order match statuses from least to most specific, and we keep the most
5781 // specific status as the result:
5782 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5783 if (R == Match_Success || R == Match_MissingFeature ||
5784 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5785 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5786 Result != Match_MissingFeature)) {
5787 Result = R;
5788 ErrorInfo = EI;
5789 }
5790 if (R == Match_Success)
5791 break;
5792 }
5793
5794 if (Result == Match_Success) {
5795 if (!validateInstruction(Inst, IDLoc, Operands)) {
5796 return true;
5797 }
5798 Out.emitInstruction(Inst, getSTI());
5799 return false;
5800 }
5801
5802 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5803 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5804 return true;
5805 }
5806
5807 switch (Result) {
5808 default: break;
5809 case Match_MissingFeature:
5810 // It has been verified that the specified instruction
5811 // mnemonic is valid. A match was found but it requires
5812 // features which are not supported on this GPU.
5813 return Error(IDLoc, "operands are not valid for this GPU or mode");
5814
5815 case Match_InvalidOperand: {
5816 SMLoc ErrorLoc = IDLoc;
5817 if (ErrorInfo != ~0ULL) {
5818 if (ErrorInfo >= Operands.size()) {
5819 return Error(IDLoc, "too few operands for instruction");
5820 }
5821 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5822 if (ErrorLoc == SMLoc())
5823 ErrorLoc = IDLoc;
5824
5825 if (isInvalidVOPDY(Operands, ErrorInfo))
5826 return Error(ErrorLoc, "invalid VOPDY instruction");
5827 }
5828 return Error(ErrorLoc, "invalid operand for instruction");
5829 }
5830
5831 case Match_MnemonicFail:
5832 llvm_unreachable("Invalid instructions should have been handled already");
5833 }
5834 llvm_unreachable("Implement any new match types added!");
5835}
5836
5837bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5838 int64_t Tmp = -1;
5839 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5840 return true;
5841 }
5842 if (getParser().parseAbsoluteExpression(Tmp)) {
5843 return true;
5844 }
5845 Ret = static_cast<uint32_t>(Tmp);
5846 return false;
5847}
5848
5849bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5850 if (!getSTI().getTargetTriple().isAMDGCN())
5851 return TokError("directive only supported for amdgcn architecture");
5852
5853 std::string TargetIDDirective;
5854 SMLoc TargetStart = getTok().getLoc();
5855 if (getParser().parseEscapedString(TargetIDDirective))
5856 return true;
5857
5858 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5859 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5860 return getParser().Error(TargetRange.Start,
5861 (Twine(".amdgcn_target directive's target id ") +
5862 Twine(TargetIDDirective) +
5863 Twine(" does not match the specified target id ") +
5864 Twine(getTargetStreamer().getTargetID()->toString())).str());
5865
5866 return false;
5867}
5868
5869bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5870 return Error(Range.Start, "value out of range", Range);
5871}
5872
5873bool AMDGPUAsmParser::calculateGPRBlocks(
5874 const FeatureBitset &Features, const MCExpr *VCCUsed,
5875 const MCExpr *FlatScrUsed, bool XNACKUsed,
5876 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
5877 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5878 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
5879 // TODO(scott.linder): These calculations are duplicated from
5880 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5881 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5882 MCContext &Ctx = getContext();
5883
5884 const MCExpr *NumSGPRs = NextFreeSGPR;
5885 int64_t EvaluatedSGPRs;
5886
5887 if (Version.Major >= 10)
5888 NumSGPRs = MCConstantExpr::create(0, Ctx);
5889 else {
5890 unsigned MaxAddressableNumSGPRs =
5891 IsaInfo::getAddressableNumSGPRs(&getSTI());
5892
5893 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
5894 !Features.test(FeatureSGPRInitBug) &&
5895 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5896 return OutOfRangeError(SGPRRange);
5897
5898 const MCExpr *ExtraSGPRs =
5899 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
5900 NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
5901
5902 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5903 (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5904 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5905 return OutOfRangeError(SGPRRange);
5906
5907 if (Features.test(FeatureSGPRInitBug))
5908 NumSGPRs =
5909 MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
5910 }
5911
5912 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
5913 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
5914 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
5915 unsigned Granule) -> const MCExpr * {
5916 const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
5917 const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
5918 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
5919 const MCExpr *AlignToGPR =
5920 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
5921 const MCExpr *DivGPR =
5922 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
5923 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
5924 return SubGPR;
5925 };
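// Worked example of the formula above (illustrative, not in the source):
// with NumGPR = 37 and an encoding granule of 8,
// alignTo(max(1, 37), 8) = 40 and 40 / 8 - 1 = 4 encoded blocks.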
5926
5927 VGPRBlocks = GetNumGPRBlocks(
5928 NextFreeVGPR,
5929 IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
5930 SGPRBlocks =
5931 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));
5932
5933 return false;
5934}
5935
5936bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5937 if (!getSTI().getTargetTriple().isAMDGCN())
5938 return TokError("directive only supported for amdgcn architecture");
5939
5940 if (!isHsaAbi(getSTI()))
5941 return TokError("directive only supported for amdhsa OS");
5942
5943 StringRef KernelName;
5944 if (getParser().parseIdentifier(KernelName))
5945 return true;
5946
5947 AMDGPU::MCKernelDescriptor KD =
5948 AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
5949 &getSTI(), getContext());
5950
5951 StringSet<> Seen;
5952
5953 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5954
5955 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
5956 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
5957
5958 SMRange VGPRRange;
5959 const MCExpr *NextFreeVGPR = ZeroExpr;
5960 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
5961 const MCExpr *NamedBarCnt = ZeroExpr;
5962 uint64_t SharedVGPRCount = 0;
5963 uint64_t PreloadLength = 0;
5964 uint64_t PreloadOffset = 0;
5965 SMRange SGPRRange;
5966 const MCExpr *NextFreeSGPR = ZeroExpr;
5967
5968 // Count the number of user SGPRs implied from the enabled feature bits.
5969 unsigned ImpliedUserSGPRCount = 0;
5970
5971 // Track if the asm explicitly contains the directive for the user SGPR
5972 // count.
5973 std::optional<unsigned> ExplicitUserSGPRCount;
5974 const MCExpr *ReserveVCC = OneExpr;
5975 const MCExpr *ReserveFlatScr = OneExpr;
5976 std::optional<bool> EnableWavefrontSize32;
5977
5978 while (true) {
5979 while (trySkipToken(AsmToken::EndOfStatement));
5980
5981 StringRef ID;
5982 SMRange IDRange = getTok().getLocRange();
5983 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5984 return true;
5985
5986 if (ID == ".end_amdhsa_kernel")
5987 break;
5988
5989 if (!Seen.insert(ID).second)
5990 return TokError(".amdhsa_ directives cannot be repeated");
5991
5992 SMLoc ValStart = getLoc();
5993 const MCExpr *ExprVal;
5994 if (getParser().parseExpression(ExprVal))
5995 return true;
5996 SMLoc ValEnd = getLoc();
5997 SMRange ValRange = SMRange(ValStart, ValEnd);
5998
5999 int64_t IVal = 0;
6000 uint64_t Val = IVal;
6001 bool EvaluatableExpr;
6002 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
6003 if (IVal < 0)
6004 return OutOfRangeError(ValRange);
6005 Val = IVal;
6006 }
6007
6008#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
6009 if (!isUInt<ENTRY##_WIDTH>(Val)) \
6010 return OutOfRangeError(RANGE); \
6011 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
6012 getContext());
6013
6014// Some fields use the parsed value immediately which requires the expression to
6015// be solvable.
6016#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6017 if (!(RESOLVED)) \
6018 return Error(IDRange.Start, "directive should have resolvable expression", \
6019 IDRange);
6020
6021 if (ID == ".amdhsa_group_segment_fixed_size") {
6022 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
6023 CHAR_BIT>(Val))
6024 return OutOfRangeError(ValRange);
6025 KD.group_segment_fixed_size = ExprVal;
6026 } else if (ID == ".amdhsa_private_segment_fixed_size") {
6027 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
6028 CHAR_BIT>(Val))
6029 return OutOfRangeError(ValRange);
6030 KD.private_segment_fixed_size = ExprVal;
6031 } else if (ID == ".amdhsa_kernarg_size") {
6032 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
6033 return OutOfRangeError(ValRange);
6034 KD.kernarg_size = ExprVal;
6035 } else if (ID == ".amdhsa_user_sgpr_count") {
6036 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6037 ExplicitUserSGPRCount = Val;
6038 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
6039 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6040 if (hasArchitectedFlatScratch())
6041 return Error(IDRange.Start,
6042 "directive is not supported with architected flat scratch",
6043 IDRange);
6044 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6045 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6046 ExprVal, ValRange);
6047 if (Val)
6048 ImpliedUserSGPRCount += 4;
6049 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
6050 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6051 if (!hasKernargPreload())
6052 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6053
6054 if (Val > getMaxNumUserSGPRs())
6055 return OutOfRangeError(ValRange);
6056 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
6057 ValRange);
6058 if (Val) {
6059 ImpliedUserSGPRCount += Val;
6060 PreloadLength = Val;
6061 }
6062 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
6063 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6064 if (!hasKernargPreload())
6065 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6066
6067 if (Val >= 1024)
6068 return OutOfRangeError(ValRange);
6069 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
6070 ValRange);
6071 if (Val)
6072 PreloadOffset = Val;
6073 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
6074 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6075 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6076 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6077 ValRange);
6078 if (Val)
6079 ImpliedUserSGPRCount += 2;
6080 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
6081 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6082 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6083 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6084 ValRange);
6085 if (Val)
6086 ImpliedUserSGPRCount += 2;
6087 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
6088 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6089 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6090 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6091 ExprVal, ValRange);
6092 if (Val)
6093 ImpliedUserSGPRCount += 2;
6094 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
6095 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6096 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6097 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6098 ValRange);
6099 if (Val)
6100 ImpliedUserSGPRCount += 2;
6101 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
6102 if (hasArchitectedFlatScratch())
6103 return Error(IDRange.Start,
6104 "directive is not supported with architected flat scratch",
6105 IDRange);
6106 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6107 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6108 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6109 ExprVal, ValRange);
6110 if (Val)
6111 ImpliedUserSGPRCount += 2;
6112 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
6113 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6114 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6115 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6116 ExprVal, ValRange);
6117 if (Val)
6118 ImpliedUserSGPRCount += 1;
6119 } else if (ID == ".amdhsa_wavefront_size32") {
6120 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6121 if (IVersion.Major < 10)
6122 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6123 EnableWavefrontSize32 = Val;
6124 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6125 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6126 ValRange);
6127 } else if (ID == ".amdhsa_uses_dynamic_stack") {
6128 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6129 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6130 ValRange);
6131 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6132 if (hasArchitectedFlatScratch())
6133 return Error(IDRange.Start,
6134 "directive is not supported with architected flat scratch",
6135 IDRange);
6136 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6137 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6138 ValRange);
6139 } else if (ID == ".amdhsa_enable_private_segment") {
6140 if (!hasArchitectedFlatScratch())
6141 return Error(
6142 IDRange.Start,
6143 "directive is not supported without architected flat scratch",
6144 IDRange);
6145 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6146 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6147 ValRange);
6148 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
6149 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6150 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6151 ValRange);
6152 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
6153 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6154 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6155 ValRange);
6156 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
6157 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6158 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6159 ValRange);
6160 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
6161 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6162 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6163 ValRange);
6164 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
6165 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6166 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6167 ValRange);
6168 } else if (ID == ".amdhsa_next_free_vgpr") {
6169 VGPRRange = ValRange;
6170 NextFreeVGPR = ExprVal;
6171 } else if (ID == ".amdhsa_next_free_sgpr") {
6172 SGPRRange = ValRange;
6173 NextFreeSGPR = ExprVal;
6174 } else if (ID == ".amdhsa_accum_offset") {
6175 if (!isGFX90A())
6176 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6177 AccumOffset = ExprVal;
6178 } else if (ID == ".amdhsa_named_barrier_count") {
6179 if (!isGFX1250())
6180 return Error(IDRange.Start, "directive requires gfx1250+", IDRange);
6181 NamedBarCnt = ExprVal;
6182 } else if (ID == ".amdhsa_reserve_vcc") {
6183 if (EvaluatableExpr && !isUInt<1>(Val))
6184 return OutOfRangeError(ValRange);
6185 ReserveVCC = ExprVal;
6186 } else if (ID == ".amdhsa_reserve_flat_scratch") {
6187 if (IVersion.Major < 7)
6188 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
6189 if (hasArchitectedFlatScratch())
6190 return Error(IDRange.Start,
6191 "directive is not supported with architected flat scratch",
6192 IDRange);
6193 if (EvaluatableExpr && !isUInt<1>(Val))
6194 return OutOfRangeError(ValRange);
6195 ReserveFlatScr = ExprVal;
6196 } else if (ID == ".amdhsa_reserve_xnack_mask") {
6197 if (IVersion.Major < 8)
6198 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
6199 if (!isUInt<1>(Val))
6200 return OutOfRangeError(ValRange);
6201 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6202 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
6203 IDRange);
6204 } else if (ID == ".amdhsa_float_round_mode_32") {
6205 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6206 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6207 ValRange);
6208 } else if (ID == ".amdhsa_float_round_mode_16_64") {
6209 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6210 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6211 ValRange);
6212 } else if (ID == ".amdhsa_float_denorm_mode_32") {
6213 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6214 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6215 ValRange);
6216 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
6217 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6218 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6219 ValRange);
6220 } else if (ID == ".amdhsa_dx10_clamp") {
6221 if (IVersion.Major >= 12)
6222 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
6223 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6224 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6225 ValRange);
6226 } else if (ID == ".amdhsa_ieee_mode") {
6227 if (IVersion.Major >= 12)
6228 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
6229 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6230 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6231 ValRange);
6232 } else if (ID == ".amdhsa_fp16_overflow") {
6233 if (IVersion.Major < 9)
6234 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
6235 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6236 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6237 ValRange);
6238 } else if (ID == ".amdhsa_tg_split") {
6239 if (!isGFX90A())
6240 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6241 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
6242 ExprVal, ValRange);
6243 } else if (ID == ".amdhsa_workgroup_processor_mode") {
6244 if (!supportsWGP(getSTI()))
6245 return Error(IDRange.Start,
6246 "directive unsupported on " + getSTI().getCPU(), IDRange);
6247 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6248 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6249 ValRange);
6250 } else if (ID == ".amdhsa_memory_ordered") {
6251 if (IVersion.Major < 10)
6252 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6253 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6254 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6255 ValRange);
6256 } else if (ID == ".amdhsa_forward_progress") {
6257 if (IVersion.Major < 10)
6258 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6259 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6260 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6261 ValRange);
6262 } else if (ID == ".amdhsa_shared_vgpr_count") {
6263 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6264 if (IVersion.Major < 10 || IVersion.Major >= 12)
6265 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
6266 IDRange);
6267 SharedVGPRCount = Val;
6268 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6269 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6270 ValRange);
6271 } else if (ID == ".amdhsa_inst_pref_size") {
6272 if (IVersion.Major < 11)
6273 return Error(IDRange.Start, "directive requires gfx11+", IDRange);
6274 if (IVersion.Major == 11) {
6275 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6276 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6277 ValRange);
6278 } else {
6279 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6280 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6281 ValRange);
6282 }
6283 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
6284 PARSE_BITS_ENTRY(
6285 KD.compute_pgm_rsrc2,
6286 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6287 ExprVal, ValRange);
6288 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
6289 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6290 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6291 ExprVal, ValRange);
6292 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
6293 PARSE_BITS_ENTRY(
6294 KD.compute_pgm_rsrc2,
6295 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6296 ExprVal, ValRange);
6297 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
6298 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6299 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6300 ExprVal, ValRange);
6301 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
6302 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6303 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6304 ExprVal, ValRange);
6305 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
6306 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6307 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6308 ExprVal, ValRange);
6309 } else if (ID == ".amdhsa_exception_int_div_zero") {
6310 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6311 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6312 ExprVal, ValRange);
6313 } else if (ID == ".amdhsa_round_robin_scheduling") {
6314 if (IVersion.Major < 12)
6315 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
6316 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6317 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6318 ValRange);
6319 } else {
6320 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
6321 }
6322
6323#undef PARSE_BITS_ENTRY
6324 }
6325
6326 if (!Seen.contains(".amdhsa_next_free_vgpr"))
6327 return TokError(".amdhsa_next_free_vgpr directive is required");
6328
6329 if (!Seen.contains(".amdhsa_next_free_sgpr"))
6330 return TokError(".amdhsa_next_free_sgpr directive is required");
6331
6332 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6333
6334 // Consider the case where the total number of user SGPRs, including the
6335 // trailing allocated preload SGPRs, is greater than the number of explicitly
6336 // referenced SGPRs.
6337 if (PreloadLength) {
6338 MCContext &Ctx = getContext();
6339 NextFreeSGPR = AMDGPUMCExpr::createMax(
6340 {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx);
6341 }
6342
6343 const MCExpr *VGPRBlocks;
6344 const MCExpr *SGPRBlocks;
6345 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6346 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6347 EnableWavefrontSize32, NextFreeVGPR,
6348 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6349 SGPRBlocks))
6350 return true;
6351
6352 int64_t EvaluatedVGPRBlocks;
6353 bool VGPRBlocksEvaluatable =
6354 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6355 if (VGPRBlocksEvaluatable &&
6356 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
6357 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
6358 return OutOfRangeError(VGPRRange);
6359 }
6360 MCKernelDescriptor::bits_set(
6361 KD.compute_pgm_rsrc1, VGPRBlocks,
6362 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6363 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
6364
6365 int64_t EvaluatedSGPRBlocks;
6366 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6367 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
6368 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
6369 return OutOfRangeError(SGPRRange);
6370 MCKernelDescriptor::bits_set(
6371 KD.compute_pgm_rsrc1, SGPRBlocks,
6372 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6373 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
6374
6375 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6376 return TokError("amdgpu_user_sgpr_count smaller than implied by "
6377 "enabled user SGPRs");
6378
6379 if (isGFX1250()) {
6380 if (!isUInt<COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
6381 return TokError("too many user SGPRs enabled");
6382 MCKernelDescriptor::bits_set(
6383 KD.compute_pgm_rsrc2,
6384 MCConstantExpr::create(UserSGPRCount, getContext()),
6385 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6386 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());
6387 } else {
6388 if (!isUInt<COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_WIDTH>(
6389 UserSGPRCount))
6390 return TokError("too many user SGPRs enabled");
6391 MCKernelDescriptor::bits_set(
6392 KD.compute_pgm_rsrc2,
6393 MCConstantExpr::create(UserSGPRCount, getContext()),
6394 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6395 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());
6396 }
6397
6398 int64_t IVal = 0;
6399 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
6400 return TokError("Kernarg size should be resolvable");
6401 uint64_t kernarg_size = IVal;
6402 if (PreloadLength && kernarg_size &&
6403 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6404 return TokError("Kernarg preload length + offset is larger than the "
6405 "kernarg segment size");
6406
6407 if (isGFX90A()) {
6408 if (!Seen.contains(".amdhsa_accum_offset"))
6409 return TokError(".amdhsa_accum_offset directive is required");
6410 int64_t EvaluatedAccum;
6411 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
6412 uint64_t UEvaluatedAccum = EvaluatedAccum;
6413 if (AccumEvaluatable &&
6414 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6415 return TokError("accum_offset should be in range [4..256] in "
6416 "increments of 4");
6417
6418 int64_t EvaluatedNumVGPR;
6419 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
6420 AccumEvaluatable &&
6421 UEvaluatedAccum >
6422 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
6423 return TokError("accum_offset exceeds total VGPR allocation");
6424 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
6425 MCBinaryExpr::createDiv(
6426 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
6427 MCConstantExpr::create(1, getContext()), getContext());
6428 MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, AdjustedAccum,
6429 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6430 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6431 getContext());
6432 }
6433
6434 if (isGFX1250())
6435 MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, NamedBarCnt,
6436 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6437 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
6438 getContext());
6439
6440 if (IVersion.Major >= 10 && IVersion.Major < 12) {
6441 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
6442 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6443 return TokError("shared_vgpr_count directive not valid on "
6444 "wavefront size 32");
6445 }
6446
6447 if (VGPRBlocksEvaluatable &&
6448 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
6449 63)) {
6450 return TokError("shared_vgpr_count*2 + "
6451 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
6452 "exceed 63\n");
6453 }
6454 }
6455
6456 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
6457 NextFreeVGPR, NextFreeSGPR,
6458 ReserveVCC, ReserveFlatScr);
6459 return false;
6460}
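// Minimal illustrative input for this directive (assumed syntax, arbitrary
// values, not part of the original source):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
// Only .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr are required here
// (plus .amdhsa_accum_offset on gfx90a), and each .amdhsa_ directive may
// appear at most once.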
6461
6462bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6463 uint32_t Version;
6464 if (ParseAsAbsoluteExpression(Version))
6465 return true;
6466
6467 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
6468 return false;
6469}
6470
6471bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6472 AMDGPUMCKernelCodeT &C) {
6473 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6474 // assembly for backwards compatibility.
6475 if (ID == "max_scratch_backing_memory_byte_size") {
6476 Parser.eatToEndOfStatement();
6477 return false;
6478 }
6479
6480 SmallString<40> ErrStr;
6481 raw_svector_ostream Err(ErrStr);
6482 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
6483 return TokError(Err.str());
6484 }
6485 Lex();
6486
6487 if (ID == "enable_wavefront_size32") {
6488 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6489 if (!isGFX10Plus())
6490 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
6491 if (!isWave32())
6492 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
6493 } else {
6494 if (!isWave64())
6495 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
6496 }
6497 }
6498
6499 if (ID == "wavefront_size") {
6500 if (C.wavefront_size == 5) {
6501 if (!isGFX10Plus())
6502 return TokError("wavefront_size=5 is only allowed on GFX10+");
6503 if (!isWave32())
6504 return TokError("wavefront_size=5 requires +WavefrontSize32");
6505 } else if (C.wavefront_size == 6) {
6506 if (!isWave64())
6507 return TokError("wavefront_size=6 requires +WavefrontSize64");
6508 }
6509 }
6510
6511 return false;
6512}
6513
6514bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6515 AMDGPUMCKernelCodeT KernelCode;
6516 KernelCode.initDefault(&getSTI(), getContext());
6517
6518 while (true) {
6519 // Lex EndOfStatement. This is in a while loop, because lexing a comment
6520 // will set the current token to EndOfStatement.
6521 while(trySkipToken(AsmToken::EndOfStatement));
6522
6523 StringRef ID;
6524 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
6525 return true;
6526
6527 if (ID == ".end_amd_kernel_code_t")
6528 break;
6529
6530 if (ParseAMDKernelCodeTValue(ID, KernelCode))
6531 return true;
6532 }
6533
6534 KernelCode.validate(&getSTI(), getContext());
6535 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6536
6537 return false;
6538}
6539
6540bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6541 StringRef KernelName;
6542 if (!parseId(KernelName, "expected symbol name"))
6543 return true;
6544
6545 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6546 ELF::STT_AMDGPU_HSA_KERNEL);
6547
6548 KernelScope.initialize(getContext());
6549 return false;
6550}
6551
6552bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6553 if (!getSTI().getTargetTriple().isAMDGCN()) {
6554 return Error(getLoc(),
6555 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6556 "architectures");
6557 }
6558
6559 auto TargetIDDirective = getLexer().getTok().getStringContents();
6560 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6561 return Error(getParser().getTok().getLoc(), "target id must match options");
6562
6563 getTargetStreamer().EmitISAVersion();
6564 Lex();
6565
6566 return false;
6567}
6568
6569bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6570 assert(isHsaAbi(getSTI()));
6571
6572 std::string HSAMetadataString;
6573 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6574 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6575 return true;
6576
6577 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6578 return Error(getLoc(), "invalid HSA metadata");
6579
6580 return false;
6581}
6582
6583/// Common code to parse out a block of text (typically YAML) between start and
6584/// end directives.
6585bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6586 const char *AssemblerDirectiveEnd,
6587 std::string &CollectString) {
6588
6589 raw_string_ostream CollectStream(CollectString);
6590
6591 getLexer().setSkipSpace(false);
6592
6593 bool FoundEnd = false;
6594 while (!isToken(AsmToken::Eof)) {
6595 while (isToken(AsmToken::Space)) {
6596 CollectStream << getTokenStr();
6597 Lex();
6598 }
6599
6600 if (trySkipId(AssemblerDirectiveEnd)) {
6601 FoundEnd = true;
6602 break;
6603 }
6604
6605 CollectStream << Parser.parseStringToEndOfStatement()
6606 << getContext().getAsmInfo()->getSeparatorString();
6607
6608 Parser.eatToEndOfStatement();
6609 }
6610
6611 getLexer().setSkipSpace(true);
6612
6613 if (isToken(AsmToken::Eof) && !FoundEnd) {
6614 return TokError(Twine("expected directive ") +
6615 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6616 }
6617
6618 return false;
6619}
6620
6621/// Parse the assembler directive for new MsgPack-format PAL metadata.
6622bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6623 std::string String;
6624 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6625 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
6626 return true;
6627
6628 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6629 if (!PALMetadata->setFromString(String))
6630 return Error(getLoc(), "invalid PAL metadata");
6631 return false;
6632}
6633
6634/// Parse the assembler directive for old linear-format PAL metadata.
6635bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6636 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6637 return Error(getLoc(),
6638 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6639 "not available on non-amdpal OSes")).str());
6640 }
6641
6642 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6643 PALMetadata->setLegacy();
6644 for (;;) {
6645 uint32_t Key, Value;
6646 if (ParseAsAbsoluteExpression(Key)) {
6647 return TokError(Twine("invalid value in ") +
6648 Twine(PALMD::AssemblerDirective));
6649 }
6650 if (!trySkipToken(AsmToken::Comma)) {
6651 return TokError(Twine("expected an even number of values in ") +
6652 Twine(PALMD::AssemblerDirective));
6653 }
6654 if (ParseAsAbsoluteExpression(Value)) {
6655 return TokError(Twine("invalid value in ") +
6656 Twine(PALMD::AssemblerDirective));
6657 }
6658 PALMetadata->setRegister(Key, Value);
6659 if (!trySkipToken(AsmToken::Comma))
6660 break;
6661 }
6662 return false;
6663}
6664
6665/// ParseDirectiveAMDGPULDS
6666/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
6667bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6668 if (getParser().checkForValidSection())
6669 return true;
6670
6671 StringRef Name;
6672 SMLoc NameLoc = getLoc();
6673 if (getParser().parseIdentifier(Name))
6674 return TokError("expected identifier in directive");
6675
6676 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6677 if (getParser().parseComma())
6678 return true;
6679
6680 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6681
6682 int64_t Size;
6683 SMLoc SizeLoc = getLoc();
6684 if (getParser().parseAbsoluteExpression(Size))
6685 return true;
6686 if (Size < 0)
6687 return Error(SizeLoc, "size must be non-negative");
6688 if (Size > LocalMemorySize)
6689 return Error(SizeLoc, "size is too large");
6690
6691 int64_t Alignment = 4;
6692 if (trySkipToken(AsmToken::Comma)) {
6693 SMLoc AlignLoc = getLoc();
6694 if (getParser().parseAbsoluteExpression(Alignment))
6695 return true;
6696 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6697 return Error(AlignLoc, "alignment must be a power of two");
6698
6699 // Alignment larger than the size of LDS is possible in theory, as long
6700 // as the linker manages to place the symbol at address 0, but we do want
6701 // to make sure the alignment fits nicely into a 32-bit integer.
6702 if (Alignment >= 1u << 31)
6703 return Error(AlignLoc, "alignment is too large");
6704 }
6705
6706 if (parseEOL())
6707 return true;
6708
6709 Symbol->redefineIfPossible();
6710 if (!Symbol->isUndefined())
6711 return Error(NameLoc, "invalid symbol redefinition");
6712
6713 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6714 return false;
6715}
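// Illustrative usage of the grammar above (arbitrary values, not from the
// original source):
//   .amdgpu_lds my_lds_var, 512, 16
// declares a 512-byte LDS symbol with 16-byte alignment; when omitted, the
// alignment defaults to 4 and must otherwise be a power of two that fits in
// 32 bits.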
6716
6717bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6718 StringRef IDVal = DirectiveID.getString();
6719
6720 if (isHsaAbi(getSTI())) {
6721 if (IDVal == ".amdhsa_kernel")
6722 return ParseDirectiveAMDHSAKernel();
6723
6724 if (IDVal == ".amdhsa_code_object_version")
6725 return ParseDirectiveAMDHSACodeObjectVersion();
6726
6727 // TODO: Restructure/combine with PAL metadata directive.
6728 if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
6729 return ParseDirectiveHSAMetadata();
6730 } else {
6731 if (IDVal == ".amd_kernel_code_t")
6732 return ParseDirectiveAMDKernelCodeT();
6733
6734 if (IDVal == ".amdgpu_hsa_kernel")
6735 return ParseDirectiveAMDGPUHsaKernel();
6736
6737 if (IDVal == ".amd_amdgpu_isa")
6738 return ParseDirectiveISAVersion();
6739
6740 if (IDVal == HSAMD::AssemblerDirectiveBegin) {
6741 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6742 Twine(" directive is "
6743 "not available on non-amdhsa OSes"))
6744 .str());
6745 }
6746 }
6747
6748 if (IDVal == ".amdgcn_target")
6749 return ParseDirectiveAMDGCNTarget();
6750
6751 if (IDVal == ".amdgpu_lds")
6752 return ParseDirectiveAMDGPULDS();
6753
6754 if (IDVal == PALMD::AssemblerDirectiveBegin)
6755 return ParseDirectivePALMetadataBegin();
6756
6757 if (IDVal == PALMD::AssemblerDirective)
6758 return ParseDirectivePALMetadata();
6759
6760 return true;
6761}
6762
6763bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6764 MCRegister Reg) {
6765 if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
6766 return isGFX9Plus();
6767
6768 // GFX10+ has 2 more SGPRs 104 and 105.
6769 if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
6770 return hasSGPR104_SGPR105();
6771
6772 switch (Reg.id()) {
6773 case SRC_SHARED_BASE_LO:
6774 case SRC_SHARED_BASE:
6775 case SRC_SHARED_LIMIT_LO:
6776 case SRC_SHARED_LIMIT:
6777 case SRC_PRIVATE_BASE_LO:
6778 case SRC_PRIVATE_BASE:
6779 case SRC_PRIVATE_LIMIT_LO:
6780 case SRC_PRIVATE_LIMIT:
6781 return isGFX9Plus();
6782 case SRC_FLAT_SCRATCH_BASE_LO:
6783 case SRC_FLAT_SCRATCH_BASE_HI:
6784 return hasGloballyAddressableScratch();
6785 case SRC_POPS_EXITING_WAVE_ID:
6786 return isGFX9Plus() && !isGFX11Plus();
6787 case TBA:
6788 case TBA_LO:
6789 case TBA_HI:
6790 case TMA:
6791 case TMA_LO:
6792 case TMA_HI:
6793 return !isGFX9Plus();
6794 case XNACK_MASK:
6795 case XNACK_MASK_LO:
6796 case XNACK_MASK_HI:
6797 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6798 case SGPR_NULL:
6799 return isGFX10Plus();
6800 case SRC_EXECZ:
6801 case SRC_VCCZ:
6802 return !isGFX11Plus();
6803 default:
6804 break;
6805 }
6806
6807 if (isCI())
6808 return true;
6809
6810 if (isSI() || isGFX10Plus()) {
6811 // No flat_scr on SI.
6812 // On GFX10Plus flat scratch is not a valid register operand and can only be
6813 // accessed with s_setreg/s_getreg.
6814 switch (Reg.id()) {
6815 case FLAT_SCR:
6816 case FLAT_SCR_LO:
6817 case FLAT_SCR_HI:
6818 return false;
6819 default:
6820 return true;
6821 }
6822 }
6823
6824 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6825 // SI/CI have.
6826 if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
6827 return hasSGPR102_SGPR103();
6828
6829 return true;
6830}
6831
6832ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6833 StringRef Mnemonic,
6834 OperandMode Mode) {
6835 ParseStatus Res = parseVOPD(Operands);
6836 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6837 return Res;
6838
6839 // Try to parse with a custom parser
6840 Res = MatchOperandParserImpl(Operands, Mnemonic);
6841
6842 // If we successfully parsed the operand or if there was an error parsing,
6843 // we are done.
6844 //
6845 // If we are parsing after we reach EndOfStatement then this means we
6846 // are appending default values to the Operands list. This is only done
6847 // by custom parser, so we shouldn't continue on to the generic parsing.
6848 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6849 return Res;
6850
6851 SMLoc RBraceLoc;
6852 SMLoc LBraceLoc = getLoc();
6853 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6854 unsigned Prefix = Operands.size();
6855
6856 for (;;) {
6857 auto Loc = getLoc();
6858 Res = parseReg(Operands);
6859 if (Res.isNoMatch())
6860 Error(Loc, "expected a register");
6861 if (!Res.isSuccess())
6862 return ParseStatus::Failure;
6863
6864 RBraceLoc = getLoc();
6865 if (trySkipToken(AsmToken::RBrac))
6866 break;
6867
6868 if (!skipToken(AsmToken::Comma,
6869 "expected a comma or a closing square bracket"))
6870 return ParseStatus::Failure;
6871 }
6872
6873 if (Operands.size() - Prefix > 1) {
6874 Operands.insert(Operands.begin() + Prefix,
6875 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6876 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6877 }
6878
6879 return ParseStatus::Success;
6880 }
6881
6882 return parseRegOrImm(Operands);
6883}
6884
6885StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6886 // Clear any forced encodings from the previous instruction.
6887 setForcedEncodingSize(0);
6888 setForcedDPP(false);
6889 setForcedSDWA(false);
6890
6891 if (Name.consume_back("_e64_dpp")) {
6892 setForcedDPP(true);
6893 setForcedEncodingSize(64);
6894 return Name;
6895 }
6896 if (Name.consume_back("_e64")) {
6897 setForcedEncodingSize(64);
6898 return Name;
6899 }
6900 if (Name.consume_back("_e32")) {
6901 setForcedEncodingSize(32);
6902 return Name;
6903 }
6904 if (Name.consume_back("_dpp")) {
6905 setForcedDPP(true);
6906 return Name;
6907 }
6908 if (Name.consume_back("_sdwa")) {
6909 setForcedSDWA(true);
6910 return Name;
6911 }
6912 return Name;
6913}
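// Illustrative behaviour (not part of the original source): a mnemonic such
// as "v_add_f32_e64" is stripped to "v_add_f32" with a forced 64-bit
// encoding, "v_add_f32_e64_dpp" additionally forces DPP, "_e32" forces the
// 32-bit encoding, and "_sdwa" forces SDWA; a bare mnemonic leaves the
// encoding choice to the matcher.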
6914
6915static void applyMnemonicAliases(StringRef &Mnemonic,
6916 const FeatureBitset &Features,
6917 unsigned VariantID);
6918
6919bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
6920 StringRef Name, SMLoc NameLoc,
6921 OperandVector &Operands) {
6922 // Add the instruction mnemonic
6923 Name = parseMnemonicSuffix(Name);
6924
6925 // If the target architecture uses MnemonicAlias, call it here to parse
6926 // operands correctly.
6927 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6928
6929 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6930
6931 bool IsMIMG = Name.starts_with("image_");
6932
6933 while (!trySkipToken(AsmToken::EndOfStatement)) {
6934 OperandMode Mode = OperandMode_Default;
6935 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6936 Mode = OperandMode_NSA;
6937 ParseStatus Res = parseOperand(Operands, Name, Mode);
6938
6939 if (!Res.isSuccess()) {
6940 checkUnsupportedInstruction(Name, NameLoc);
6941 if (!Parser.hasPendingError()) {
6942 // FIXME: use real operand location rather than the current location.
6943 StringRef Msg = Res.isFailure() ? "failed parsing operand."
6944 : "not a valid operand.";
6945 Error(getLoc(), Msg);
6946 }
6947 while (!trySkipToken(AsmToken::EndOfStatement)) {
6948 lex();
6949 }
6950 return true;
6951 }
6952
6953 // Eat the comma or space if there is one.
6954 trySkipToken(AsmToken::Comma);
6955 }
6956
6957 return false;
6958}
6959
6960//===----------------------------------------------------------------------===//
6961// Utility functions
6962//===----------------------------------------------------------------------===//
6963
6964ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6965 OperandVector &Operands) {
6966 SMLoc S = getLoc();
6967 if (!trySkipId(Name))
6968 return ParseStatus::NoMatch;
6969
6970 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6971 return ParseStatus::Success;
6972}
6973
6974ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6975 int64_t &IntVal) {
6976
6977 if (!trySkipId(Prefix, AsmToken::Colon))
6978 return ParseStatus::NoMatch;
6979
6980 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6981}
6982
6983ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6984 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6985 std::function<bool(int64_t &)> ConvertResult) {
6986 SMLoc S = getLoc();
6987 int64_t Value = 0;
6988
6989 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6990 if (!Res.isSuccess())
6991 return Res;
6992
6993 if (ConvertResult && !ConvertResult(Value)) {
6994 Error(S, "invalid " + StringRef(Prefix) + " value.");
6995 }
6996
6997 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6998 return ParseStatus::Success;
6999}
7000
7001ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
7002 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7003 bool (*ConvertResult)(int64_t &)) {
7004 SMLoc S = getLoc();
7005 if (!trySkipId(Prefix, AsmToken::Colon))
7006 return ParseStatus::NoMatch;
7007
7008 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
7009 return ParseStatus::Failure;
7010
7011 unsigned Val = 0;
7012 const unsigned MaxSize = 4;
7013
7014 // FIXME: How to verify the number of elements matches the number of src
7015 // operands?
7016 for (int I = 0; ; ++I) {
7017 int64_t Op;
7018 SMLoc Loc = getLoc();
7019 if (!parseExpr(Op))
7020 return ParseStatus::Failure;
7021
7022 if (Op != 0 && Op != 1)
7023 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
7024
7025 Val |= (Op << I);
7026
7027 if (trySkipToken(AsmToken::RBrac))
7028 break;
7029
7030 if (I + 1 == MaxSize)
7031 return Error(getLoc(), "expected a closing square bracket");
7032
7033 if (!skipToken(AsmToken::Comma, "expected a comma"))
7034 return ParseStatus::Failure;
7035 }
7036
7037 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
7038 return ParseStatus::Success;
7039}
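// Illustrative example (assumed operand spelling, not from the original
// source): an input such as "neg_lo:[0,1]" is parsed here into the packed
// value 0b10 (bit I is set when element I is 1); at most four 0/1 elements
// are accepted before the closing ']'.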
7040
7041ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
7042 OperandVector &Operands,
7043 AMDGPUOperand::ImmTy ImmTy) {
7044 int64_t Bit;
7045 SMLoc S = getLoc();
7046
7047 if (trySkipId(Name)) {
7048 Bit = 1;
7049 } else if (trySkipId("no", Name)) {
7050 Bit = 0;
7051 } else {
7052 return ParseStatus::NoMatch;
7053 }
7054
7055 if (Name == "r128" && !hasMIMG_R128())
7056 return Error(S, "r128 modifier is not supported on this GPU");
7057 if (Name == "a16" && !hasA16())
7058 return Error(S, "a16 modifier is not supported on this GPU");
7059
7060 if (Bit == 0 && Name == "gds") {
7061 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7062 if (Mnemo.starts_with("ds_gws"))
7063 return Error(S, "nogds is not allowed");
7064 }
7065
7066 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
7067 ImmTy = AMDGPUOperand::ImmTyR128A16;
7068
7069 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
7070 return ParseStatus::Success;
7071}
7072
7073unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7074 bool &Disabling) const {
7075 Disabling = Id.consume_front("no");
7076
7077 if (isGFX940() && !Mnemo.starts_with("s_")) {
7078 return StringSwitch<unsigned>(Id)
7079 .Case("nt", AMDGPU::CPol::NT)
7080 .Case("sc0", AMDGPU::CPol::SC0)
7081 .Case("sc1", AMDGPU::CPol::SC1)
7082 .Default(0);
7083 }
7084
7085 return StringSwitch<unsigned>(Id)
7086 .Case("dlc", AMDGPU::CPol::DLC)
7087 .Case("glc", AMDGPU::CPol::GLC)
7088 .Case("scc", AMDGPU::CPol::SCC)
7089 .Case("slc", AMDGPU::CPol::SLC)
7090 .Default(0);
7091}
7092
7093ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
7094 if (isGFX12Plus()) {
7095 SMLoc StringLoc = getLoc();
7096
7097 int64_t CPolVal = 0;
7098 ParseStatus ResTH = ParseStatus::NoMatch;
7099 ParseStatus ResScope = ParseStatus::NoMatch;
7100 ParseStatus ResNV = ParseStatus::NoMatch;
7101 ParseStatus ResScal = ParseStatus::NoMatch;
7102
7103 for (;;) {
7104 if (ResTH.isNoMatch()) {
7105 int64_t TH;
7106 ResTH = parseTH(Operands, TH);
7107 if (ResTH.isFailure())
7108 return ResTH;
7109 if (ResTH.isSuccess()) {
7110 CPolVal |= TH;
7111 continue;
7112 }
7113 }
7114
7115 if (ResScope.isNoMatch()) {
7116 int64_t Scope;
7117 ResScope = parseScope(Operands, Scope);
7118 if (ResScope.isFailure())
7119 return ResScope;
7120 if (ResScope.isSuccess()) {
7121 CPolVal |= Scope;
7122 continue;
7123 }
7124 }
7125
7126 // The NV bit exists on GFX12+, but it only takes effect starting with
7127 // GFX1250. Allow parsing it on all GFX12 targets and reject it during
7128 // validation to produce better diagnostics.
7129 if (ResNV.isNoMatch()) {
7130 if (trySkipId("nv")) {
7131 ResNV = ParseStatus::Success;
7132 CPolVal |= CPol::NV;
7133 continue;
7134 } else if (trySkipId("no", "nv")) {
7135 ResNV = ParseStatus::Success;
7136 continue;
7137 }
7138 }
7139
7140 if (ResScal.isNoMatch()) {
7141 if (trySkipId("scale_offset")) {
7142 ResScal = ParseStatus::Success;
7143 CPolVal |= CPol::SCAL;
7144 continue;
7145 } else if (trySkipId("no", "scale_offset")) {
7146 ResScal = ParseStatus::Success;
7147 continue;
7148 }
7149 }
7150
7151 break;
7152 }
7153
7154 if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
7155 ResScal.isNoMatch())
7156 return ParseStatus::NoMatch;
7157
7158 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
7159 AMDGPUOperand::ImmTyCPol));
7160 return ParseStatus::Success;
7161 }
7162
7163 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7164 SMLoc OpLoc = getLoc();
7165 unsigned Enabled = 0, Seen = 0;
7166 for (;;) {
7167 SMLoc S = getLoc();
7168 bool Disabling;
7169 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
7170 if (!CPol)
7171 break;
7172
7173 lex();
7174
7175 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
7176 return Error(S, "dlc modifier is not supported on this GPU");
7177
7178 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
7179 return Error(S, "scc modifier is not supported on this GPU");
7180
7181 if (Seen & CPol)
7182 return Error(S, "duplicate cache policy modifier");
7183
7184 if (!Disabling)
7185 Enabled |= CPol;
7186
7187 Seen |= CPol;
7188 }
7189
7190 if (!Seen)
7191 return ParseStatus::NoMatch;
7192
7193 Operands.push_back(
7194 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
7195 return ParseStatus::Success;
7196}
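
// Illustrative examples of accepted cache-policy syntax (assumed, not taken
// from this file): pre-GFX12 targets use the keyword form, e.g.
//   buffer_load_dword v0, off, s[0:3], 0 glc slc
// while GFX12+ uses the prefixed form parsed above, e.g.
//   th:TH_LOAD_NT scope:SCOPE_SYS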
7197
7198ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
7199 int64_t &Scope) {
7200 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
7201 CPol::SCOPE_DEV, CPol::SCOPE_SYS};
7202
7203 ParseStatus Res = parseStringOrIntWithPrefix(
7204 Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
7205 Scope);
7206
7207 if (Res.isSuccess())
7208 Scope = Scopes[Scope];
7209
7210 return Res;
7211}
7212
7213ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
7214 TH = AMDGPU::CPol::TH_RT; // default
7215
7216 StringRef Value;
7217 SMLoc StringLoc;
7218 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
7219 if (!Res.isSuccess())
7220 return Res;
7221
7222 if (Value == "TH_DEFAULT")
7223 TH = AMDGPU::CPol::TH_RT;
7224 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
7225 Value == "TH_LOAD_NT_WB") {
7226 return Error(StringLoc, "invalid th value");
7227 } else if (Value.consume_front("TH_ATOMIC_")) {
7228 TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
7229 } else if (Value.consume_front("TH_LOAD_")) {
7230 TH = AMDGPU::CPol::TH_TYPE_LOAD;
7231 } else if (Value.consume_front("TH_STORE_")) {
7232 TH = AMDGPU::CPol::TH_TYPE_STORE;
7233 } else {
7234 return Error(StringLoc, "invalid th value");
7235 }
7236
7237 if (Value == "BYPASS")
7238 TH |= AMDGPU::CPol::TH_REAL_BYPASS;
7239
7240 if (TH != 0) {
7241 if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC)
7242 TH |= StringSwitch<int64_t>(Value)
7243 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7244 .Case("RT", AMDGPU::CPol::TH_RT)
7245 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7246 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
7247 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
7248 AMDGPU::CPol::TH_ATOMIC_RETURN)
7249 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
7250 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
7251 AMDGPU::CPol::TH_ATOMIC_NT)
7252 .Default(0xffffffff);
7253 else
7254 TH |= StringSwitch<int64_t>(Value)
7255 .Case("RT", AMDGPU::CPol::TH_RT)
7256 .Case("NT", AMDGPU::CPol::TH_NT)
7257 .Case("HT", AMDGPU::CPol::TH_HT)
7258 .Case("LU", AMDGPU::CPol::TH_LU)
7259 .Case("WB", AMDGPU::CPol::TH_WB)
7260 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
7261 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
7262 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
7263 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
7264 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
7265 .Default(0xffffffff);
7266 }
7267
7268 if (TH == 0xffffffff)
7269 return Error(StringLoc, "invalid th value");
7270
7271 return ParseStatus::Success;
7272}
7273
7274 static void
7275 addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands,
7276 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7277 AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
7278 std::optional<unsigned> InsertAt = std::nullopt) {
7279 auto i = OptionalIdx.find(ImmT);
7280 if (i != OptionalIdx.end()) {
7281 unsigned Idx = i->second;
7282 const AMDGPUOperand &Op =
7283 static_cast<const AMDGPUOperand &>(*Operands[Idx]);
7284 if (InsertAt)
7285 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
7286 else
7287 Op.addImmOperands(Inst, 1);
7288 } else {
7289 if (InsertAt.has_value())
7290 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
7291 else
7292 Inst.addOperand(MCOperand::createImm(Default));
7293 }
7294}
7295
7296ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7297 StringRef &Value,
7298 SMLoc &StringLoc) {
7299 if (!trySkipId(Prefix, AsmToken::Colon))
7300 return ParseStatus::NoMatch;
7301
7302 StringLoc = getLoc();
7303 return parseId(Value, "expected an identifier") ? ParseStatus::Success
7304 : ParseStatus::Failure;
7305}
7306
7307ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7308 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7309 int64_t &IntVal) {
7310 if (!trySkipId(Name, AsmToken::Colon))
7311 return ParseStatus::NoMatch;
7312
7313 SMLoc StringLoc = getLoc();
7314
7315 StringRef Value;
7316 if (isToken(AsmToken::Identifier)) {
7317 Value = getTokenStr();
7318 lex();
7319
7320 for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
7321 if (Value == Ids[IntVal])
7322 break;
7323 } else if (!parseExpr(IntVal))
7324 return ParseStatus::Failure;
7325
7326 if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
7327 return Error(StringLoc, "invalid " + Twine(Name) + " value");
7328
7329 return ParseStatus::Success;
7330}
7331
7332ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7333 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7334 AMDGPUOperand::ImmTy Type) {
7335 SMLoc S = getLoc();
7336 int64_t IntVal;
7337
7338 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7339 if (Res.isSuccess())
7340 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
7341
7342 return Res;
7343}
7344
7345//===----------------------------------------------------------------------===//
7346// MTBUF format
7347//===----------------------------------------------------------------------===//
7348
7349bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
7350 int64_t MaxVal,
7351 int64_t &Fmt) {
7352 int64_t Val;
7353 SMLoc Loc = getLoc();
7354
7355 auto Res = parseIntWithPrefix(Pref, Val);
7356 if (Res.isFailure())
7357 return false;
7358 if (Res.isNoMatch())
7359 return true;
7360
7361 if (Val < 0 || Val > MaxVal) {
7362 Error(Loc, Twine("out of range ", StringRef(Pref)));
7363 return false;
7364 }
7365
7366 Fmt = Val;
7367 return true;
7368}
7369
7370ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
7371 AMDGPUOperand::ImmTy ImmTy) {
7372 const char *Pref = "index_key";
7373 int64_t ImmVal = 0;
7374 SMLoc Loc = getLoc();
7375 auto Res = parseIntWithPrefix(Pref, ImmVal);
7376 if (!Res.isSuccess())
7377 return Res;
7378
7379 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7380 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7381 (ImmVal < 0 || ImmVal > 1))
7382 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7383
7384 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7385 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7386
7387 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
7388 return ParseStatus::Success;
7389}
7390
7391ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
7392 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
7393}
7394
7395ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
7396 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
7397}
7398
7399ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
7400 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
7401}
7402
7403ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(OperandVector &Operands,
7404 StringRef Name,
7405 AMDGPUOperand::ImmTy Type) {
7406 return parseStringOrIntWithPrefix(Operands, Name,
7407 {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
7408 "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
7409 "MATRIX_FMT_FP4"},
7410 Type);
7411}
7412
7413ParseStatus AMDGPUAsmParser::parseMatrixAFMT(OperandVector &Operands) {
7414 return tryParseMatrixFMT(Operands, "matrix_a_fmt",
7415 AMDGPUOperand::ImmTyMatrixAFMT);
7416}
7417
7418ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
7419 return tryParseMatrixFMT(Operands, "matrix_b_fmt",
7420 AMDGPUOperand::ImmTyMatrixBFMT);
7421}
7422
7423ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
7424 StringRef Name,
7425 AMDGPUOperand::ImmTy Type) {
7426 return parseStringOrIntWithPrefix(
7427 Operands, Name, {"MATRIX_SCALE_ROW0", "MATRIX_SCALE_ROW1"}, Type);
7428}
7429
7430ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
7431 return tryParseMatrixScale(Operands, "matrix_a_scale",
7432 AMDGPUOperand::ImmTyMatrixAScale);
7433}
7434
7435ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
7436 return tryParseMatrixScale(Operands, "matrix_b_scale",
7437 AMDGPUOperand::ImmTyMatrixBScale);
7438}
7439
7440ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
7441 StringRef Name,
7442 AMDGPUOperand::ImmTy Type) {
7443 return parseStringOrIntWithPrefix(
7444 Operands, Name,
7445 {"MATRIX_SCALE_FMT_E8", "MATRIX_SCALE_FMT_E5M3", "MATRIX_SCALE_FMT_E4M3"},
7446 Type);
7447}
7448
7449ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
7450 return tryParseMatrixScaleFmt(Operands, "matrix_a_scale_fmt",
7451 AMDGPUOperand::ImmTyMatrixAScaleFmt);
7452}
7453
7454ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
7455 return tryParseMatrixScaleFmt(Operands, "matrix_b_scale_fmt",
7456 AMDGPUOperand::ImmTyMatrixBScaleFmt);
7457}
7458
7459// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
7460// values to live in a joint format operand in the MCInst encoding.
7461ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
7462 using namespace llvm::AMDGPU::MTBUFFormat;
7463
7464 int64_t Dfmt = DFMT_UNDEF;
7465 int64_t Nfmt = NFMT_UNDEF;
7466
7467 // dfmt and nfmt can appear in either order, and each is optional.
7468 for (int I = 0; I < 2; ++I) {
7469 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
7470 return ParseStatus::Failure;
7471
7472 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
7473 return ParseStatus::Failure;
7474
7475 // Skip optional comma between dfmt/nfmt
7476 // but guard against 2 commas following each other.
7477 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7478 !peekToken().is(AsmToken::Comma)) {
7479 trySkipToken(AsmToken::Comma);
7480 }
7481 }
7482
7483 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7484 return ParseStatus::NoMatch;
7485
7486 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7487 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7488
7489 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7490 return ParseStatus::Success;
7491}
7492
7493ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
7494 using namespace llvm::AMDGPU::MTBUFFormat;
7495
7496 int64_t Fmt = UFMT_UNDEF;
7497
7498 if (!tryParseFmt("format", UFMT_MAX, Fmt))
7499 return ParseStatus::Failure;
7500
7501 if (Fmt == UFMT_UNDEF)
7502 return ParseStatus::NoMatch;
7503
7504 Format = Fmt;
7505 return ParseStatus::Success;
7506}
7507
7508bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7509 int64_t &Nfmt,
7510 StringRef FormatStr,
7511 SMLoc Loc) {
7512 using namespace llvm::AMDGPU::MTBUFFormat;
7513 int64_t Format;
7514
7515 Format = getDfmt(FormatStr);
7516 if (Format != DFMT_UNDEF) {
7517 Dfmt = Format;
7518 return true;
7519 }
7520
7521 Format = getNfmt(FormatStr, getSTI());
7522 if (Format != NFMT_UNDEF) {
7523 Nfmt = Format;
7524 return true;
7525 }
7526
7527 Error(Loc, "unsupported format");
7528 return false;
7529}
7530
7531ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7532 SMLoc FormatLoc,
7533 int64_t &Format) {
7534 using namespace llvm::AMDGPU::MTBUFFormat;
7535
7536 int64_t Dfmt = DFMT_UNDEF;
7537 int64_t Nfmt = NFMT_UNDEF;
7538 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7539 return ParseStatus::Failure;
7540
7541 if (trySkipToken(AsmToken::Comma)) {
7542 StringRef Str;
7543 SMLoc Loc = getLoc();
7544 if (!parseId(Str, "expected a format string") ||
7545 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7546 return ParseStatus::Failure;
7547 if (Dfmt == DFMT_UNDEF)
7548 return Error(Loc, "duplicate numeric format");
7549 if (Nfmt == NFMT_UNDEF)
7550 return Error(Loc, "duplicate data format");
7551 }
7552
7553 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7554 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7555
7556 if (isGFX10Plus()) {
7557 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
7558 if (Ufmt == UFMT_UNDEF)
7559 return Error(FormatLoc, "unsupported format");
7560 Format = Ufmt;
7561 } else {
7562 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7563 }
7564
7565 return ParseStatus::Success;
7566}
7567
7568ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7569 SMLoc Loc,
7570 int64_t &Format) {
7571 using namespace llvm::AMDGPU::MTBUFFormat;
7572
7573 auto Id = getUnifiedFormat(FormatStr, getSTI());
7574 if (Id == UFMT_UNDEF)
7575 return ParseStatus::NoMatch;
7576
7577 if (!isGFX10Plus())
7578 return Error(Loc, "unified format is not supported on this GPU");
7579
7580 Format = Id;
7581 return ParseStatus::Success;
7582}
7583
7584ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7585 using namespace llvm::AMDGPU::MTBUFFormat;
7586 SMLoc Loc = getLoc();
7587
7588 if (!parseExpr(Format))
7589 return ParseStatus::Failure;
7590 if (!isValidFormatEncoding(Format, getSTI()))
7591 return Error(Loc, "out of range format");
7592
7593 return ParseStatus::Success;
7594}
7595
7596ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
7597 using namespace llvm::AMDGPU::MTBUFFormat;
7598
7599 if (!trySkipId("format", AsmToken::Colon))
7600 return ParseStatus::NoMatch;
7601
7602 if (trySkipToken(AsmToken::LBrac)) {
7603 StringRef FormatStr;
7604 SMLoc Loc = getLoc();
7605 if (!parseId(FormatStr, "expected a format string"))
7606 return ParseStatus::Failure;
7607
7608 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
7609 if (Res.isNoMatch())
7610 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
7611 if (!Res.isSuccess())
7612 return Res;
7613
7614 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7615 return ParseStatus::Failure;
7616
7617 return ParseStatus::Success;
7618 }
7619
7620 return parseNumericFormat(Format);
7621}
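
// Illustrative examples of accepted format operands (assumed, not taken from
// this file):
//   format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]  (split dfmt/nfmt form)
//   format:[BUF_FMT_32_FLOAT]                          (GFX10+ unified form)
//   format:22                                          (raw numeric encoding)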
7622
7623ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
7624 using namespace llvm::AMDGPU::MTBUFFormat;
7625
7626 int64_t Format = getDefaultFormatEncoding(getSTI());
7627 ParseStatus Res;
7628 SMLoc Loc = getLoc();
7629
7630 // Parse legacy format syntax.
7631 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
7632 if (Res.isFailure())
7633 return Res;
7634
7635 bool FormatFound = Res.isSuccess();
7636
7637 Operands.push_back(
7638 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7639
7640 if (FormatFound)
7641 trySkipToken(AsmToken::Comma);
7642
7643 if (isToken(AsmToken::EndOfStatement)) {
7644 // We are expecting an soffset operand,
7645 // but let the matcher handle the error.
7646 return ParseStatus::Success;
7647 }
7648
7649 // Parse soffset.
7650 Res = parseRegOrImm(Operands);
7651 if (!Res.isSuccess())
7652 return Res;
7653
7654 trySkipToken(AsmToken::Comma);
7655
7656 if (!FormatFound) {
7657 Res = parseSymbolicOrNumericFormat(Format);
7658 if (Res.isFailure())
7659 return Res;
7660 if (Res.isSuccess()) {
7661 auto Size = Operands.size();
7662 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
7663 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7664 Op.setImm(Format);
7665 }
7666 return ParseStatus::Success;
7667 }
7668
7669 if (isId("format") && peekToken().is(AsmToken::Colon))
7670 return Error(getLoc(), "duplicate format");
7671 return ParseStatus::Success;
7672}
7673
7674ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7675 ParseStatus Res =
7676 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7677 if (Res.isNoMatch()) {
7678 Res = parseIntWithPrefix("inst_offset", Operands,
7679 AMDGPUOperand::ImmTyInstOffset);
7680 }
7681 return Res;
7682}
7683
7684ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7685 ParseStatus Res =
7686 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7687 if (Res.isNoMatch())
7688 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7689 return Res;
7690}
7691
7692ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7693 ParseStatus Res =
7694 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7695 if (Res.isNoMatch()) {
7696 Res =
7697 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7698 }
7699 return Res;
7700}
7701
7702//===----------------------------------------------------------------------===//
7703// Exp
7704//===----------------------------------------------------------------------===//
7705
7706void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7707 OptionalImmIndexMap OptionalIdx;
7708
7709 unsigned OperandIdx[4];
7710 unsigned EnMask = 0;
7711 int SrcIdx = 0;
7712
7713 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7714 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7715
7716 // Add the register arguments
7717 if (Op.isReg()) {
7718 assert(SrcIdx < 4);
7719 OperandIdx[SrcIdx] = Inst.size();
7720 Op.addRegOperands(Inst, 1);
7721 ++SrcIdx;
7722 continue;
7723 }
7724
7725 if (Op.isOff()) {
7726 assert(SrcIdx < 4);
7727 OperandIdx[SrcIdx] = Inst.size();
7728 Inst.addOperand(MCOperand::createReg(MCRegister()));
7729 ++SrcIdx;
7730 continue;
7731 }
7732
7733 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7734 Op.addImmOperands(Inst, 1);
7735 continue;
7736 }
7737
7738 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7739 continue;
7740
7741 // Handle optional arguments
7742 OptionalIdx[Op.getImmTy()] = i;
7743 }
7744
7745 assert(SrcIdx == 4);
7746
7747 bool Compr = false;
7748 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7749 Compr = true;
7750 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7751 Inst.getOperand(OperandIdx[2]).setReg(MCRegister());
7752 Inst.getOperand(OperandIdx[3]).setReg(MCRegister());
7753 }
7754
7755 for (auto i = 0; i < SrcIdx; ++i) {
7756 if (Inst.getOperand(OperandIdx[i]).getReg()) {
7757 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7758 }
7759 }
7760
7761 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7762 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7763
7764 Inst.addOperand(MCOperand::createImm(EnMask));
7765}
7766
7767//===----------------------------------------------------------------------===//
7768// s_waitcnt
7769//===----------------------------------------------------------------------===//
7770
7771 static bool
7772 encodeCnt(
7773 const AMDGPU::IsaVersion ISA,
7774 int64_t &IntVal,
7775 int64_t CntVal,
7776 bool Saturate,
7777 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7778 unsigned (*decode)(const IsaVersion &Version, unsigned))
7779{
7780 bool Failed = false;
7781
7782 IntVal = encode(ISA, IntVal, CntVal);
7783 if (CntVal != decode(ISA, IntVal)) {
7784 if (Saturate) {
7785 IntVal = encode(ISA, IntVal, -1);
7786 } else {
7787 Failed = true;
7788 }
7789 }
7790 return Failed;
7791}
7792
7793bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7794
7795 SMLoc CntLoc = getLoc();
7796 StringRef CntName = getTokenStr();
7797
7798 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7799 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7800 return false;
7801
7802 int64_t CntVal;
7803 SMLoc ValLoc = getLoc();
7804 if (!parseExpr(CntVal))
7805 return false;
7806
7807 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7808
7809 bool Failed = true;
7810 bool Sat = CntName.ends_with("_sat");
7811
7812 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7813 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7814 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7815 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7816 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7817 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7818 } else {
7819 Error(CntLoc, "invalid counter name " + CntName);
7820 return false;
7821 }
7822
7823 if (Failed) {
7824 Error(ValLoc, "too large value for " + CntName);
7825 return false;
7826 }
7827
7828 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7829 return false;
7830
7831 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7832 if (isToken(AsmToken::EndOfStatement)) {
7833 Error(getLoc(), "expected a counter name");
7834 return false;
7835 }
7836 }
7837
7838 return true;
7839}
7840
7841ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7842 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7843 int64_t Waitcnt = getWaitcntBitMask(ISA);
7844 SMLoc S = getLoc();
7845
7846 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7847 while (!isToken(AsmToken::EndOfStatement)) {
7848 if (!parseCnt(Waitcnt))
7849 return ParseStatus::Failure;
7850 }
7851 } else {
7852 if (!parseExpr(Waitcnt))
7853 return ParseStatus::Failure;
7854 }
7855
7856 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7857 return ParseStatus::Success;
7858}
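
// Illustrative examples of accepted s_waitcnt operands (assumed, not taken
// from this file):
//   s_waitcnt vmcnt(0) lgkmcnt(0)
//   s_waitcnt 0
// Counters that are not mentioned keep the "no wait" values supplied by
// getWaitcntBitMask().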
7859
7860bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7861 SMLoc FieldLoc = getLoc();
7862 StringRef FieldName = getTokenStr();
7863 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7864 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7865 return false;
7866
7867 SMLoc ValueLoc = getLoc();
7868 StringRef ValueName = getTokenStr();
7869 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7870 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7871 return false;
7872
7873 unsigned Shift;
7874 if (FieldName == "instid0") {
7875 Shift = 0;
7876 } else if (FieldName == "instskip") {
7877 Shift = 4;
7878 } else if (FieldName == "instid1") {
7879 Shift = 7;
7880 } else {
7881 Error(FieldLoc, "invalid field name " + FieldName);
7882 return false;
7883 }
7884
7885 int Value;
7886 if (Shift == 4) {
7887 // Parse values for instskip.
7888 Value = StringSwitch<int>(ValueName)
7889 .Case("SAME", 0)
7890 .Case("NEXT", 1)
7891 .Case("SKIP_1", 2)
7892 .Case("SKIP_2", 3)
7893 .Case("SKIP_3", 4)
7894 .Case("SKIP_4", 5)
7895 .Default(-1);
7896 } else {
7897 // Parse values for instid0 and instid1.
7898 Value = StringSwitch<int>(ValueName)
7899 .Case("NO_DEP", 0)
7900 .Case("VALU_DEP_1", 1)
7901 .Case("VALU_DEP_2", 2)
7902 .Case("VALU_DEP_3", 3)
7903 .Case("VALU_DEP_4", 4)
7904 .Case("TRANS32_DEP_1", 5)
7905 .Case("TRANS32_DEP_2", 6)
7906 .Case("TRANS32_DEP_3", 7)
7907 .Case("FMA_ACCUM_CYCLE_1", 8)
7908 .Case("SALU_CYCLE_1", 9)
7909 .Case("SALU_CYCLE_2", 10)
7910 .Case("SALU_CYCLE_3", 11)
7911 .Default(-1);
7912 }
7913 if (Value < 0) {
7914 Error(ValueLoc, "invalid value name " + ValueName);
7915 return false;
7916 }
7917
7918 Delay |= Value << Shift;
7919 return true;
7920}
7921
7922ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7923 int64_t Delay = 0;
7924 SMLoc S = getLoc();
7925
7926 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7927 do {
7928 if (!parseDelay(Delay))
7929 return ParseStatus::Failure;
7930 } while (trySkipToken(AsmToken::Pipe));
7931 } else {
7932 if (!parseExpr(Delay))
7933 return ParseStatus::Failure;
7934 }
7935
7936 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7937 return ParseStatus::Success;
7938}
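
// Illustrative example of an accepted s_delay_alu operand (assumed, not taken
// from this file):
//   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
// Fields are combined with '|' and packed at the bit positions used in
// parseDelay() above.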
7939
7940bool
7941AMDGPUOperand::isSWaitCnt() const {
7942 return isImm();
7943}
7944
7945bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7946
7947//===----------------------------------------------------------------------===//
7948// DepCtr
7949//===----------------------------------------------------------------------===//
7950
7951void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7952 StringRef DepCtrName) {
7953 switch (ErrorId) {
7954 case OPR_ID_UNKNOWN:
7955 Error(Loc, Twine("invalid counter name ", DepCtrName));
7956 return;
7957 case OPR_ID_UNSUPPORTED:
7958 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7959 return;
7960 case OPR_ID_DUPLICATE:
7961 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7962 return;
7963 case OPR_VAL_INVALID:
7964 Error(Loc, Twine("invalid value for ", DepCtrName));
7965 return;
7966 default:
7967 assert(false);
7968 }
7969}
7970
7971bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7972
7973 using namespace llvm::AMDGPU::DepCtr;
7974
7975 SMLoc DepCtrLoc = getLoc();
7976 StringRef DepCtrName = getTokenStr();
7977
7978 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7979 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7980 return false;
7981
7982 int64_t ExprVal;
7983 if (!parseExpr(ExprVal))
7984 return false;
7985
7986 unsigned PrevOprMask = UsedOprMask;
7987 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7988
7989 if (CntVal < 0) {
7990 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7991 return false;
7992 }
7993
7994 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7995 return false;
7996
7997 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7998 if (isToken(AsmToken::EndOfStatement)) {
7999 Error(getLoc(), "expected a counter name");
8000 return false;
8001 }
8002 }
8003
8004 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
8005 DepCtr = (DepCtr & ~CntValMask) | CntVal;
8006 return true;
8007}
8008
8009ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
8010 using namespace llvm::AMDGPU::DepCtr;
8011
8012 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
8013 SMLoc Loc = getLoc();
8014
8015 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8016 unsigned UsedOprMask = 0;
8017 while (!isToken(AsmToken::EndOfStatement)) {
8018 if (!parseDepCtr(DepCtr, UsedOprMask))
8019 return ParseStatus::Failure;
8020 }
8021 } else {
8022 if (!parseExpr(DepCtr))
8023 return ParseStatus::Failure;
8024 }
8025
8026 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
8027 return ParseStatus::Success;
8028}
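
// Illustrative example of an accepted s_waitcnt_depctr operand (assumed, not
// taken from this file; counter names come from the DepCtr tables):
//   s_waitcnt_depctr depctr_va_vdst(0) depctr_vm_vsrc(0)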
8029
8030bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
8031
8032//===----------------------------------------------------------------------===//
8033// hwreg
8034//===----------------------------------------------------------------------===//
8035
8036ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
8037 OperandInfoTy &Offset,
8038 OperandInfoTy &Width) {
8039 using namespace llvm::AMDGPU::Hwreg;
8040
8041 if (!trySkipId("hwreg", AsmToken::LParen))
8042 return ParseStatus::NoMatch;
8043
8044 // The register may be specified by name or using a numeric code
8045 HwReg.Loc = getLoc();
8046 if (isToken(AsmToken::Identifier) &&
8047 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8048 HwReg.IsSymbolic = true;
8049 lex(); // skip register name
8050 } else if (!parseExpr(HwReg.Val, "a register name")) {
8051 return ParseStatus::Failure;
8052 }
8053
8054 if (trySkipToken(AsmToken::RParen))
8055 return ParseStatus::Success;
8056
8057 // parse optional params
8058 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
8059 return ParseStatus::Failure;
8060
8061 Offset.Loc = getLoc();
8062 if (!parseExpr(Offset.Val))
8063 return ParseStatus::Failure;
8064
8065 if (!skipToken(AsmToken::Comma, "expected a comma"))
8066 return ParseStatus::Failure;
8067
8068 Width.Loc = getLoc();
8069 if (!parseExpr(Width.Val) ||
8070 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8071 return ParseStatus::Failure;
8072
8073 return ParseStatus::Success;
8074}
8075
8076ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
8077 using namespace llvm::AMDGPU::Hwreg;
8078
8079 int64_t ImmVal = 0;
8080 SMLoc Loc = getLoc();
8081
8082 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
8083 HwregId::Default);
8084 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
8085 HwregOffset::Default);
8086 struct : StructuredOpField {
8087 using StructuredOpField::StructuredOpField;
8088 bool validate(AMDGPUAsmParser &Parser) const override {
8089 if (!isUIntN(Width, Val - 1))
8090 return Error(Parser, "only values from 1 to 32 are legal");
8091 return true;
8092 }
8093 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
8094 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
8095
8096 if (Res.isNoMatch())
8097 Res = parseHwregFunc(HwReg, Offset, Width);
8098
8099 if (Res.isSuccess()) {
8100 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
8101 return ParseStatus::Failure;
8102 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
8103 }
8104
8105 if (Res.isNoMatch() &&
8106 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
8107 Res = ParseStatus::Success;
8108
8109 if (!Res.isSuccess())
8110 return ParseStatus::Failure;
8111
8112 if (!isUInt<16>(ImmVal))
8113 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8114 Operands.push_back(
8115 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
8116 return ParseStatus::Success;
8117}
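
// Illustrative examples of accepted hwreg operands (assumed, not taken from
// this file):
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 32)
//   s_getreg_b32 s0, {id: 1, offset: 0, size: 32}
// A plain 16-bit immediate encoding is also accepted.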
8118
8119bool AMDGPUOperand::isHwreg() const {
8120 return isImmTy(ImmTyHwreg);
8121}
8122
8123//===----------------------------------------------------------------------===//
8124// sendmsg
8125//===----------------------------------------------------------------------===//
8126
8127bool
8128AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8129 OperandInfoTy &Op,
8130 OperandInfoTy &Stream) {
8131 using namespace llvm::AMDGPU::SendMsg;
8132
8133 Msg.Loc = getLoc();
8134 if (isToken(AsmToken::Identifier) &&
8135 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8136 Msg.IsSymbolic = true;
8137 lex(); // skip message name
8138 } else if (!parseExpr(Msg.Val, "a message name")) {
8139 return false;
8140 }
8141
8142 if (trySkipToken(AsmToken::Comma)) {
8143 Op.IsDefined = true;
8144 Op.Loc = getLoc();
8145 if (isToken(AsmToken::Identifier) &&
8146 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8147 OPR_ID_UNKNOWN) {
8148 lex(); // skip operation name
8149 } else if (!parseExpr(Op.Val, "an operation name")) {
8150 return false;
8151 }
8152
8153 if (trySkipToken(AsmToken::Comma)) {
8154 Stream.IsDefined = true;
8155 Stream.Loc = getLoc();
8156 if (!parseExpr(Stream.Val))
8157 return false;
8158 }
8159 }
8160
8161 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
8162}
8163
8164bool
8165AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
8166 const OperandInfoTy &Op,
8167 const OperandInfoTy &Stream) {
8168 using namespace llvm::AMDGPU::SendMsg;
8169
8170 // Validation strictness depends on whether the message is specified
8171 // in a symbolic or in a numeric form. In the latter case, only
8172 // whether the value can be encoded at all is checked.
8173 bool Strict = Msg.IsSymbolic;
8174
8175 if (Strict) {
8176 if (Msg.Val == OPR_ID_UNSUPPORTED) {
8177 Error(Msg.Loc, "specified message id is not supported on this GPU");
8178 return false;
8179 }
8180 } else {
8181 if (!isValidMsgId(Msg.Val, getSTI())) {
8182 Error(Msg.Loc, "invalid message id");
8183 return false;
8184 }
8185 }
8186 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
8187 if (Op.IsDefined) {
8188 Error(Op.Loc, "message does not support operations");
8189 } else {
8190 Error(Msg.Loc, "missing message operation");
8191 }
8192 return false;
8193 }
8194 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
8195 if (Op.Val == OPR_ID_UNSUPPORTED)
8196 Error(Op.Loc, "specified operation id is not supported on this GPU");
8197 else
8198 Error(Op.Loc, "invalid operation id");
8199 return false;
8200 }
8201 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
8202 Stream.IsDefined) {
8203 Error(Stream.Loc, "message operation does not support streams");
8204 return false;
8205 }
8206 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
8207 Error(Stream.Loc, "invalid message stream id");
8208 return false;
8209 }
8210 return true;
8211}
8212
8213ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
8214 using namespace llvm::AMDGPU::SendMsg;
8215
8216 int64_t ImmVal = 0;
8217 SMLoc Loc = getLoc();
8218
8219 if (trySkipId("sendmsg", AsmToken::LParen)) {
8220 OperandInfoTy Msg(OPR_ID_UNKNOWN);
8221 OperandInfoTy Op(OP_NONE_);
8222 OperandInfoTy Stream(STREAM_ID_NONE_);
8223 if (parseSendMsgBody(Msg, Op, Stream) &&
8224 validateSendMsg(Msg, Op, Stream)) {
8225 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
8226 } else {
8227 return ParseStatus::Failure;
8228 }
8229 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
8230 if (ImmVal < 0 || !isUInt<16>(ImmVal))
8231 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8232 } else {
8233 return ParseStatus::Failure;
8234 }
8235
8236 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
8237 return ParseStatus::Success;
8238}
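
// Illustrative example of an accepted sendmsg operand (assumed, not taken
// from this file; message and operation names depend on the target):
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
// A plain 16-bit immediate encoding is also accepted.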
8239
8240bool AMDGPUOperand::isSendMsg() const {
8241 return isImmTy(ImmTySendMsg);
8242}
8243
8244//===----------------------------------------------------------------------===//
8245// v_interp
8246//===----------------------------------------------------------------------===//
8247
8248ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
8249 StringRef Str;
8250 SMLoc S = getLoc();
8251
8252 if (!parseId(Str))
8253 return ParseStatus::NoMatch;
8254
8255 int Slot = StringSwitch<int>(Str)
8256 .Case("p10", 0)
8257 .Case("p20", 1)
8258 .Case("p0", 2)
8259 .Default(-1);
8260
8261 if (Slot == -1)
8262 return Error(S, "invalid interpolation slot");
8263
8264 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
8265 AMDGPUOperand::ImmTyInterpSlot));
8266 return ParseStatus::Success;
8267}
8268
8269ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
8270 StringRef Str;
8271 SMLoc S = getLoc();
8272
8273 if (!parseId(Str))
8274 return ParseStatus::NoMatch;
8275
8276 if (!Str.starts_with("attr"))
8277 return Error(S, "invalid interpolation attribute");
8278
8279 StringRef Chan = Str.take_back(2);
8280 int AttrChan = StringSwitch<int>(Chan)
8281 .Case(".x", 0)
8282 .Case(".y", 1)
8283 .Case(".z", 2)
8284 .Case(".w", 3)
8285 .Default(-1);
8286 if (AttrChan == -1)
8287 return Error(S, "invalid or missing interpolation attribute channel");
8288
8289 Str = Str.drop_back(2).drop_front(4);
8290
8291 uint8_t Attr;
8292 if (Str.getAsInteger(10, Attr))
8293 return Error(S, "invalid or missing interpolation attribute number");
8294
8295 if (Attr > 32)
8296 return Error(S, "out of bounds interpolation attribute number");
8297
8298 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
8299
8300 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
8301 AMDGPUOperand::ImmTyInterpAttr));
8302 Operands.push_back(AMDGPUOperand::CreateImm(
8303 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8304 return ParseStatus::Success;
8305}
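
// Illustrative example (assumed, not taken from this file): "attr0.x" yields
// attribute number 0 and channel 0; the ".x"/".y"/".z"/".w" suffixes map to
// channels 0..3 as in the StringSwitch above.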
8306
8307//===----------------------------------------------------------------------===//
8308// exp
8309//===----------------------------------------------------------------------===//
8310
8311ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
8312 using namespace llvm::AMDGPU::Exp;
8313
8314 StringRef Str;
8315 SMLoc S = getLoc();
8316
8317 if (!parseId(Str))
8318 return ParseStatus::NoMatch;
8319
8320 unsigned Id = getTgtId(Str);
8321 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
8322 return Error(S, (Id == ET_INVALID)
8323 ? "invalid exp target"
8324 : "exp target is not supported on this GPU");
8325
8326 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
8327 AMDGPUOperand::ImmTyExpTgt));
8328 return ParseStatus::Success;
8329}
8330
8331//===----------------------------------------------------------------------===//
8332// parser helpers
8333//===----------------------------------------------------------------------===//
8334
8335bool
8336AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
8337 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
8338}
8339
8340bool
8341AMDGPUAsmParser::isId(const StringRef Id) const {
8342 return isId(getToken(), Id);
8343}
8344
8345bool
8346AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
8347 return getTokenKind() == Kind;
8348}
8349
8350StringRef AMDGPUAsmParser::getId() const {
8351 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
8352}
8353
8354bool
8355AMDGPUAsmParser::trySkipId(const StringRef Id) {
8356 if (isId(Id)) {
8357 lex();
8358 return true;
8359 }
8360 return false;
8361}
8362
8363bool
8364AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
8365 if (isToken(AsmToken::Identifier)) {
8366 StringRef Tok = getTokenStr();
8367 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
8368 lex();
8369 return true;
8370 }
8371 }
8372 return false;
8373}
8374
8375bool
8376AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
8377 if (isId(Id) && peekToken().is(Kind)) {
8378 lex();
8379 lex();
8380 return true;
8381 }
8382 return false;
8383}
8384
8385bool
8386AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
8387 if (isToken(Kind)) {
8388 lex();
8389 return true;
8390 }
8391 return false;
8392}
8393
8394bool
8395AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
8396 const StringRef ErrMsg) {
8397 if (!trySkipToken(Kind)) {
8398 Error(getLoc(), ErrMsg);
8399 return false;
8400 }
8401 return true;
8402}
8403
8404bool
8405AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
8406 SMLoc S = getLoc();
8407
8408 const MCExpr *Expr;
8409 if (Parser.parseExpression(Expr))
8410 return false;
8411
8412 if (Expr->evaluateAsAbsolute(Imm))
8413 return true;
8414
8415 if (Expected.empty()) {
8416 Error(S, "expected absolute expression");
8417 } else {
8418 Error(S, Twine("expected ", Expected) +
8419 Twine(" or an absolute expression"));
8420 }
8421 return false;
8422}
8423
8424bool
8425AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
8426 SMLoc S = getLoc();
8427
8428 const MCExpr *Expr;
8429 if (Parser.parseExpression(Expr))
8430 return false;
8431
8432 int64_t IntVal;
8433 if (Expr->evaluateAsAbsolute(IntVal)) {
8434 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
8435 } else {
8436 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8437 }
8438 return true;
8439}
8440
8441bool
8442AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
8443 if (isToken(AsmToken::String)) {
8444 Val = getToken().getStringContents();
8445 lex();
8446 return true;
8447 }
8448 Error(getLoc(), ErrMsg);
8449 return false;
8450}
8451
8452bool
8453AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
8454 if (isToken(AsmToken::Identifier)) {
8455 Val = getTokenStr();
8456 lex();
8457 return true;
8458 }
8459 if (!ErrMsg.empty())
8460 Error(getLoc(), ErrMsg);
8461 return false;
8462}
8463
8464AsmToken
8465AMDGPUAsmParser::getToken() const {
8466 return Parser.getTok();
8467}
8468
8469AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
8470 return isToken(AsmToken::EndOfStatement)
8471 ? getToken()
8472 : getLexer().peekTok(ShouldSkipSpace);
8473}
8474
8475void
8476AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
8477 auto TokCount = getLexer().peekTokens(Tokens);
8478
8479 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
8480 Tokens[Idx] = AsmToken(AsmToken::Error, "");
8481}
8482
8483 AsmToken::TokenKind
8484 AMDGPUAsmParser::getTokenKind() const {
8485 return getLexer().getKind();
8486}
8487
8488SMLoc
8489AMDGPUAsmParser::getLoc() const {
8490 return getToken().getLoc();
8491}
8492
8493StringRef
8494AMDGPUAsmParser::getTokenStr() const {
8495 return getToken().getString();
8496}
8497
8498void
8499AMDGPUAsmParser::lex() {
8500 Parser.Lex();
8501}
8502
8503SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
8504 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8505}
8506
8507 // Returns whichever of the two given locations comes later in the source.
8508SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
8509 return a.getPointer() < b.getPointer() ? b : a;
8510}
8511
8512SMLoc AMDGPUAsmParser::getOperandLoc(const OperandVector &Operands,
8513 int MCOpIdx) const {
8514 for (const auto &Op : Operands) {
8515 const auto TargetOp = static_cast<AMDGPUOperand &>(*Op);
8516 if (TargetOp.getMCOpIdx() == MCOpIdx)
8517 return TargetOp.getStartLoc();
8518 }
8519 llvm_unreachable("No such MC operand!");
8520}
8521
8522SMLoc
8523AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
8524 const OperandVector &Operands) const {
8525 for (unsigned i = Operands.size() - 1; i > 0; --i) {
8526 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8527 if (Test(Op))
8528 return Op.getStartLoc();
8529 }
8530 return getInstLoc(Operands);
8531}
8532
8533SMLoc
8534AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8535 const OperandVector &Operands) const {
8536 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
8537 return getOperandLoc(Test, Operands);
8538}
8539
8540ParseStatus
8541AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
8542 if (!trySkipToken(AsmToken::LCurly))
8543 return ParseStatus::NoMatch;
8544
8545 bool First = true;
8546 while (!trySkipToken(AsmToken::RCurly)) {
8547 if (!First &&
8548 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
8549 return ParseStatus::Failure;
8550
8551 StringRef Id = getTokenStr();
8552 SMLoc IdLoc = getLoc();
8553 if (!skipToken(AsmToken::Identifier, "field name expected") ||
8554 !skipToken(AsmToken::Colon, "colon expected"))
8555 return ParseStatus::Failure;
8556
8557 const auto *I =
8558 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
8559 if (I == Fields.end())
8560 return Error(IdLoc, "unknown field");
8561 if ((*I)->IsDefined)
8562 return Error(IdLoc, "duplicate field");
8563
8564 // TODO: Support symbolic values.
8565 (*I)->Loc = getLoc();
8566 if (!parseExpr((*I)->Val))
8567 return ParseStatus::Failure;
8568 (*I)->IsDefined = true;
8569
8570 First = false;
8571 }
8572 return ParseStatus::Success;
8573}
8574
8575 bool AMDGPUAsmParser::validateStructuredOpFields(
8576 ArrayRef<const StructuredOpField *> Fields) {
8577 return all_of(Fields, [this](const StructuredOpField *F) {
8578 return F->validate(*this);
8579 });
8580}
8581
8582//===----------------------------------------------------------------------===//
8583// swizzle
8584//===----------------------------------------------------------------------===//
8585
8586 LLVM_READNONE
8587 static unsigned
8588encodeBitmaskPerm(const unsigned AndMask,
8589 const unsigned OrMask,
8590 const unsigned XorMask) {
8591 using namespace llvm::AMDGPU::Swizzle;
8592
8593 return BITMASK_PERM_ENC |
8594 (AndMask << BITMASK_AND_SHIFT) |
8595 (OrMask << BITMASK_OR_SHIFT) |
8596 (XorMask << BITMASK_XOR_SHIFT);
8597}
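
// Sketch of the BITMASK_PERM semantics assumed by the helpers below (not
// stated in this file): within each group of 32 lanes, lane i reads from
// lane ((i & AndMask) | OrMask) ^ XorMask. For example,
// encodeBitmaskPerm(0x1F, 0, 1) swaps adjacent even/odd lanes.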
8598
8599bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8600 const unsigned MaxVal,
8601 const Twine &ErrMsg, SMLoc &Loc) {
8602 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8603 return false;
8604 }
8605 Loc = getLoc();
8606 if (!parseExpr(Op)) {
8607 return false;
8608 }
8609 if (Op < MinVal || Op > MaxVal) {
8610 Error(Loc, ErrMsg);
8611 return false;
8612 }
8613
8614 return true;
8615}
8616
8617bool
8618AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8619 const unsigned MinVal,
8620 const unsigned MaxVal,
8621 const StringRef ErrMsg) {
8622 SMLoc Loc;
8623 for (unsigned i = 0; i < OpNum; ++i) {
8624 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
8625 return false;
8626 }
8627
8628 return true;
8629}
8630
8631bool
8632AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8633 using namespace llvm::AMDGPU::Swizzle;
8634
8635 int64_t Lane[LANE_NUM];
8636 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8637 "expected a 2-bit lane id")) {
8639 for (unsigned I = 0; I < LANE_NUM; ++I) {
8640 Imm |= Lane[I] << (LANE_SHIFT * I);
8641 }
8642 return true;
8643 }
8644 return false;
8645}
8646
8647bool
8648AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
8649 using namespace llvm::AMDGPU::Swizzle;
8650
8651 SMLoc Loc;
8652 int64_t GroupSize;
8653 int64_t LaneIdx;
8654
8655 if (!parseSwizzleOperand(GroupSize,
8656 2, 32,
8657 "group size must be in the interval [2,32]",
8658 Loc)) {
8659 return false;
8660 }
8661 if (!isPowerOf2_64(GroupSize)) {
8662 Error(Loc, "group size must be a power of two");
8663 return false;
8664 }
8665 if (parseSwizzleOperand(LaneIdx,
8666 0, GroupSize - 1,
8667 "lane id must be in the interval [0,group size - 1]",
8668 Loc)) {
8669 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8670 return true;
8671 }
8672 return false;
8673}
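
// Worked example (illustrative, not taken from this file): for
// swizzle(BROADCAST, 4, 2) the code above produces AndMask = 0x1C,
// OrMask = 2, XorMask = 0, so every lane reads lane (group_base + 2) of its
// 4-lane group.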
8674
8675bool
8676AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8677 using namespace llvm::AMDGPU::Swizzle;
8678
8679 SMLoc Loc;
8680 int64_t GroupSize;
8681
8682 if (!parseSwizzleOperand(GroupSize,
8683 2, 32,
8684 "group size must be in the interval [2,32]",
8685 Loc)) {
8686 return false;
8687 }
8688 if (!isPowerOf2_64(GroupSize)) {
8689 Error(Loc, "group size must be a power of two");
8690 return false;
8691 }
8692
8693 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8694 return true;
8695}
8696
8697bool
8698AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8699 using namespace llvm::AMDGPU::Swizzle;
8700
8701 SMLoc Loc;
8702 int64_t GroupSize;
8703
8704 if (!parseSwizzleOperand(GroupSize,
8705 1, 16,
8706 "group size must be in the interval [1,16]",
8707 Loc)) {
8708 return false;
8709 }
8710 if (!isPowerOf2_64(GroupSize)) {
8711 Error(Loc, "group size must be a power of two");
8712 return false;
8713 }
8714
8715 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8716 return true;
8717}
8718
8719bool
8720AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8721 using namespace llvm::AMDGPU::Swizzle;
8722
8723 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8724 return false;
8725 }
8726
8727 StringRef Ctl;
8728 SMLoc StrLoc = getLoc();
8729 if (!parseString(Ctl)) {
8730 return false;
8731 }
8732 if (Ctl.size() != BITMASK_WIDTH) {
8733 Error(StrLoc, "expected a 5-character mask");
8734 return false;
8735 }
8736
8737 unsigned AndMask = 0;
8738 unsigned OrMask = 0;
8739 unsigned XorMask = 0;
8740
8741 for (size_t i = 0; i < Ctl.size(); ++i) {
8742 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8743 switch(Ctl[i]) {
8744 default:
8745 Error(StrLoc, "invalid mask");
8746 return false;
8747 case '0':
8748 break;
8749 case '1':
8750 OrMask |= Mask;
8751 break;
8752 case 'p':
8753 AndMask |= Mask;
8754 break;
8755 case 'i':
8756 AndMask |= Mask;
8757 XorMask |= Mask;
8758 break;
8759 }
8760 }
8761
8762 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8763 return true;
8764}
8765
8766bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
8767 using namespace llvm::AMDGPU::Swizzle;
8768
8769 if (!AMDGPU::isGFX9Plus(getSTI())) {
8770 Error(getLoc(), "FFT mode swizzle not supported on this GPU");
8771 return false;
8772 }
8773
8774 int64_t Swizzle;
8775 SMLoc Loc;
8776 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
8777 "FFT swizzle must be in the interval [0," +
8778 Twine(FFT_SWIZZLE_MAX) + Twine(']'),
8779 Loc))
8780 return false;
8781
8782 Imm = FFT_MODE_ENC | Swizzle;
8783 return true;
8784}
8785
8786bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
8787 using namespace llvm::AMDGPU::Swizzle;
8788
8789 if (!AMDGPU::isGFX9Plus(getSTI())) {
8790 Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
8791 return false;
8792 }
8793
8794 SMLoc Loc;
8795 int64_t Direction;
8796
8797 if (!parseSwizzleOperand(Direction, 0, 1,
8798 "direction must be 0 (left) or 1 (right)", Loc))
8799 return false;
8800
8801 int64_t RotateSize;
8802 if (!parseSwizzleOperand(
8803 RotateSize, 0, ROTATE_MAX_SIZE,
8804 "number of threads to rotate must be in the interval [0," +
8805 Twine(ROTATE_MAX_SIZE) + Twine(']'),
8806 Loc))
8807 return false;
8808
8809 Imm = ROTATE_MODE_ENC | Direction << ROTATE_DIR_SHIFT |
8810 (RotateSize << ROTATE_SIZE_SHIFT);
8811 return true;
8812}
8813
8814bool
8815AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8816
8817 SMLoc OffsetLoc = getLoc();
8818
8819 if (!parseExpr(Imm, "a swizzle macro")) {
8820 return false;
8821 }
8822 if (!isUInt<16>(Imm)) {
8823 Error(OffsetLoc, "expected a 16-bit offset");
8824 return false;
8825 }
8826 return true;
8827}
8828
8829bool
8830AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8831 using namespace llvm::AMDGPU::Swizzle;
8832
8833 if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
8834
8835 SMLoc ModeLoc = getLoc();
8836 bool Ok = false;
8837
8838 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8839 Ok = parseSwizzleQuadPerm(Imm);
8840 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8841 Ok = parseSwizzleBitmaskPerm(Imm);
8842 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8843 Ok = parseSwizzleBroadcast(Imm);
8844 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8845 Ok = parseSwizzleSwap(Imm);
8846 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8847 Ok = parseSwizzleReverse(Imm);
8848 } else if (trySkipId(IdSymbolic[ID_FFT])) {
8849 Ok = parseSwizzleFFT(Imm);
8850 } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
8851 Ok = parseSwizzleRotate(Imm);
8852 } else {
8853 Error(ModeLoc, "expected a swizzle mode");
8854 }
8855
8856 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
8857 }
8858
8859 return false;
8860}
8861
8862ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8863 SMLoc S = getLoc();
8864 int64_t Imm = 0;
8865
8866 if (trySkipId("offset")) {
8867
8868 bool Ok = false;
8869 if (skipToken(AsmToken::Colon, "expected a colon")) {
8870 if (trySkipId("swizzle")) {
8871 Ok = parseSwizzleMacro(Imm);
8872 } else {
8873 Ok = parseSwizzleOffset(Imm);
8874 }
8875 }
8876
8877 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8878
8879 return Ok ? ParseStatus::Success : ParseStatus::Failure;
8880 }
8881 return ParseStatus::NoMatch;
8882}
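
// Illustrative examples of accepted swizzle operands (assumed, not taken from
// this file):
//   ds_swizzle_b32 v5, v1 offset:swizzle(SWAP, 16)
//   ds_swizzle_b32 v5, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v5, v1 offset:0x8000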
8883
8884bool
8885AMDGPUOperand::isSwizzle() const {
8886 return isImmTy(ImmTySwizzle);
8887}
8888
8889//===----------------------------------------------------------------------===//
8890// VGPR Index Mode
8891//===----------------------------------------------------------------------===//
8892
8893int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8894
8895 using namespace llvm::AMDGPU::VGPRIndexMode;
8896
8897 if (trySkipToken(AsmToken::RParen)) {
8898 return OFF;
8899 }
8900
8901 int64_t Imm = 0;
8902
8903 while (true) {
8904 unsigned Mode = 0;
8905 SMLoc S = getLoc();
8906
8907 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8908 if (trySkipId(IdSymbolic[ModeId])) {
8909 Mode = 1 << ModeId;
8910 break;
8911 }
8912 }
8913
8914 if (Mode == 0) {
8915 Error(S, (Imm == 0)?
8916 "expected a VGPR index mode or a closing parenthesis" :
8917 "expected a VGPR index mode");
8918 return UNDEF;
8919 }
8920
8921 if (Imm & Mode) {
8922 Error(S, "duplicate VGPR index mode");
8923 return UNDEF;
8924 }
8925 Imm |= Mode;
8926
8927 if (trySkipToken(AsmToken::RParen))
8928 break;
8929 if (!skipToken(AsmToken::Comma,
8930 "expected a comma or a closing parenthesis"))
8931 return UNDEF;
8932 }
8933
8934 return Imm;
8935}
8936
8937ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8938
8939 using namespace llvm::AMDGPU::VGPRIndexMode;
8940
8941 int64_t Imm = 0;
8942 SMLoc S = getLoc();
8943
8944 if (trySkipId("gpr_idx", AsmToken::LParen)) {
8945 Imm = parseGPRIdxMacro();
8946 if (Imm == UNDEF)
8947 return ParseStatus::Failure;
8948 } else {
8949 if (getParser().parseAbsoluteExpression(Imm))
8950 return ParseStatus::Failure;
8951 if (Imm < 0 || !isUInt<4>(Imm))
8952 return Error(S, "invalid immediate: only 4-bit values are legal");
8953 }
8954
8955 Operands.push_back(
8956 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8957 return ParseStatus::Success;
8958}
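
// Illustrative examples of accepted gpr_idx operands (assumed, not taken from
// this file):
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)
//   s_set_gpr_idx_on s0, 9
// where 9 == SRC0 | DST in the VGPRIndexMode encoding.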
8959
8960bool AMDGPUOperand::isGPRIdxMode() const {
8961 return isImmTy(ImmTyGprIdxMode);
8962}
8963
8964//===----------------------------------------------------------------------===//
8965// sopp branch targets
8966//===----------------------------------------------------------------------===//
8967
8968ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
8969
8970 // Make sure we are not parsing something
8971 // that looks like a label or an expression but is not.
8972 // This will improve error messages.
8973 if (isRegister() || isModifier())
8974 return ParseStatus::NoMatch;
8975
8976 if (!parseExpr(Operands))
8977 return ParseStatus::Failure;
8978
8979 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
8980 assert(Opr.isImm() || Opr.isExpr());
8981 SMLoc Loc = Opr.getStartLoc();
8982
8983 // Currently we do not support arbitrary expressions as branch targets.
8984 // Only labels and absolute expressions are accepted.
8985 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8986 Error(Loc, "expected an absolute expression or a label");
8987 } else if (Opr.isImm() && !Opr.isS16Imm()) {
8988 Error(Loc, "expected a 16-bit signed jump offset");
8989 }
8990
8991 return ParseStatus::Success;
8992}
8993
8994//===----------------------------------------------------------------------===//
8995// Boolean holding registers
8996//===----------------------------------------------------------------------===//
8997
8998ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
8999 return parseReg(Operands);
9000}
9001
9002//===----------------------------------------------------------------------===//
9003// mubuf
9004//===----------------------------------------------------------------------===//
9005
9006void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
9007 const OperandVector &Operands,
9008 bool IsAtomic) {
9009 OptionalImmIndexMap OptionalIdx;
9010 unsigned FirstOperandIdx = 1;
9011 bool IsAtomicReturn = false;
9012
9013 if (IsAtomic) {
9014 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
9015 SIInstrFlags::IsAtomicRet;
9016 }
9017
9018 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
9019 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9020
9021 // Add the register arguments
9022 if (Op.isReg()) {
9023 Op.addRegOperands(Inst, 1);
9024 // Insert a tied src for the atomic return dst.
9025 // This cannot be postponed, as subsequent calls to
9026 // addImmOperands rely on the correct number of MC operands.
9027 if (IsAtomicReturn && i == FirstOperandIdx)
9028 Op.addRegOperands(Inst, 1);
9029 continue;
9030 }
9031
9032 // Handle the case where soffset is an immediate
9033 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9034 Op.addImmOperands(Inst, 1);
9035 continue;
9036 }
9037
9038 // Handle tokens like 'offen' which are sometimes hard-coded into the
9039 // asm string. There are no MCInst operands for these.
9040 if (Op.isToken()) {
9041 continue;
9042 }
9043 assert(Op.isImm());
9044
9045 // Handle optional arguments
9046 OptionalIdx[Op.getImmTy()] = i;
9047 }
9048
9049 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
9050 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
9051 // Add a dummy operand as a placeholder for the SWZ operand. This enforces
9052 // agreement between MCInstrDesc.getNumOperands and MCInst.getNumOperands.
9053 Inst.addOperand(MCOperand::createImm(0));
9054 }
9055
9056//===----------------------------------------------------------------------===//
9057// smrd
9058//===----------------------------------------------------------------------===//
9059
9060bool AMDGPUOperand::isSMRDOffset8() const {
9061 return isImmLiteral() && isUInt<8>(getImm());
9062}
9063
9064bool AMDGPUOperand::isSMEMOffset() const {
9065 // Offset range is checked later by validator.
9066 return isImmLiteral();
9067}
9068
9069bool AMDGPUOperand::isSMRDLiteralOffset() const {
9070 // 32-bit literals are only supported on CI, and we only want to use them
9071 // when the offset does not fit in 8 bits.
9072 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
9073}
9074
9075//===----------------------------------------------------------------------===//
9076// vop3
9077//===----------------------------------------------------------------------===//
9078
9079static bool ConvertOmodMul(int64_t &Mul) {
9080 if (Mul != 1 && Mul != 2 && Mul != 4)
9081 return false;
9082
9083 Mul >>= 1;
9084 return true;
9085}
9086
9087static bool ConvertOmodDiv(int64_t &Div) {
9088 if (Div == 1) {
9089 Div = 0;
9090 return true;
9091 }
9092
9093 if (Div == 2) {
9094 Div = 3;
9095 return true;
9096 }
9097
9098 return false;
9099}
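// Worked example of the omod conversions above: "mul:1" -> 0, "mul:2" -> 1,
// "mul:4" -> 2 (Mul >> 1); "div:1" -> 0, "div:2" -> 3. For instance:
//   int64_t V = 4;
//   ConvertOmodMul(V);   // returns true, V is now 2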
9100
9101// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
9102// This is intentional and ensures compatibility with sp3.
9103// See bug 35397 for details.
9104bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9105 if (BoundCtrl == 0 || BoundCtrl == 1) {
9106 if (!isGFX11Plus())
9107 BoundCtrl = 1;
9108 return true;
9109 }
9110 return false;
9111}
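// Example: on gfx9 both "bound_ctrl:0" and "bound_ctrl:1" are converted to
// BoundCtrl == 1 here, while on gfx11+ the value is kept exactly as written.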
9112
9113void AMDGPUAsmParser::onBeginOfFile() {
9114 if (!getParser().getStreamer().getTargetStreamer() ||
9115 getSTI().getTargetTriple().getArch() == Triple::r600)
9116 return;
9117
9118 if (!getTargetStreamer().getTargetID())
9119 getTargetStreamer().initializeTargetID(getSTI(),
9120 getSTI().getFeatureString());
9121
9122 if (isHsaAbi(getSTI()))
9123 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9124}
9125
9126/// Parse AMDGPU specific expressions.
9127///
9128/// expr ::= or(expr, ...) |
9129/// max(expr, ...)
9130///
9131bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
9132 using AGVK = AMDGPUMCExpr::VariantKind;
9133
9134 if (isToken(AsmToken::Identifier)) {
9135 StringRef TokenId = getTokenStr();
9136 AGVK VK = StringSwitch<AGVK>(TokenId)
9137 .Case("max", AGVK::AGVK_Max)
9138 .Case("or", AGVK::AGVK_Or)
9139 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
9140 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9141 .Case("alignto", AGVK::AGVK_AlignTo)
9142 .Case("occupancy", AGVK::AGVK_Occupancy)
9143 .Default(AGVK::AGVK_None);
9144
9145 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
9146 SmallVector<const MCExpr *, 4> Exprs;
9147 uint64_t CommaCount = 0;
9148 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
9149 lex(); // Eat '('
9150 while (true) {
9151 if (trySkipToken(AsmToken::RParen)) {
9152 if (Exprs.empty()) {
9153 Error(getToken().getLoc(),
9154 "empty " + Twine(TokenId) + " expression");
9155 return true;
9156 }
9157 if (CommaCount + 1 != Exprs.size()) {
9158 Error(getToken().getLoc(),
9159 "mismatch of commas in " + Twine(TokenId) + " expression");
9160 return true;
9161 }
9162 Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
9163 return false;
9164 }
9165 const MCExpr *Expr;
9166 if (getParser().parseExpression(Expr, EndLoc))
9167 return true;
9168 Exprs.push_back(Expr);
9169 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
9170 if (LastTokenWasComma)
9171 CommaCount++;
9172 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
9173 Error(getToken().getLoc(),
9174 "unexpected token in " + Twine(TokenId) + " expression");
9175 return true;
9176 }
9177 }
9178 }
9179 }
9180 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
9181}
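// Illustrative use of the extended expressions accepted above (the symbol names
// are made up for the example):
//   .set total_vgprs, max(kernel_a.num_vgpr, kernel_b.num_vgpr)
//   .set any_flag,    or(flag_x, flag_y)
// The argument list is parsed in the loop above, and the comma count must be
// exactly one less than the number of arguments.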
9182
9183ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
9184 StringRef Name = getTokenStr();
9185 if (Name == "mul") {
9186 return parseIntWithPrefix("mul", Operands,
9187 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
9188 }
9189
9190 if (Name == "div") {
9191 return parseIntWithPrefix("div", Operands,
9192 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
9193 }
9194
9195 return ParseStatus::NoMatch;
9196}
9197
9198// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
9199// the number of src operands present, then copies that bit into src0_modifiers.
9200static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
9201 int Opc = Inst.getOpcode();
9202 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9203 if (OpSelIdx == -1)
9204 return;
9205
9206 int SrcNum;
9207 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9208 AMDGPU::OpName::src2};
9209 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
9210 ++SrcNum)
9211 ;
9212 assert(SrcNum > 0);
9213
9214 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9215
9216 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
9217 if (DstIdx == -1)
9218 return;
9219
9220 const MCOperand &DstOp = Inst.getOperand(DstIdx);
9221 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
9222 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9223 if (DstOp.isReg() &&
9224 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
9225 if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
9226 ModVal |= SISrcMods::DST_OP_SEL;
9227 } else {
9228 if ((OpSel & (1 << SrcNum)) != 0)
9229 ModVal |= SISrcMods::DST_OP_SEL;
9230 }
9231 Inst.getOperand(ModIdx).setImm(ModVal);
9232}
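// Example: for an opcode with src0 and src1 only (SrcNum == 2), op_sel bit 2 is
// the dst bit; if it is set, or if vdst is the high half of a 16-bit VGPR,
// DST_OP_SEL is OR'd into src0_modifiers.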
9233
9234void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
9235 const OperandVector &Operands) {
9236 cvtVOP3P(Inst, Operands);
9237 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9238}
9239
9240void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
9241 OptionalImmIndexMap &OptionalIdx) {
9242 cvtVOP3P(Inst, Operands, OptionalIdx);
9243 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9244}
9245
9246static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
9247 return
9248 // 1. This operand is input modifiers
9249 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
9250 // 2. This is not last operand
9251 && Desc.NumOperands > (OpNum + 1)
9252 // 3. Next operand is register class
9253 && Desc.operands()[OpNum + 1].RegClass != -1
9254 // 4. Next register is not tied to any other operand
9255 && Desc.getOperandConstraint(OpNum + 1,
9256 MCOI::OperandConstraint::TIED_TO) == -1;
9257 }
9258
9259void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst, unsigned OpSel) {
9260 unsigned Opc = Inst.getOpcode();
9261 constexpr AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9262 AMDGPU::OpName::src2};
9263 constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9264 AMDGPU::OpName::src1_modifiers,
9265 AMDGPU::OpName::src2_modifiers};
9266 for (int J = 0; J < 3; ++J) {
9267 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9268 if (OpIdx == -1)
9269 // Some instructions, e.g. v_interp_p2_f16 in GFX9, have src0, src2, but
9270 // no src1. So continue instead of break.
9271 continue;
9272
9273 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9274 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9275
9276 if ((OpSel & (1 << J)) != 0)
9277 ModVal |= SISrcMods::OP_SEL_0;
9278 // op_sel[3] is encoded in src0_modifiers.
9279 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)
9280 ModVal |= SISrcMods::DST_OP_SEL;
9281
9282 Inst.getOperand(ModIdx).setImm(ModVal);
9283 }
9284}
9285
9286void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
9287{
9288 OptionalImmIndexMap OptionalIdx;
9289 unsigned Opc = Inst.getOpcode();
9290
9291 unsigned I = 1;
9292 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9293 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9294 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9295 }
9296
9297 for (unsigned E = Operands.size(); I != E; ++I) {
9298 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9299 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9300 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9301 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
9302 Op.isInterpAttrChan()) {
9303 Inst.addOperand(MCOperand::createImm(Op.getImm()));
9304 } else if (Op.isImmModifier()) {
9305 OptionalIdx[Op.getImmTy()] = I;
9306 } else {
9307 llvm_unreachable("unhandled operand type");
9308 }
9309 }
9310
9311 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
9312 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9313 AMDGPUOperand::ImmTyHigh);
9314
9315 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9316 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9317 AMDGPUOperand::ImmTyClamp);
9318
9319 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9320 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9321 AMDGPUOperand::ImmTyOModSI);
9322
9323 // Some v_interp instructions use op_sel[3] for dst.
9324 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9325 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9326 AMDGPUOperand::ImmTyOpSel);
9327 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9328 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9329
9330 cvtOpSelHelper(Inst, OpSel);
9331 }
9332}
9333
9334void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
9335{
9336 OptionalImmIndexMap OptionalIdx;
9337 unsigned Opc = Inst.getOpcode();
9338
9339 unsigned I = 1;
9340 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9341 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9342 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9343 }
9344
9345 for (unsigned E = Operands.size(); I != E; ++I) {
9346 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9347 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9348 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9349 } else if (Op.isImmModifier()) {
9350 OptionalIdx[Op.getImmTy()] = I;
9351 } else {
9352 llvm_unreachable("unhandled operand type");
9353 }
9354 }
9355
9356 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
9357
9358 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9359 if (OpSelIdx != -1)
9360 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9361
9362 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
9363
9364 if (OpSelIdx == -1)
9365 return;
9366
9367 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9368 cvtOpSelHelper(Inst, OpSel);
9369}
9370
9371void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9372 const OperandVector &Operands) {
9373 OptionalImmIndexMap OptionalIdx;
9374 unsigned Opc = Inst.getOpcode();
9375 unsigned I = 1;
9376 int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
9377
9378 const MCInstrDesc &Desc = MII.get(Opc);
9379
9380 for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
9381 static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);
9382
9383 for (unsigned E = Operands.size(); I != E; ++I) {
9384 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
9385 int NumOperands = Inst.getNumOperands();
9386 // The order of operands in the MCInst differs from the parsed operand order.
9387 // Add dummy cbsz and blgp operands at the corresponding MCInst operand
9388 // indices so that the scale values are parsed correctly.
9389 if (NumOperands == CbszOpIdx) {
9390 Inst.addOperand(MCOperand::createImm(0));
9391 Inst.addOperand(MCOperand::createImm(0));
9392 }
9393 if (isRegOrImmWithInputMods(Desc, NumOperands)) {
9394 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9395 } else if (Op.isImmModifier()) {
9396 OptionalIdx[Op.getImmTy()] = I;
9397 } else {
9398 Op.addRegOrImmOperands(Inst, 1);
9399 }
9400 }
9401
9402 // Insert CBSZ and BLGP operands for F8F6F4 variants
9403 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9404 if (CbszIdx != OptionalIdx.end()) {
9405 int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
9406 Inst.getOperand(CbszOpIdx).setImm(CbszVal);
9407 }
9408
9409 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
9410 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9411 if (BlgpIdx != OptionalIdx.end()) {
9412 int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
9413 Inst.getOperand(BlgpOpIdx).setImm(BlgpVal);
9414 }
9415
9416 // Add dummy src_modifiers
9417 Inst.addOperand(MCOperand::createImm(0));
9418 Inst.addOperand(MCOperand::createImm(0));
9419
9420 // Handle op_sel fields
9421
9422 unsigned OpSel = 0;
9423 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9424 if (OpselIdx != OptionalIdx.end()) {
9425 OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
9426 .getImm();
9427 }
9428
9429 unsigned OpSelHi = 0;
9430 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9431 if (OpselHiIdx != OptionalIdx.end()) {
9432 OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
9433 .getImm();
9434 }
9435 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9436 AMDGPU::OpName::src1_modifiers};
9437
9438 for (unsigned J = 0; J < 2; ++J) {
9439 unsigned ModVal = 0;
9440 if (OpSel & (1 << J))
9441 ModVal |= SISrcMods::OP_SEL_0;
9442 if (OpSelHi & (1 << J))
9443 ModVal |= SISrcMods::OP_SEL_1;
9444
9445 const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9446 Inst.getOperand(ModIdx).setImm(ModVal);
9447 }
9448}
9449
9450void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
9451 OptionalImmIndexMap &OptionalIdx) {
9452 unsigned Opc = Inst.getOpcode();
9453
9454 unsigned I = 1;
9455 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9456 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9457 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9458 }
9459
9460 for (unsigned E = Operands.size(); I != E; ++I) {
9461 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9462 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9463 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9464 } else if (Op.isImmModifier()) {
9465 OptionalIdx[Op.getImmTy()] = I;
9466 } else {
9467 Op.addRegOrImmOperands(Inst, 1);
9468 }
9469 }
9470
9471 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::scale_sel))
9472 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9473 AMDGPUOperand::ImmTyScaleSel);
9474
9475 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9476 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9477 AMDGPUOperand::ImmTyClamp);
9478
9479 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
9480 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
9481 Inst.addOperand(Inst.getOperand(0));
9482 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9483 AMDGPUOperand::ImmTyByteSel);
9484 }
9485
9486 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9487 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9488 AMDGPUOperand::ImmTyOModSI);
9489
9490 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
9491 // they have a src2 register operand that is tied to the dst operand.
9492 // We don't allow modifiers for this operand in the assembler, so src2_modifiers
9493 // must be 0.
9494 if (isMAC(Opc)) {
9495 auto *it = Inst.begin();
9496 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
9497 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
9498 ++it;
9499 // Copy the operand to ensure it's not invalidated when Inst grows.
9500 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
9501 }
9502}
9503
9504void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
9505 OptionalImmIndexMap OptionalIdx;
9506 cvtVOP3(Inst, Operands, OptionalIdx);
9507}
9508
9509void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
9510 OptionalImmIndexMap &OptIdx) {
9511 const int Opc = Inst.getOpcode();
9512 const MCInstrDesc &Desc = MII.get(Opc);
9513
9514 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
9515
9516 if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
9517 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
9518 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9519 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9520 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9521 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
9522 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
9523 Inst.addOperand(Inst.getOperand(0));
9524 }
9525
9526 // Adding vdst_in operand is already covered for these DPP instructions in
9527 // cvtVOP3DPP.
9528 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
9529 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 ||
9530 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 ||
9531 Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 ||
9532 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 ||
9533 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 ||
9534 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 ||
9535 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 ||
9536 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 ||
9537 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
9538 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9539 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp_gfx1250 ||
9540 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp8_gfx1250 ||
9541 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9542 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9543 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp_gfx1250 ||
9544 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp_gfx1250 ||
9545 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp8_gfx1250 ||
9546 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp8_gfx1250 ||
9547 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_gfx1250 ||
9548 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_gfx1250 ||
9549 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp_gfx1250 ||
9550 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp_gfx1250 ||
9551 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp8_gfx1250 ||
9552 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp8_gfx1250 ||
9553 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_gfx1250 ||
9554 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_gfx1250)) {
9555 Inst.addOperand(Inst.getOperand(0));
9556 }
9557
9558 int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
9559 if (BitOp3Idx != -1) {
9560 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9561 }
9562
9563 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
9564 // instruction, and then figure out where to actually put the modifiers
9565
9566 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9567 if (OpSelIdx != -1) {
9568 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
9569 }
9570
9571 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
9572 if (OpSelHiIdx != -1) {
9573 int DefaultVal = IsPacked ? -1 : 0;
9574 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
9575 DefaultVal);
9576 }
9577
9578 int MatrixAFMTIdx =
9579 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
9580 if (MatrixAFMTIdx != -1) {
9581 addOptionalImmOperand(Inst, Operands, OptIdx,
9582 AMDGPUOperand::ImmTyMatrixAFMT, 0);
9583 }
9584
9585 int MatrixBFMTIdx =
9586 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
9587 if (MatrixBFMTIdx != -1) {
9588 addOptionalImmOperand(Inst, Operands, OptIdx,
9589 AMDGPUOperand::ImmTyMatrixBFMT, 0);
9590 }
9591
9592 int MatrixAScaleIdx =
9593 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale);
9594 if (MatrixAScaleIdx != -1) {
9595 addOptionalImmOperand(Inst, Operands, OptIdx,
9596 AMDGPUOperand::ImmTyMatrixAScale, 0);
9597 }
9598
9599 int MatrixBScaleIdx =
9600 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale);
9601 if (MatrixBScaleIdx != -1) {
9602 addOptionalImmOperand(Inst, Operands, OptIdx,
9603 AMDGPUOperand::ImmTyMatrixBScale, 0);
9604 }
9605
9606 int MatrixAScaleFmtIdx =
9607 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
9608 if (MatrixAScaleFmtIdx != -1) {
9609 addOptionalImmOperand(Inst, Operands, OptIdx,
9610 AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
9611 }
9612
9613 int MatrixBScaleFmtIdx =
9614 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
9615 if (MatrixBScaleFmtIdx != -1) {
9616 addOptionalImmOperand(Inst, Operands, OptIdx,
9617 AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
9618 }
9619
9620 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
9621 addOptionalImmOperand(Inst, Operands, OptIdx,
9622 AMDGPUOperand::ImmTyMatrixAReuse, 0);
9623
9624 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
9625 addOptionalImmOperand(Inst, Operands, OptIdx,
9626 AMDGPUOperand::ImmTyMatrixBReuse, 0);
9627
9628 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
9629 if (NegLoIdx != -1)
9630 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
9631
9632 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
9633 if (NegHiIdx != -1)
9634 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
9635
9636 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9637 AMDGPU::OpName::src2};
9638 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9639 AMDGPU::OpName::src1_modifiers,
9640 AMDGPU::OpName::src2_modifiers};
9641
9642 unsigned OpSel = 0;
9643 unsigned OpSelHi = 0;
9644 unsigned NegLo = 0;
9645 unsigned NegHi = 0;
9646
9647 if (OpSelIdx != -1)
9648 OpSel = Inst.getOperand(OpSelIdx).getImm();
9649
9650 if (OpSelHiIdx != -1)
9651 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
9652
9653 if (NegLoIdx != -1)
9654 NegLo = Inst.getOperand(NegLoIdx).getImm();
9655
9656 if (NegHiIdx != -1)
9657 NegHi = Inst.getOperand(NegHiIdx).getImm();
9658
9659 for (int J = 0; J < 3; ++J) {
9660 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9661 if (OpIdx == -1)
9662 break;
9663
9664 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9665
9666 if (ModIdx == -1)
9667 continue;
9668
9669 uint32_t ModVal = 0;
9670
9671 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
9672 if (SrcOp.isReg() && getMRI()
9673 ->getRegClass(AMDGPU::VGPR_16RegClassID)
9674 .contains(SrcOp.getReg())) {
9675 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
9676 if (VGPRSuffixIsHi)
9677 ModVal |= SISrcMods::OP_SEL_0;
9678 } else {
9679 if ((OpSel & (1 << J)) != 0)
9680 ModVal |= SISrcMods::OP_SEL_0;
9681 }
9682
9683 if ((OpSelHi & (1 << J)) != 0)
9684 ModVal |= SISrcMods::OP_SEL_1;
9685
9686 if ((NegLo & (1 << J)) != 0)
9687 ModVal |= SISrcMods::NEG;
9688
9689 if ((NegHi & (1 << J)) != 0)
9690 ModVal |= SISrcMods::NEG_HI;
9691
9692 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
9693 }
9694}
9695
9696void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
9697 OptionalImmIndexMap OptIdx;
9698 cvtVOP3(Inst, Operands, OptIdx);
9699 cvtVOP3P(Inst, Operands, OptIdx);
9700}
9701
9702 static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
9703 unsigned i, unsigned Opc,
9704 AMDGPU::OpName OpName) {
9705 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
9706 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9707 else
9708 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
9709}
9710
9711void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
9712 unsigned Opc = Inst.getOpcode();
9713
9714 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9715 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
9716 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
9717 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
9718 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
9719
9720 OptionalImmIndexMap OptIdx;
9721 for (unsigned i = 5; i < Operands.size(); ++i) {
9722 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9723 OptIdx[Op.getImmTy()] = i;
9724 }
9725
9726 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
9727 addOptionalImmOperand(Inst, Operands, OptIdx,
9728 AMDGPUOperand::ImmTyIndexKey8bit);
9729
9730 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
9731 addOptionalImmOperand(Inst, Operands, OptIdx,
9732 AMDGPUOperand::ImmTyIndexKey16bit);
9733
9734 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
9735 addOptionalImmOperand(Inst, Operands, OptIdx,
9736 AMDGPUOperand::ImmTyIndexKey32bit);
9737
9738 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9739 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
9740
9741 cvtVOP3P(Inst, Operands, OptIdx);
9742}
9743
9744//===----------------------------------------------------------------------===//
9745// VOPD
9746//===----------------------------------------------------------------------===//
9747
9748ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
9749 if (!hasVOPD(getSTI()))
9750 return ParseStatus::NoMatch;
9751
9752 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
9753 SMLoc S = getLoc();
9754 lex();
9755 lex();
9756 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
9757 SMLoc OpYLoc = getLoc();
9758 StringRef OpYName;
9759 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
9760 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
9761 return ParseStatus::Success;
9762 }
9763 return Error(OpYLoc, "expected a VOPDY instruction after ::");
9764 }
9765 return ParseStatus::NoMatch;
9766}
9767
9768// Create VOPD MCInst operands using parsed assembler operands.
9769void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
9770 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9771
9772 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
9773 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
9774 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9775 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9776 return;
9777 }
9778 if (Op.isReg()) {
9779 Op.addRegOperands(Inst, 1);
9780 return;
9781 }
9782 if (Op.isImm()) {
9783 Op.addImmOperands(Inst, 1);
9784 return;
9785 }
9786 llvm_unreachable("Unhandled operand type in cvtVOPD");
9787 };
9788
9789 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
9790
9791 // MCInst operands are ordered as follows:
9792 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
9793
9794 for (auto CompIdx : VOPD::COMPONENTS) {
9795 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
9796 }
9797
9798 for (auto CompIdx : VOPD::COMPONENTS) {
9799 const auto &CInfo = InstInfo[CompIdx];
9800 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
9801 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
9802 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
9803 if (CInfo.hasSrc2Acc())
9804 addOp(CInfo.getIndexOfDstInParsedOperands());
9805 }
9806
9807 int BitOp3Idx =
9808 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
9809 if (BitOp3Idx != -1) {
9810 OptionalImmIndexMap OptIdx;
9811 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
9812 if (Op.isImm())
9813 OptIdx[Op.getImmTy()] = Operands.size() - 1;
9814
9815 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9816 }
9817}
9818
9819//===----------------------------------------------------------------------===//
9820// dpp
9821//===----------------------------------------------------------------------===//
9822
9823bool AMDGPUOperand::isDPP8() const {
9824 return isImmTy(ImmTyDPP8);
9825}
9826
9827bool AMDGPUOperand::isDPPCtrl() const {
9828 using namespace AMDGPU::DPP;
9829
9830 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
9831 if (result) {
9832 int64_t Imm = getImm();
9833 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
9834 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
9835 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
9836 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
9837 (Imm == DppCtrl::WAVE_SHL1) ||
9838 (Imm == DppCtrl::WAVE_ROL1) ||
9839 (Imm == DppCtrl::WAVE_SHR1) ||
9840 (Imm == DppCtrl::WAVE_ROR1) ||
9841 (Imm == DppCtrl::ROW_MIRROR) ||
9842 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
9843 (Imm == DppCtrl::BCAST15) ||
9844 (Imm == DppCtrl::BCAST31) ||
9845 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
9846 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
9847 }
9848 return false;
9849}
9850
9851//===----------------------------------------------------------------------===//
9852// mAI
9853//===----------------------------------------------------------------------===//
9854
9855bool AMDGPUOperand::isBLGP() const {
9856 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
9857}
9858
9859bool AMDGPUOperand::isS16Imm() const {
9860 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
9861}
9862
9863bool AMDGPUOperand::isU16Imm() const {
9864 return isImmLiteral() && isUInt<16>(getImm());
9865}
9866
9867//===----------------------------------------------------------------------===//
9868// dim
9869//===----------------------------------------------------------------------===//
9870
9871bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
9872 // We want to allow "dim:1D" etc.,
9873 // but the initial 1 is tokenized as an integer.
9874 std::string Token;
9875 if (isToken(AsmToken::Integer)) {
9876 SMLoc Loc = getToken().getEndLoc();
9877 Token = std::string(getTokenStr());
9878 lex();
9879 if (getLoc() != Loc)
9880 return false;
9881 }
9882
9883 StringRef Suffix;
9884 if (!parseId(Suffix))
9885 return false;
9886 Token += Suffix;
9887
9888 StringRef DimId = Token;
9889 DimId.consume_front("SQ_RSRC_IMG_");
9890
9891 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
9892 if (!DimInfo)
9893 return false;
9894
9895 Encoding = DimInfo->Encoding;
9896 return true;
9897}
9898
9899ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
9900 if (!isGFX10Plus())
9901 return ParseStatus::NoMatch;
9902
9903 SMLoc S = getLoc();
9904
9905 if (!trySkipId("dim", AsmToken::Colon))
9906 return ParseStatus::NoMatch;
9907
9908 unsigned Encoding;
9909 SMLoc Loc = getLoc();
9910 if (!parseDimId(Encoding))
9911 return Error(Loc, "invalid dim value");
9912
9913 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
9914 AMDGPUOperand::ImmTyDim));
9915 return ParseStatus::Success;
9916}
9917
9918//===----------------------------------------------------------------------===//
9919// dpp
9920//===----------------------------------------------------------------------===//
9921
9922ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9923 SMLoc S = getLoc();
9924
9925 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9926 return ParseStatus::NoMatch;
9927
9928 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
9929
9930 int64_t Sels[8];
9931
9932 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9933 return ParseStatus::Failure;
9934
9935 for (size_t i = 0; i < 8; ++i) {
9936 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9937 return ParseStatus::Failure;
9938
9939 SMLoc Loc = getLoc();
9940 if (getParser().parseAbsoluteExpression(Sels[i]))
9941 return ParseStatus::Failure;
9942 if (0 > Sels[i] || 7 < Sels[i])
9943 return Error(Loc, "expected a 3-bit value");
9944 }
9945
9946 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9947 return ParseStatus::Failure;
9948
9949 unsigned DPP8 = 0;
9950 for (size_t i = 0; i < 8; ++i)
9951 DPP8 |= (Sels[i] << (i * 3));
9952
9953 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9954 return ParseStatus::Success;
9955}
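// Packing example: "dpp8:[7,6,5,4,3,2,1,0]" produces
// DPP8 = 7 | 6<<3 | 5<<6 | ... | 0<<21, i.e. lane i reads from lane Sels[i].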
9956
9957bool
9958AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9959 const OperandVector &Operands) {
9960 if (Ctrl == "row_newbcast")
9961 return isGFX90A();
9962
9963 if (Ctrl == "row_share" ||
9964 Ctrl == "row_xmask")
9965 return isGFX10Plus();
9966
9967 if (Ctrl == "wave_shl" ||
9968 Ctrl == "wave_shr" ||
9969 Ctrl == "wave_rol" ||
9970 Ctrl == "wave_ror" ||
9971 Ctrl == "row_bcast")
9972 return isVI() || isGFX9();
9973
9974 return Ctrl == "row_mirror" ||
9975 Ctrl == "row_half_mirror" ||
9976 Ctrl == "quad_perm" ||
9977 Ctrl == "row_shl" ||
9978 Ctrl == "row_shr" ||
9979 Ctrl == "row_ror";
9980}
9981
9982int64_t
9983AMDGPUAsmParser::parseDPPCtrlPerm() {
9984 // quad_perm:[%d,%d,%d,%d]
9985
9986 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9987 return -1;
9988
9989 int64_t Val = 0;
9990 for (int i = 0; i < 4; ++i) {
9991 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9992 return -1;
9993
9994 int64_t Temp;
9995 SMLoc Loc = getLoc();
9996 if (getParser().parseAbsoluteExpression(Temp))
9997 return -1;
9998 if (Temp < 0 || Temp > 3) {
9999 Error(Loc, "expected a 2-bit value");
10000 return -1;
10001 }
10002
10003 Val += (Temp << i * 2);
10004 }
10005
10006 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
10007 return -1;
10008
10009 return Val;
10010}
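// Packing example: "quad_perm:[0,1,2,3]" (the identity permutation) encodes as
// 0 | 1<<2 | 2<<4 | 3<<6 == 0xE4, which is also the default dpp_ctrl value used
// in cvtVOP3DPP below.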
10011
10012int64_t
10013AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
10014 using namespace AMDGPU::DPP;
10015
10016 // sel:%d
10017
10018 int64_t Val;
10019 SMLoc Loc = getLoc();
10020
10021 if (getParser().parseAbsoluteExpression(Val))
10022 return -1;
10023
10024 struct DppCtrlCheck {
10025 int64_t Ctrl;
10026 int Lo;
10027 int Hi;
10028 };
10029
10030 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
10031 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
10032 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
10033 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
10034 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
10035 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
10036 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
10037 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
10038 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
10039 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
10040 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
10041 .Default({-1, 0, 0});
10042
10043 bool Valid;
10044 if (Check.Ctrl == -1) {
10045 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
10046 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
10047 } else {
10048 Valid = Check.Lo <= Val && Val <= Check.Hi;
10049 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
10050 }
10051
10052 if (!Valid) {
10053 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
10054 return -1;
10055 }
10056
10057 return Val;
10058}
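// Encoding examples for the table above: "row_shl:1" -> ROW_SHL0 | 1,
// "wave_shl:1" -> WAVE_SHL1, "row_bcast:15" -> BCAST15, "row_bcast:31" -> BCAST31.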
10059
10060ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
10061 using namespace AMDGPU::DPP;
10062
10063 if (!isToken(AsmToken::Identifier) ||
10064 !isSupportedDPPCtrl(getTokenStr(), Operands))
10065 return ParseStatus::NoMatch;
10066
10067 SMLoc S = getLoc();
10068 int64_t Val = -1;
10069 StringRef Ctrl;
10070
10071 parseId(Ctrl);
10072
10073 if (Ctrl == "row_mirror") {
10074 Val = DppCtrl::ROW_MIRROR;
10075 } else if (Ctrl == "row_half_mirror") {
10076 Val = DppCtrl::ROW_HALF_MIRROR;
10077 } else {
10078 if (skipToken(AsmToken::Colon, "expected a colon")) {
10079 if (Ctrl == "quad_perm") {
10080 Val = parseDPPCtrlPerm();
10081 } else {
10082 Val = parseDPPCtrlSel(Ctrl);
10083 }
10084 }
10085 }
10086
10087 if (Val == -1)
10088 return ParseStatus::Failure;
10089
10090 Operands.push_back(
10091 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
10092 return ParseStatus::Success;
10093}
10094
10095void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
10096 bool IsDPP8) {
10097 OptionalImmIndexMap OptionalIdx;
10098 unsigned Opc = Inst.getOpcode();
10099 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10100
10101 // MAC instructions are special because they have an 'old'
10102 // operand which is not tied to dst (but is assumed to be).
10103 // They also have a dummy unused src2_modifiers operand.
10104 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
10105 int Src2ModIdx =
10106 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
10107 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
10108 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
10109
10110 unsigned I = 1;
10111 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10112 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10113 }
10114
10115 int Fi = 0;
10116 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
10117 bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
10118 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
10119 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
10120 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
10121
10122 for (unsigned E = Operands.size(); I != E; ++I) {
10123
10124 if (IsMAC) {
10125 int NumOperands = Inst.getNumOperands();
10126 if (OldIdx == NumOperands) {
10127 // Handle old operand
10128 constexpr int DST_IDX = 0;
10129 Inst.addOperand(Inst.getOperand(DST_IDX));
10130 } else if (Src2ModIdx == NumOperands) {
10131 // Add unused dummy src2_modifiers
10132 Inst.addOperand(MCOperand::createImm(0));
10133 }
10134 }
10135
10136 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
10137 Inst.addOperand(Inst.getOperand(0));
10138 }
10139
10140 if (IsVOP3CvtSrDpp) {
10141 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
10142 Inst.addOperand(MCOperand::createImm(0));
10143 Inst.addOperand(MCOperand::createReg(MCRegister()));
10144 }
10145 }
10146
10147 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10148 MCOI::TIED_TO);
10149 if (TiedTo != -1) {
10150 assert((unsigned)TiedTo < Inst.getNumOperands());
10151 // handle tied old or src2 for MAC instructions
10152 Inst.addOperand(Inst.getOperand(TiedTo));
10153 }
10154 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10155 // Add the register arguments
10156 if (IsDPP8 && Op.isDppFI()) {
10157 Fi = Op.getImm();
10158 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10159 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10160 } else if (Op.isReg()) {
10161 Op.addRegOperands(Inst, 1);
10162 } else if (Op.isImm() &&
10163 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
10164 Op.addImmOperands(Inst, 1);
10165 } else if (Op.isImm()) {
10166 OptionalIdx[Op.getImmTy()] = I;
10167 } else {
10168 llvm_unreachable("unhandled operand type");
10169 }
10170 }
10171
10172 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
10173 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10174 AMDGPUOperand::ImmTyClamp);
10175
10176 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
10177 if (VdstInIdx == static_cast<int>(Inst.getNumOperands()))
10178 Inst.addOperand(Inst.getOperand(0));
10179 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10180 AMDGPUOperand::ImmTyByteSel);
10181 }
10182
10183 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10184 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
10185
10186 if (Desc.TSFlags & SIInstrFlags::VOP3P)
10187 cvtVOP3P(Inst, Operands, OptionalIdx);
10188 else if (Desc.TSFlags & SIInstrFlags::VOP3)
10189 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
10190 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
10191 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
10192 }
10193
10194 if (IsDPP8) {
10195 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
10196 using namespace llvm::AMDGPU::DPP;
10197 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10198 } else {
10199 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
10200 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10201 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10202 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10203
10204 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
10205 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10206 AMDGPUOperand::ImmTyDppFI);
10207 }
10208}
10209
10210void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
10211 OptionalImmIndexMap OptionalIdx;
10212
10213 unsigned I = 1;
10214 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10215 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10216 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10217 }
10218
10219 int Fi = 0;
10220 for (unsigned E = Operands.size(); I != E; ++I) {
10221 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10222 MCOI::TIED_TO);
10223 if (TiedTo != -1) {
10224 assert((unsigned)TiedTo < Inst.getNumOperands());
10225 // handle tied old or src2 for MAC instructions
10226 Inst.addOperand(Inst.getOperand(TiedTo));
10227 }
10228 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10229 // Add the register arguments
10230 if (Op.isReg() && validateVccOperand(Op.getReg())) {
10231 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
10232 // Skip it.
10233 continue;
10234 }
10235
10236 if (IsDPP8) {
10237 if (Op.isDPP8()) {
10238 Op.addImmOperands(Inst, 1);
10239 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10240 Op.addRegWithFPInputModsOperands(Inst, 2);
10241 } else if (Op.isDppFI()) {
10242 Fi = Op.getImm();
10243 } else if (Op.isReg()) {
10244 Op.addRegOperands(Inst, 1);
10245 } else {
10246 llvm_unreachable("Invalid operand type");
10247 }
10248 } else {
10249 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10250 Op.addRegWithFPInputModsOperands(Inst, 2);
10251 } else if (Op.isReg()) {
10252 Op.addRegOperands(Inst, 1);
10253 } else if (Op.isDPPCtrl()) {
10254 Op.addImmOperands(Inst, 1);
10255 } else if (Op.isImm()) {
10256 // Handle optional arguments
10257 OptionalIdx[Op.getImmTy()] = I;
10258 } else {
10259 llvm_unreachable("Invalid operand type");
10260 }
10261 }
10262 }
10263
10264 if (IsDPP8) {
10265 using namespace llvm::AMDGPU::DPP;
10266 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10267 } else {
10268 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10269 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10270 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10271 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
10272 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10273 AMDGPUOperand::ImmTyDppFI);
10274 }
10275 }
10276}
10277
10278//===----------------------------------------------------------------------===//
10279// sdwa
10280//===----------------------------------------------------------------------===//
10281
10282ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
10283 StringRef Prefix,
10284 AMDGPUOperand::ImmTy Type) {
10285 return parseStringOrIntWithPrefix(
10286 Operands, Prefix,
10287 {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
10288 Type);
10289}
10290
10291ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
10292 return parseStringOrIntWithPrefix(
10293 Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
10294 AMDGPUOperand::ImmTySDWADstUnused);
10295}
10296
10297void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
10298 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
10299}
10300
10301void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
10302 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
10303}
10304
10305void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
10306 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
10307}
10308
10309void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
10310 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
10311}
10312
10313void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
10314 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
10315}
10316
10317void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
10318 uint64_t BasicInstType,
10319 bool SkipDstVcc,
10320 bool SkipSrcVcc) {
10321 using namespace llvm::AMDGPU::SDWA;
10322
10323 OptionalImmIndexMap OptionalIdx;
10324 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
10325 bool SkippedVcc = false;
10326
10327 unsigned I = 1;
10328 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10329 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10330 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10331 }
10332
10333 for (unsigned E = Operands.size(); I != E; ++I) {
10334 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10335 if (SkipVcc && !SkippedVcc && Op.isReg() &&
10336 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
10337 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
10338 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
10339 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
10340 // Skip VCC only if we didn't skip it on previous iteration.
10341 // Note that src0 and src1 occupy 2 slots each because of modifiers.
10342 if (BasicInstType == SIInstrFlags::VOP2 &&
10343 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
10344 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
10345 SkippedVcc = true;
10346 continue;
10347 }
10348 if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
10349 SkippedVcc = true;
10350 continue;
10351 }
10352 }
10353 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10354 Op.addRegOrImmWithInputModsOperands(Inst, 2);
10355 } else if (Op.isImm()) {
10356 // Handle optional arguments
10357 OptionalIdx[Op.getImmTy()] = I;
10358 } else {
10359 llvm_unreachable("Invalid operand type");
10360 }
10361 SkippedVcc = false;
10362 }
10363
10364 const unsigned Opc = Inst.getOpcode();
10365 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10366 Opc != AMDGPU::V_NOP_sdwa_vi) {
10367 // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
10368 switch (BasicInstType) {
10369 case SIInstrFlags::VOP1:
10370 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
10371 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10372 AMDGPUOperand::ImmTyClamp, 0);
10373
10374 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10375 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10376 AMDGPUOperand::ImmTyOModSI, 0);
10377
10378 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
10379 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10380 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10381
10382 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
10383 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10384 AMDGPUOperand::ImmTySDWADstUnused,
10385 DstUnused::UNUSED_PRESERVE);
10386
10387 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10388 break;
10389
10390 case SIInstrFlags::VOP2:
10391 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10392 AMDGPUOperand::ImmTyClamp, 0);
10393
10394 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
10395 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
10396
10397 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10398 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
10399 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10400 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10401 break;
10402
10403 case SIInstrFlags::VOPC:
10404 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
10405 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10406 AMDGPUOperand::ImmTyClamp, 0);
10407 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10408 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10409 break;
10410
10411 default:
10412 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
10413 }
10414 }
10415
10416 // Special case v_mac_{f16, f32}:
10417 // they have a src2 register operand that is tied to the dst operand.
10418 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10419 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10420 auto *it = Inst.begin();
10421 std::advance(
10422 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
10423 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
10424 }
10425}
10426
10427/// Force static initialization.
10428extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
10429 LLVMInitializeAMDGPUAsmParser() {
10430 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
10431 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
10432 }
10433
10434#define GET_MATCHER_IMPLEMENTATION
10435#define GET_MNEMONIC_SPELL_CHECKER
10436#define GET_MNEMONIC_CHECKER
10437#include "AMDGPUGenAsmMatcher.inc"
10438
10439ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
10440 unsigned MCK) {
10441 switch (MCK) {
10442 case MCK_addr64:
10443 return parseTokenOp("addr64", Operands);
10444 case MCK_done:
10445 return parseTokenOp("done", Operands);
10446 case MCK_idxen:
10447 return parseTokenOp("idxen", Operands);
10448 case MCK_lds:
10449 return parseTokenOp("lds", Operands);
10450 case MCK_offen:
10451 return parseTokenOp("offen", Operands);
10452 case MCK_off:
10453 return parseTokenOp("off", Operands);
10454 case MCK_row_95_en:
10455 return parseTokenOp("row_en", Operands);
10456 case MCK_gds:
10457 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
10458 case MCK_tfe:
10459 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
10460 }
10461 return tryCustomParseOperand(Operands, MCK);
10462}
10463
10464// This function should be defined after auto-generated include so that we have
10465// MatchClassKind enum defined
10466unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
10467 unsigned Kind) {
10468 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
10469 // But MatchInstructionImpl() expects a token and fails to validate the
10470 // operand. This method checks whether we were given an immediate operand when
10471 // the matcher expects the corresponding token.
10472 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
10473 switch (Kind) {
10474 case MCK_addr64:
10475 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10476 case MCK_gds:
10477 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10478 case MCK_lds:
10479 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10480 case MCK_idxen:
10481 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10482 case MCK_offen:
10483 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10484 case MCK_tfe:
10485 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10486 case MCK_SSrc_b32:
10487 // When operands have expression values, they will return true for isToken,
10488 // because it is not possible to distinguish between a token and an
10489 // expression at parse time. MatchInstructionImpl() will always try to
10490 // match an operand as a token, when isToken returns true, and when the
10491 // name of the expression is not a valid token, the match will fail,
10492 // so we need to handle it here.
10493 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10494 case MCK_SSrc_f32:
10495 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10496 case MCK_SOPPBrTarget:
10497 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10498 case MCK_VReg32OrOff:
10499 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10500 case MCK_InterpSlot:
10501 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10502 case MCK_InterpAttr:
10503 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10504 case MCK_InterpAttrChan:
10505 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10506 case MCK_SReg_64:
10507 case MCK_SReg_64_XEXEC:
10508 // Null is defined as a 32-bit register but
10509 // it should also be enabled with 64-bit operands or larger.
10510 // The following code enables it for SReg_64 and larger operands
10511 // used as source and destination. Remaining source
10512 // operands are handled in isInlinableImm.
10513 case MCK_SReg_96:
10514 case MCK_SReg_128:
10515 case MCK_SReg_256:
10516 case MCK_SReg_512:
10517 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10518 default:
10519 return Match_InvalidOperand;
10520 }
10521}
10522
10523//===----------------------------------------------------------------------===//
10524// endpgm
10525//===----------------------------------------------------------------------===//
10526
10527ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
10528 SMLoc S = getLoc();
10529 int64_t Imm = 0;
10530
10531 if (!parseExpr(Imm)) {
10532 // The operand is optional, if not present default to 0
10533 Imm = 0;
10534 }
10535
10536 if (!isUInt<16>(Imm))
10537 return Error(S, "expected a 16-bit value");
10538
10539 Operands.push_back(
10540 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
10541 return ParseStatus::Success;
10542}
10543
10544bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
10545
10546//===----------------------------------------------------------------------===//
10547// Split Barrier
10548//===----------------------------------------------------------------------===//
10549
10550bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
BinaryOperator * Mul
static const char * getRegisterName(MCRegister Reg)
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & BFloat()
Definition APFloat.h:295
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5975
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:131
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition MCAsmMacro.h:103
bool is(TokenKind K) const
Definition MCAsmMacro.h:75
Register getReg() const
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
void printExpr(raw_ostream &, const MCExpr &) const
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:353
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:428
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getNumOperands() const
Definition MCInst.h:212
SMLoc getLoc() const
Definition MCInst.h:208
void setLoc(SMLoc loc)
Definition MCInst.h:207
unsigned getOpcode() const
Definition MCInst.h:202
iterator insert(iterator I, const MCOperand &Op)
Definition MCInst.h:232
void addOperand(const MCOperand Op)
Definition MCInst.h:215
iterator begin()
Definition MCInst.h:227
size_t size() const
Definition MCInst.h:226
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
int16_t getOpRegClassID(const MCOperandInfo &OpInfo, unsigned HwModeId) const
Return the ID of the register class to use for OpInfo, for the active HwMode HwModeId.
Definition MCInstrInfo.h:80
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
void setImm(int64_t Val)
Definition MCInst.h:89
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
bool isImm() const
Definition MCInst.h:66
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
const MCExpr * getExpr() const
Definition MCInst.h:118
bool isExpr() const
Definition MCInst.h:69
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
constexpr bool isValid() const
Definition MCRegister.h:84
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
Generic base class for all target subtargets.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
void setRedefinable(bool Value)
Mark this symbol as redefinable.
Definition MCSymbol.h:210
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
MCTargetAsmParser - Generic interface to target specific assembly parsers.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
constexpr unsigned id() const
Definition Register.h:100
Represents a location in source code.
Definition SMLoc.h:22
static SMLoc getFromPointer(const char *Ptr)
Definition SMLoc.h:35
constexpr const char * getPointer() const
Definition SMLoc.h:33
constexpr bool isValid() const
Definition SMLoc.h:28
SMLoc Start
Definition SMLoc.h:49
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:864
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
Definition StringRef.h:667
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:573
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:611
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition StringRef.h:273
bool consume_front(char Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:637
bool contains(StringRef key) const
Check if the set contains the given key.
Definition StringSet.h:60
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition StringSet.h:39
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
static constexpr CustomOperand Operands[]
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isInlineValue(MCRegister Reg)
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY bool isLitExpr(const MCExpr *Expr)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool hasMAIInsts(const MCSubtargetInfo &STI)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
LLVM_READONLY int64_t getLitValue(const MCExpr *Expr)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:232
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:202
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:209
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:223
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:220
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:225
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:211
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:206
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:201
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:208
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:207
@ OPERAND_REG_IMM_V2FP16_SPLAT
Definition SIDefines.h:210
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:219
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:217
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:212
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:205
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:226
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:237
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:238
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:213
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:204
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:222
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:218
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:224
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:214
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:239
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:221
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:203
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:229
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1430
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ OPERAND_IMMEDIATE
Definition MCInstrDesc.h:61
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
@ Valid
The data is already valid.
Context & getContext() const
Definition BasicBlock.h:99
bool isNull(StringRef S)
Definition YAMLTraits.h:570
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
Definition Error.h:1113
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
StringMapEntry< Value * > ValueName
Definition Value.h:56
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
Definition Alignment.h:206
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
static bool isMem(const MachineInstr &MI, unsigned Op)
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
void PrintError(const Twine &Msg)
Definition Error.cpp:104
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
FunctionAddr VTableAddr uintptr_t uintptr_t DataSize
Definition InstrProf.h:267
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:345
Op::Description Desc
Target & getTheR600Target()
The target for R600 GPUs.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
SmallVectorImpl< std::unique_ptr< MCParsedAsmOperand > > OperandVector
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:302
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1770
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
Definition DWP.h:27
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20
#define N
RegisterKind Kind
StringLiteral Name
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx, bool InitMCExpr=true)
Instruction set architecture version.
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...