LLVM 23.0.0git
AMDGPUAsmParser.cpp
Go to the documentation of this file.
1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
16#include "SIDefines.h"
17#include "SIInstrInfo.h"
22#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/StringSet.h"
25#include "llvm/ADT/Twine.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/MC/MCInstrDesc.h"
38#include "llvm/MC/MCSymbol.h"
47#include <optional>
48
49using namespace llvm;
50using namespace llvm::AMDGPU;
51using namespace llvm::amdhsa;
52
53namespace {
54
55class AMDGPUAsmParser;
56
57enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
58
59//===----------------------------------------------------------------------===//
60// Operand
61//===----------------------------------------------------------------------===//
62
63class AMDGPUOperand : public MCParsedAsmOperand {
64 enum KindTy {
65 Token,
66 Immediate,
67 Register,
68 Expression
69 } Kind;
70
71 SMLoc StartLoc, EndLoc;
72 const AMDGPUAsmParser *AsmParser;
73
74public:
75 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
76 : Kind(Kind_), AsmParser(AsmParser_) {}
77
78 using Ptr = std::unique_ptr<AMDGPUOperand>;
79
80 struct Modifiers {
81 bool Abs = false;
82 bool Neg = false;
83 bool Sext = false;
84 LitModifier Lit = LitModifier::None;
85
86 bool hasFPModifiers() const { return Abs || Neg; }
87 bool hasIntModifiers() const { return Sext; }
88 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
89 bool isForcedLit() const { return Lit == LitModifier::Lit; }
90 bool isForcedLit64() const { return Lit == LitModifier::Lit64; }
91
92 int64_t getFPModifiersOperand() const {
93 int64_t Operand = 0;
94 Operand |= Abs ? SISrcMods::ABS : 0u;
95 Operand |= Neg ? SISrcMods::NEG : 0u;
96 return Operand;
97 }
98
99 int64_t getIntModifiersOperand() const {
100 int64_t Operand = 0;
101 Operand |= Sext ? SISrcMods::SEXT : 0u;
102 return Operand;
103 }
104
105 int64_t getModifiersOperand() const {
106 assert(!(hasFPModifiers() && hasIntModifiers())
107 && "fp and int modifiers should not be used simultaneously");
108 if (hasFPModifiers())
109 return getFPModifiersOperand();
110 if (hasIntModifiers())
111 return getIntModifiersOperand();
112 return 0;
113 }
114
115 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
116 };
117
/// Tag stored in every Immediate operand. ImmTyNone marks a plain literal
/// value (see isImmLiteral()); any other enumerator marks the immediate as
/// one specific named operand/modifier (see isImmModifier()).
/// NOTE(review): the enumerator order determines the underlying values;
/// presumably nothing relies on the numbers, but confirm before reordering.
118 enum ImmTy {
119 ImmTyNone,
120 ImmTyGDS,
121 ImmTyLDS,
122 ImmTyOffen,
123 ImmTyIdxen,
124 ImmTyAddr64,
125 ImmTyOffset,
126 ImmTyInstOffset,
127 ImmTyOffset0,
128 ImmTyOffset1,
129 ImmTySMEMOffsetMod,
130 ImmTyCPol,
131 ImmTyTFE,
132 ImmTyIsAsync,
133 ImmTyD16,
134 ImmTyClamp,
135 ImmTyOModSI,
136 ImmTySDWADstSel,
137 ImmTySDWASrc0Sel,
138 ImmTySDWASrc1Sel,
139 ImmTySDWADstUnused,
140 ImmTyDMask,
141 ImmTyDim,
142 ImmTyUNorm,
143 ImmTyDA,
144 ImmTyR128A16,
145 ImmTyA16,
146 ImmTyLWE,
147 ImmTyExpTgt,
148 ImmTyExpCompr,
149 ImmTyExpVM,
150 ImmTyDone,
151 ImmTyRowEn,
152 ImmTyFORMAT,
153 ImmTyHwreg,
154 ImmTyOff,
155 ImmTySendMsg,
156 ImmTyWaitEvent,
157 ImmTyInterpSlot,
158 ImmTyInterpAttr,
159 ImmTyInterpAttrChan,
160 ImmTyOpSel,
161 ImmTyOpSelHi,
162 ImmTyNegLo,
163 ImmTyNegHi,
164 ImmTyIndexKey8bit,
165 ImmTyIndexKey16bit,
166 ImmTyIndexKey32bit,
167 ImmTyDPP8,
168 ImmTyDppCtrl,
169 ImmTyDppRowMask,
170 ImmTyDppBankMask,
171 ImmTyDppBoundCtrl,
172 ImmTyDppFI,
173 ImmTySwizzle,
174 ImmTyGprIdxMode,
175 ImmTyHigh,
176 ImmTyBLGP,
177 ImmTyCBSZ,
178 ImmTyABID,
179 ImmTyEndpgm,
180 ImmTyWaitVDST,
181 ImmTyWaitEXP,
182 ImmTyWaitVAVDst,
183 ImmTyWaitVMVSrc,
184 ImmTyBitOp3,
185 ImmTyMatrixAFMT,
186 ImmTyMatrixBFMT,
187 ImmTyMatrixAScale,
188 ImmTyMatrixBScale,
189 ImmTyMatrixAScaleFmt,
190 ImmTyMatrixBScaleFmt,
191 ImmTyMatrixAReuse,
192 ImmTyMatrixBReuse,
193 ImmTyScaleSel,
194 ImmTyByteSel,
195 };
196
197private:
/// Payload of a Token operand: a character pointer plus a length, which
/// getToken() wraps in a StringRef.
/// NOTE(review): the struct does not own the characters as far as this code
/// shows — presumably they point into the parsed source buffer; confirm.
198 struct TokOp {
199 const char *Data;
200 unsigned Length;
201 };
202
/// Payload of an Immediate operand.
203 struct ImmOp {
// Value returned by getImm() and overwritten by setImm().
204 int64_t Val;
// Which kind of immediate this is; ImmTyNone means a plain literal.
205 ImmTy Type;
// NOTE(review): presumably set when the literal was written as a
// floating-point token; the code that sets it is not visible here.
206 bool IsFPImm;
// Modifiers read and written through getModifiers()/setModifiers().
207 Modifiers Mods;
208 };
209
/// Payload of a Register operand.
210 struct RegOp {
// Register returned by getReg().
211 MCRegister RegNo;
// Modifiers read and written through getModifiers()/setModifiers().
212 Modifiers Mods;
213 };
214
// Operand payload. Kind selects the active member: the accessors assert the
// matching kind before reading (getToken -> Tok, getImm -> Imm,
// getReg -> Reg) and isSymbolRefExpr() checks isExpr() before touching Expr.
215 union {
216 TokOp Tok;
217 ImmOp Imm;
218 RegOp Reg;
219 const MCExpr *Expr;
220 };
221
222 // The index of the associated MCInst operand.
223 mutable int MCOpIdx = -1;
224
225public:
226 bool isToken() const override { return Kind == Token; }
227
228 bool isSymbolRefExpr() const {
229 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230 }
231
232 bool isImm() const override {
233 return Kind == Immediate;
234 }
235
236 bool isInlinableImm(MVT type) const;
237 bool isLiteralImm(MVT type) const;
238
239 bool isRegKind() const {
240 return Kind == Register;
241 }
242
243 bool isReg() const override {
244 return isRegKind() && !hasModifiers();
245 }
246
247 bool isRegOrInline(unsigned RCID, MVT type) const {
248 return isRegClass(RCID) || isInlinableImm(type);
249 }
250
251 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
252 return isRegOrInline(RCID, type) || isLiteralImm(type);
253 }
254
255 bool isRegOrImmWithInt16InputMods() const {
256 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
257 }
258
259 template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
261 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
262 }
263
264 bool isRegOrImmWithInt32InputMods() const {
265 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
266 }
267
268 bool isRegOrInlineImmWithInt16InputMods() const {
269 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
270 }
271
272 template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
273 return isRegOrInline(
274 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
275 }
276
277 bool isRegOrInlineImmWithInt32InputMods() const {
278 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
279 }
280
281 bool isRegOrImmWithInt64InputMods() const {
282 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
283 }
284
285 bool isRegOrImmWithFP16InputMods() const {
286 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
287 }
288
289 template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
291 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
292 }
293
294 bool isRegOrImmWithFP32InputMods() const {
295 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
296 }
297
298 bool isRegOrImmWithFP64InputMods() const {
299 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
300 }
301
302 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
303 return isRegOrInline(
304 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
305 }
306
307 bool isRegOrInlineImmWithFP32InputMods() const {
308 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
309 }
310
311 bool isRegOrInlineImmWithFP64InputMods() const {
312 return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
313 }
314
315 bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }
316
317 bool isVRegWithFP32InputMods() const {
318 return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
319 }
320
321 bool isVRegWithFP64InputMods() const {
322 return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
323 }
324
325 bool isPackedFP16InputMods() const {
326 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
327 }
328
329 bool isPackedVGPRFP32InputMods() const {
330 return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
331 }
332
333 bool isVReg() const {
334 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
335 isRegClass(AMDGPU::VReg_64RegClassID) ||
336 isRegClass(AMDGPU::VReg_96RegClassID) ||
337 isRegClass(AMDGPU::VReg_128RegClassID) ||
338 isRegClass(AMDGPU::VReg_160RegClassID) ||
339 isRegClass(AMDGPU::VReg_192RegClassID) ||
340 isRegClass(AMDGPU::VReg_256RegClassID) ||
341 isRegClass(AMDGPU::VReg_512RegClassID) ||
342 isRegClass(AMDGPU::VReg_1024RegClassID);
343 }
344
345 bool isVReg32() const {
346 return isRegClass(AMDGPU::VGPR_32RegClassID);
347 }
348
349 bool isVReg32OrOff() const {
350 return isOff() || isVReg32();
351 }
352
353 bool isNull() const {
354 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
355 }
356
357 bool isAV_LdSt_32_Align2_RegOp() const {
358 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
359 isRegClass(AMDGPU::AGPR_32RegClassID);
360 }
361
362 bool isVRegWithInputMods() const;
363 template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
364 template <bool IsFake16> bool isT16VRegWithInputMods() const;
365
366 bool isSDWAOperand(MVT type) const;
367 bool isSDWAFP16Operand() const;
368 bool isSDWAFP32Operand() const;
369 bool isSDWAInt16Operand() const;
370 bool isSDWAInt32Operand() const;
371
372 bool isImmTy(ImmTy ImmT) const {
373 return isImm() && Imm.Type == ImmT;
374 }
375
376 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
377
378 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
379
380 bool isImmModifier() const {
381 return isImm() && Imm.Type != ImmTyNone;
382 }
383
384 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
385 bool isDim() const { return isImmTy(ImmTyDim); }
386 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
387 bool isOff() const { return isImmTy(ImmTyOff); }
388 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
389 bool isOffen() const { return isImmTy(ImmTyOffen); }
390 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
391 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
392 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
393 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
394 bool isGDS() const { return isImmTy(ImmTyGDS); }
395 bool isLDS() const { return isImmTy(ImmTyLDS); }
396 bool isCPol() const { return isImmTy(ImmTyCPol); }
397 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
398 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
399 bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
400 bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
401 bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
402 bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
403 bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
404 bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
405 bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
406 bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
407 bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
408 bool isTFE() const { return isImmTy(ImmTyTFE); }
409 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
410 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
411 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
412 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
413 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
414 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
415 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
416 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
417 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
418 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
419 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
420 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
421 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
422 bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
423 bool isDone() const { return isImmTy(ImmTyDone); }
424 bool isRowEn() const { return isImmTy(ImmTyRowEn); }
425
426 bool isRegOrImm() const {
427 return isReg() || isImm();
428 }
429
430 bool isRegClass(unsigned RCID) const;
431
432 bool isInlineValue() const;
433
434 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
435 return isRegOrInline(RCID, type) && !hasModifiers();
436 }
437
438 bool isSCSrcB16() const {
439 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
440 }
441
442 bool isSCSrcV2B16() const {
443 return isSCSrcB16();
444 }
445
446 bool isSCSrc_b32() const {
447 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
448 }
449
450 bool isSCSrc_b64() const {
451 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
452 }
453
454 bool isBoolReg() const;
455
456 bool isSCSrcF16() const {
457 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
458 }
459
460 bool isSCSrcV2F16() const {
461 return isSCSrcF16();
462 }
463
464 bool isSCSrcF32() const {
465 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
466 }
467
468 bool isSCSrcF64() const {
469 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
470 }
471
472 bool isSSrc_b32() const {
473 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
474 }
475
476 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
477
/// Predicate for the SSrcV2B16 operand class. The body is marked
/// unreachable, so no operand is expected to be tested against it; the
/// return that follows mirrors the scalar b16 predicate.
/// NOTE(review): presumably the definition exists only because generated
/// matcher code references the name — confirm.
478 bool isSSrcV2B16() const {
479 llvm_unreachable("cannot happen");
480 return isSSrc_b16();
481 }
482
483 bool isSSrc_b64() const {
484 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
485 // See isVSrc64().
486 return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
487 (((const MCTargetAsmParser *)AsmParser)
488 ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
489 isExpr());
490 }
491
492 bool isSSrc_f32() const {
493 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
494 }
495
496 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
497
498 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
499
500 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
501
// Packed (V2) scalar-source predicates. Every body is marked unreachable, so
// no operand is expected to be tested against them; the return after each
// llvm_unreachable mirrors the corresponding scalar predicate.
// NOTE(review): presumably these exist only because generated matcher code
// references the names — confirm.
502 bool isSSrcV2F16() const {
503 llvm_unreachable("cannot happen");
504 return isSSrc_f16();
505 }
506
507 bool isSSrcV2FP32() const {
508 llvm_unreachable("cannot happen");
509 return isSSrc_f32();
510 }
511
512 bool isSCSrcV2FP32() const {
513 llvm_unreachable("cannot happen");
514 return isSCSrcF32();
515 }
516
517 bool isSSrcV2INT32() const {
518 llvm_unreachable("cannot happen");
519 return isSSrc_b32();
520 }
521
522 bool isSCSrcV2INT32() const {
523 llvm_unreachable("cannot happen");
524 return isSCSrc_b32();
525 }
526
527 bool isSSrcOrLds_b32() const {
528 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
529 isLiteralImm(MVT::i32) || isExpr();
530 }
531
532 bool isVCSrc_b32() const {
533 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
534 }
535
536 bool isVCSrc_b32_Lo256() const {
537 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
538 }
539
540 bool isVCSrc_b64_Lo256() const {
541 return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
542 }
543
544 bool isVCSrc_b64() const {
545 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
546 }
547
548 bool isVCSrcT_b16() const {
549 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
550 }
551
552 bool isVCSrcTB16_Lo128() const {
553 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
554 }
555
556 bool isVCSrcFake16B16_Lo128() const {
557 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
558 }
559
560 bool isVCSrc_b16() const {
561 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
562 }
563
564 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
565
566 bool isVCSrc_f32() const {
567 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
568 }
569
570 bool isVCSrc_f64() const {
571 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
572 }
573
574 bool isVCSrcTBF16() const {
575 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
576 }
577
578 bool isVCSrcT_f16() const {
579 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
580 }
581
582 bool isVCSrcT_bf16() const {
583 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
584 }
585
586 bool isVCSrcTBF16_Lo128() const {
587 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
588 }
589
590 bool isVCSrcTF16_Lo128() const {
591 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
592 }
593
594 bool isVCSrcFake16BF16_Lo128() const {
595 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
596 }
597
598 bool isVCSrcFake16F16_Lo128() const {
599 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
600 }
601
602 bool isVCSrc_bf16() const {
603 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
604 }
605
606 bool isVCSrc_f16() const {
607 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
608 }
609
610 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
611
612 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
613
614 bool isVSrc_b32() const {
615 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
616 }
617
618 bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }
619
620 bool isVSrc_v2b64() const {
621 return isRegOrInlineNoMods(AMDGPU::VS_128RegClassID, MVT::i64) ||
622 isLiteralImm(MVT::i64);
623 }
624
625 bool isVSrc_v2f64() const {
626 return isRegOrInlineNoMods(AMDGPU::VS_128RegClassID, MVT::f64) ||
627 isLiteralImm(MVT::f64);
628 }
629
630 bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
631
632 bool isVSrcT_b16_Lo128() const {
633 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
634 }
635
636 bool isVSrcFake16_b16_Lo128() const {
637 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
638 }
639
640 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
641
642 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
643
644 bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
645
646 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
647
648 bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
649
650 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
651
652 bool isVSrc_f32() const {
653 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
654 }
655
656 bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }
657
658 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
659
660 bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
661
662 bool isVSrcT_bf16_Lo128() const {
663 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
664 }
665
666 bool isVSrcT_f16_Lo128() const {
667 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
668 }
669
670 bool isVSrcFake16_bf16_Lo128() const {
671 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
672 }
673
674 bool isVSrcFake16_f16_Lo128() const {
675 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
676 }
677
678 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
679
680 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
681
682 bool isVSrc_v2bf16() const {
683 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
684 }
685
686 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
687
688 bool isVSrc_v2f16_splat() const { return isVSrc_v2f16(); }
689
690 bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
691
692 bool isVISrcB32() const {
693 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
694 }
695
696 bool isVISrcB16() const {
697 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
698 }
699
700 bool isVISrcV2B16() const {
701 return isVISrcB16();
702 }
703
704 bool isVISrcF32() const {
705 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
706 }
707
708 bool isVISrcF16() const {
709 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
710 }
711
712 bool isVISrcV2F16() const {
713 return isVISrcF16() || isVISrcB32();
714 }
715
716 bool isVISrc_64_bf16() const {
717 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
718 }
719
720 bool isVISrc_64_f16() const {
721 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
722 }
723
724 bool isVISrc_64_b32() const {
725 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
726 }
727
728 bool isVISrc_64B64() const {
729 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
730 }
731
732 bool isVISrc_64_f64() const {
733 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
734 }
735
736 bool isVISrc_64V2FP32() const {
737 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
738 }
739
740 bool isVISrc_64V2INT32() const {
741 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
742 }
743
744 bool isVISrc_256_b32() const {
745 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
746 }
747
748 bool isVISrc_256_f32() const {
749 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
750 }
751
752 bool isVISrc_256B64() const {
753 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
754 }
755
756 bool isVISrc_256_f64() const {
757 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
758 }
759
760 bool isVISrc_512_f64() const {
761 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
762 }
763
764 bool isVISrc_128B16() const {
765 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
766 }
767
768 bool isVISrc_128V2B16() const {
769 return isVISrc_128B16();
770 }
771
772 bool isVISrc_128_b32() const {
773 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
774 }
775
776 bool isVISrc_128_f32() const {
777 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
778 }
779
780 bool isVISrc_256V2FP32() const {
781 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
782 }
783
784 bool isVISrc_256V2INT32() const {
785 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
786 }
787
788 bool isVISrc_512_b32() const {
789 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
790 }
791
792 bool isVISrc_512B16() const {
793 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
794 }
795
796 bool isVISrc_512V2B16() const {
797 return isVISrc_512B16();
798 }
799
800 bool isVISrc_512_f32() const {
801 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
802 }
803
804 bool isVISrc_512F16() const {
805 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
806 }
807
808 bool isVISrc_512V2F16() const {
809 return isVISrc_512F16() || isVISrc_512_b32();
810 }
811
812 bool isVISrc_1024_b32() const {
813 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
814 }
815
816 bool isVISrc_1024B16() const {
817 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
818 }
819
820 bool isVISrc_1024V2B16() const {
821 return isVISrc_1024B16();
822 }
823
824 bool isVISrc_1024_f32() const {
825 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
826 }
827
828 bool isVISrc_1024F16() const {
829 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
830 }
831
832 bool isVISrc_1024V2F16() const {
833 return isVISrc_1024F16() || isVISrc_1024_b32();
834 }
835
836 bool isAISrcB32() const {
837 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
838 }
839
840 bool isAISrcB16() const {
841 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
842 }
843
844 bool isAISrcV2B16() const {
845 return isAISrcB16();
846 }
847
848 bool isAISrcF32() const {
849 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
850 }
851
852 bool isAISrcF16() const {
853 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
854 }
855
856 bool isAISrcV2F16() const {
857 return isAISrcF16() || isAISrcB32();
858 }
859
860 bool isAISrc_64B64() const {
861 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
862 }
863
864 bool isAISrc_64_f64() const {
865 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
866 }
867
868 bool isAISrc_128_b32() const {
869 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
870 }
871
872 bool isAISrc_128B16() const {
873 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
874 }
875
876 bool isAISrc_128V2B16() const {
877 return isAISrc_128B16();
878 }
879
880 bool isAISrc_128_f32() const {
881 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
882 }
883
884 bool isAISrc_128F16() const {
885 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
886 }
887
888 bool isAISrc_128V2F16() const {
889 return isAISrc_128F16() || isAISrc_128_b32();
890 }
891
892 bool isVISrc_128_bf16() const {
893 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
894 }
895
896 bool isVISrc_128_f16() const {
897 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
898 }
899
900 bool isVISrc_128V2F16() const {
901 return isVISrc_128_f16() || isVISrc_128_b32();
902 }
903
904 bool isAISrc_256B64() const {
905 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
906 }
907
908 bool isAISrc_256_f64() const {
909 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
910 }
911
912 bool isAISrc_512_b32() const {
913 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
914 }
915
916 bool isAISrc_512B16() const {
917 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
918 }
919
920 bool isAISrc_512V2B16() const {
921 return isAISrc_512B16();
922 }
923
924 bool isAISrc_512_f32() const {
925 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
926 }
927
928 bool isAISrc_512F16() const {
929 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
930 }
931
932 bool isAISrc_512V2F16() const {
933 return isAISrc_512F16() || isAISrc_512_b32();
934 }
935
936 bool isAISrc_1024_b32() const {
937 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
938 }
939
940 bool isAISrc_1024B16() const {
941 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
942 }
943
944 bool isAISrc_1024V2B16() const {
945 return isAISrc_1024B16();
946 }
947
948 bool isAISrc_1024_f32() const {
949 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
950 }
951
952 bool isAISrc_1024F16() const {
953 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
954 }
955
956 bool isAISrc_1024V2F16() const {
957 return isAISrc_1024F16() || isAISrc_1024_b32();
958 }
959
960 bool isKImmFP32() const {
961 return isLiteralImm(MVT::f32);
962 }
963
964 bool isKImmFP16() const {
965 return isLiteralImm(MVT::f16);
966 }
967
968 bool isKImmFP64() const { return isLiteralImm(MVT::f64); }
969
970 bool isMem() const override {
971 return false;
972 }
973
974 bool isExpr() const {
975 return Kind == Expression;
976 }
977
978 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
979
980 bool isSWaitCnt() const;
981 bool isDepCtr() const;
982 bool isSDelayALU() const;
983 bool isHwreg() const;
984 bool isSendMsg() const;
985 bool isWaitEvent() const;
986 bool isSplitBarrier() const;
987 bool isSwizzle() const;
988 bool isSMRDOffset8() const;
989 bool isSMEMOffset() const;
990 bool isSMRDLiteralOffset() const;
991 bool isDPP8() const;
992 bool isDPPCtrl() const;
993 bool isBLGP() const;
994 bool isGPRIdxMode() const;
995 bool isS16Imm() const;
996 bool isU16Imm() const;
997 bool isEndpgm() const;
998
999 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
1000 return [this, P]() { return P(*this); };
1001 }
1002
1003 StringRef getToken() const {
1004 assert(isToken());
1005 return StringRef(Tok.Data, Tok.Length);
1006 }
1007
1008 int64_t getImm() const {
1009 assert(isImm());
1010 return Imm.Val;
1011 }
1012
1013 void setImm(int64_t Val) {
1014 assert(isImm());
1015 Imm.Val = Val;
1016 }
1017
1018 ImmTy getImmTy() const {
1019 assert(isImm());
1020 return Imm.Type;
1021 }
1022
1023 MCRegister getReg() const override {
1024 assert(isRegKind());
1025 return Reg.RegNo;
1026 }
1027
1028 SMLoc getStartLoc() const override {
1029 return StartLoc;
1030 }
1031
1032 SMLoc getEndLoc() const override {
1033 return EndLoc;
1034 }
1035
1036 SMRange getLocRange() const {
1037 return SMRange(StartLoc, EndLoc);
1038 }
1039
1040 int getMCOpIdx() const { return MCOpIdx; }
1041
1042 Modifiers getModifiers() const {
1043 assert(isRegKind() || isImmTy(ImmTyNone));
1044 return isRegKind() ? Reg.Mods : Imm.Mods;
1045 }
1046
1047 void setModifiers(Modifiers Mods) {
1048 assert(isRegKind() || isImmTy(ImmTyNone));
1049 if (isRegKind())
1050 Reg.Mods = Mods;
1051 else
1052 Imm.Mods = Mods;
1053 }
1054
1055 bool hasModifiers() const {
1056 return getModifiers().hasModifiers();
1057 }
1058
1059 bool hasFPModifiers() const {
1060 return getModifiers().hasFPModifiers();
1061 }
1062
1063 bool hasIntModifiers() const {
1064 return getModifiers().hasIntModifiers();
1065 }
1066
1067 bool isForcedLit() const {
1068 return isImmLiteral() && getModifiers().isForcedLit();
1069 }
1070
1071 bool isForcedLit64() const {
1072 return isImmLiteral() && getModifiers().isForcedLit64();
1073 }
1074
1075 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1076
1077 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1078
1079 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1080
1081 void addRegOperands(MCInst &Inst, unsigned N) const;
1082
1083 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1084 if (isRegKind())
1085 addRegOperands(Inst, N);
1086 else
1087 addImmOperands(Inst, N);
1088 }
1089
1090 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1091 Modifiers Mods = getModifiers();
1092 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1093 if (isRegKind()) {
1094 addRegOperands(Inst, N);
1095 } else {
1096 addImmOperands(Inst, N, false);
1097 }
1098 }
1099
1100 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1101 assert(!hasIntModifiers());
1102 addRegOrImmWithInputModsOperands(Inst, N);
1103 }
1104
1105 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1106 assert(!hasFPModifiers());
1107 addRegOrImmWithInputModsOperands(Inst, N);
1108 }
1109
1110 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1111 Modifiers Mods = getModifiers();
1112 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1113 assert(isRegKind());
1114 addRegOperands(Inst, N);
1115 }
1116
1117 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1118 assert(!hasIntModifiers());
1119 addRegWithInputModsOperands(Inst, N);
1120 }
1121
1122 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1123 assert(!hasFPModifiers());
1124 addRegWithInputModsOperands(Inst, N);
1125 }
1126
1127 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1128 // clang-format off
1129 switch (Type) {
1130 case ImmTyNone: OS << "None"; break;
1131 case ImmTyGDS: OS << "GDS"; break;
1132 case ImmTyLDS: OS << "LDS"; break;
1133 case ImmTyOffen: OS << "Offen"; break;
1134 case ImmTyIdxen: OS << "Idxen"; break;
1135 case ImmTyAddr64: OS << "Addr64"; break;
1136 case ImmTyOffset: OS << "Offset"; break;
1137 case ImmTyInstOffset: OS << "InstOffset"; break;
1138 case ImmTyOffset0: OS << "Offset0"; break;
1139 case ImmTyOffset1: OS << "Offset1"; break;
1140 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1141 case ImmTyCPol: OS << "CPol"; break;
1142 case ImmTyIndexKey8bit: OS << "index_key"; break;
1143 case ImmTyIndexKey16bit: OS << "index_key"; break;
1144 case ImmTyIndexKey32bit: OS << "index_key"; break;
1145 case ImmTyTFE: OS << "TFE"; break;
1146 case ImmTyIsAsync: OS << "IsAsync"; break;
1147 case ImmTyD16: OS << "D16"; break;
1148 case ImmTyFORMAT: OS << "FORMAT"; break;
1149 case ImmTyClamp: OS << "Clamp"; break;
1150 case ImmTyOModSI: OS << "OModSI"; break;
1151 case ImmTyDPP8: OS << "DPP8"; break;
1152 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1153 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1154 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1155 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1156 case ImmTyDppFI: OS << "DppFI"; break;
1157 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1158 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1159 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1160 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1161 case ImmTyDMask: OS << "DMask"; break;
1162 case ImmTyDim: OS << "Dim"; break;
1163 case ImmTyUNorm: OS << "UNorm"; break;
1164 case ImmTyDA: OS << "DA"; break;
1165 case ImmTyR128A16: OS << "R128A16"; break;
1166 case ImmTyA16: OS << "A16"; break;
1167 case ImmTyLWE: OS << "LWE"; break;
1168 case ImmTyOff: OS << "Off"; break;
1169 case ImmTyExpTgt: OS << "ExpTgt"; break;
1170 case ImmTyExpCompr: OS << "ExpCompr"; break;
1171 case ImmTyExpVM: OS << "ExpVM"; break;
1172 case ImmTyDone: OS << "Done"; break;
1173 case ImmTyRowEn: OS << "RowEn"; break;
1174 case ImmTyHwreg: OS << "Hwreg"; break;
1175 case ImmTySendMsg: OS << "SendMsg"; break;
1176 case ImmTyWaitEvent: OS << "WaitEvent"; break;
1177 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1178 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1179 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1180 case ImmTyOpSel: OS << "OpSel"; break;
1181 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1182 case ImmTyNegLo: OS << "NegLo"; break;
1183 case ImmTyNegHi: OS << "NegHi"; break;
1184 case ImmTySwizzle: OS << "Swizzle"; break;
1185 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1186 case ImmTyHigh: OS << "High"; break;
1187 case ImmTyBLGP: OS << "BLGP"; break;
1188 case ImmTyCBSZ: OS << "CBSZ"; break;
1189 case ImmTyABID: OS << "ABID"; break;
1190 case ImmTyEndpgm: OS << "Endpgm"; break;
1191 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1192 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1193 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1194 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1195 case ImmTyBitOp3: OS << "BitOp3"; break;
1196 case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
1197 case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
1198 case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
1199 case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
1200 case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
1201 case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
1202 case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
1203 case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
1204 case ImmTyScaleSel: OS << "ScaleSel" ; break;
1205 case ImmTyByteSel: OS << "ByteSel" ; break;
1206 }
1207 // clang-format on
1208 }
1209
1210 void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
1211 switch (Kind) {
1212 case Register:
1213 OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
1214 << " mods: " << Reg.Mods << '>';
1215 break;
1216 case Immediate:
1217 OS << '<' << getImm();
1218 if (getImmTy() != ImmTyNone) {
1219 OS << " type: "; printImmTy(OS, getImmTy());
1220 }
1221 OS << " mods: " << Imm.Mods << '>';
1222 break;
1223 case Token:
1224 OS << '\'' << getToken() << '\'';
1225 break;
1226 case Expression:
1227 OS << "<expr ";
1228 MAI.printExpr(OS, *Expr);
1229 OS << '>';
1230 break;
1231 }
1232 }
1233
1234 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1235 int64_t Val, SMLoc Loc,
1236 ImmTy Type = ImmTyNone,
1237 bool IsFPImm = false) {
1238 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1239 Op->Imm.Val = Val;
1240 Op->Imm.IsFPImm = IsFPImm;
1241 Op->Imm.Type = Type;
1242 Op->Imm.Mods = Modifiers();
1243 Op->StartLoc = Loc;
1244 Op->EndLoc = Loc;
1245 return Op;
1246 }
1247
1248 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1249 StringRef Str, SMLoc Loc,
1250 bool HasExplicitEncodingSize = true) {
1251 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1252 Res->Tok.Data = Str.data();
1253 Res->Tok.Length = Str.size();
1254 Res->StartLoc = Loc;
1255 Res->EndLoc = Loc;
1256 return Res;
1257 }
1258
1259 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1260 MCRegister Reg, SMLoc S, SMLoc E) {
1261 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1262 Op->Reg.RegNo = Reg;
1263 Op->Reg.Mods = Modifiers();
1264 Op->StartLoc = S;
1265 Op->EndLoc = E;
1266 return Op;
1267 }
1268
1269 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1270 const class MCExpr *Expr, SMLoc S) {
1271 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1272 Op->Expr = Expr;
1273 Op->StartLoc = S;
1274 Op->EndLoc = S;
1275 return Op;
1276 }
1277};
1278
1279raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1280 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1281 return OS;
1282}
1283
1284//===----------------------------------------------------------------------===//
1285// AsmParser
1286//===----------------------------------------------------------------------===//
1287
1288// TODO: define GET_SUBTARGET_FEATURE_NAME
1289#define GET_REGISTER_MATCHER
1290#include "AMDGPUGenAsmMatcher.inc"
1291#undef GET_REGISTER_MATCHER
1292#undef GET_SUBTARGET_FEATURE_NAME
1293
1294// Holds info related to the current kernel, e.g. count of SGPRs used.
1295// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1296// .amdgpu_hsa_kernel or at EOF.
1297class KernelScopeInfo {
1298 int SgprIndexUnusedMin = -1;
1299 int VgprIndexUnusedMin = -1;
1300 int AgprIndexUnusedMin = -1;
1301 MCContext *Ctx = nullptr;
1302 MCSubtargetInfo const *MSTI = nullptr;
1303
1304 void usesSgprAt(int i) {
1305 if (i >= SgprIndexUnusedMin) {
1306 SgprIndexUnusedMin = ++i;
1307 if (Ctx) {
1308 MCSymbol* const Sym =
1309 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1310 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1311 }
1312 }
1313 }
1314
1315 void usesVgprAt(int i) {
1316 if (i >= VgprIndexUnusedMin) {
1317 VgprIndexUnusedMin = ++i;
1318 if (Ctx) {
1319 MCSymbol* const Sym =
1320 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1321 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1322 VgprIndexUnusedMin);
1323 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1324 }
1325 }
1326 }
1327
1328 void usesAgprAt(int i) {
1329 // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
1330 if (!hasMAIInsts(*MSTI))
1331 return;
1332
1333 if (i >= AgprIndexUnusedMin) {
1334 AgprIndexUnusedMin = ++i;
1335 if (Ctx) {
1336 MCSymbol* const Sym =
1337 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1338 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1339
1340 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1341 MCSymbol* const vSym =
1342 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1343 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1344 VgprIndexUnusedMin);
1345 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1346 }
1347 }
1348 }
1349
1350public:
1351 KernelScopeInfo() = default;
1352
1353 void initialize(MCContext &Context) {
1354 Ctx = &Context;
1355 MSTI = Ctx->getSubtargetInfo();
1356
1357 usesSgprAt(SgprIndexUnusedMin = -1);
1358 usesVgprAt(VgprIndexUnusedMin = -1);
1359 if (hasMAIInsts(*MSTI)) {
1360 usesAgprAt(AgprIndexUnusedMin = -1);
1361 }
1362 }
1363
1364 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1365 unsigned RegWidth) {
1366 switch (RegKind) {
1367 case IS_SGPR:
1368 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1369 break;
1370 case IS_AGPR:
1371 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1372 break;
1373 case IS_VGPR:
1374 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1375 break;
1376 default:
1377 break;
1378 }
1379 }
1380};
1381
1382class AMDGPUAsmParser : public MCTargetAsmParser {
1383 MCAsmParser &Parser;
1384
1385 unsigned ForcedEncodingSize = 0;
1386 bool ForcedDPP = false;
1387 bool ForcedSDWA = false;
1388 KernelScopeInfo KernelScope;
1389 const unsigned HwMode;
1390
1391 /// @name Auto-generated Match Functions
1392 /// {
1393
1394#define GET_ASSEMBLER_HEADER
1395#include "AMDGPUGenAsmMatcher.inc"
1396
1397 /// }
1398
1399 /// Get size of register operand
1400 unsigned getRegOperandSize(const MCInstrDesc &Desc, unsigned OpNo) const {
1401 assert(OpNo < Desc.NumOperands);
1402 int16_t RCID = MII.getOpRegClassID(Desc.operands()[OpNo], HwMode);
1403 return getRegBitWidth(RCID) / 8;
1404 }
1405
1406 std::optional<AMDGPU::InfoSectionData> InfoData;
1407
1408private:
1409 void createConstantSymbol(StringRef Id, int64_t Val);
1410
1411 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1412 bool OutOfRangeError(SMRange Range);
1413 /// Calculate VGPR/SGPR blocks required for given target, reserved
1414 /// registers, and user-specified NextFreeXGPR values.
1415 ///
1416 /// \param Features [in] Target features, used for bug corrections.
1417 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1418 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1419 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1420 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1421 /// descriptor field, if valid.
1422 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1423 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1424 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1425 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1426 /// \param VGPRBlocks [out] Result VGPR block count.
1427 /// \param SGPRBlocks [out] Result SGPR block count.
1428 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1429 const MCExpr *FlatScrUsed, bool XNACKUsed,
1430 std::optional<bool> EnableWavefrontSize32,
1431 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1432 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1433 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
1434 bool ParseDirectiveAMDGCNTarget();
1435 bool ParseDirectiveAMDHSACodeObjectVersion();
1436 bool ParseDirectiveAMDHSAKernel();
1437 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1438 bool ParseDirectiveAMDKernelCodeT();
1439 // TODO: Possibly make subtargetHasRegister const.
1440 bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
1441 bool ParseDirectiveAMDGPUHsaKernel();
1442
1443 bool ParseDirectiveISAVersion();
1444 bool ParseDirectiveHSAMetadata();
1445 bool ParseDirectivePALMetadataBegin();
1446 bool ParseDirectivePALMetadata();
1447 bool ParseDirectiveAMDGPULDS();
1448 bool ParseDirectiveAMDGPUInfo();
1449
1450 /// Common code to parse out a block of text (typically YAML) between start and
1451 /// end directives.
1452 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1453 const char *AssemblerDirectiveEnd,
1454 std::string &CollectString);
1455
1456 bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
1457 RegisterKind RegKind, MCRegister Reg1,
1458 RegisterKind RegKind1, SMLoc Loc);
1459 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1460 unsigned &RegNum, unsigned &RegWidth,
1461 bool RestoreOnFailure = false);
1462 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1463 unsigned &RegNum, unsigned &RegWidth,
1464 SmallVectorImpl<AsmToken> &Tokens);
1465 MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1466 unsigned &RegWidth,
1467 SmallVectorImpl<AsmToken> &Tokens);
1468 MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1469 unsigned &RegWidth,
1470 SmallVectorImpl<AsmToken> &Tokens);
1471 MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1472 unsigned &RegWidth,
1473 SmallVectorImpl<AsmToken> &Tokens);
1474 bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
1475 MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
1476 unsigned SubReg, unsigned RegWidth, SMLoc Loc);
1477
1478 bool isRegister();
1479 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1480 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1481 void initializeGprCountSymbol(RegisterKind RegKind);
1482 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1483 unsigned RegWidth);
1484 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1485 bool IsAtomic);
1486
1487public:
1488 enum OperandMode {
1489 OperandMode_Default,
1490 OperandMode_NSA,
1491 };
1492
1493 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1494
// Construct the parser for subtarget STI: record the register-info HwMode,
// compute the available features, and create the constant symbols and
// register-count tracking set up below.
1495 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1496 const MCInstrInfo &MII)
1497 : MCTargetAsmParser(STI, MII), Parser(_Parser),
1498 HwMode(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {
// NOTE(review): the embedded listing numbering jumps from 1498 to 1500, so a
// statement may be missing from this view -- confirm against the real file.
1500
1501 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1502
// The ISA version is looked up from the CPU name. With ISA major >= 6 and the
// HSA ABI it is exposed through .amdgcn.gfx_generation_* symbols, otherwise
// through .option.machine_version_* symbols.
1503 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1504 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1505 createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1506 createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1507 createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1508 } else {
1509 createConstantSymbol(".option.machine_version_major", ISA.Major);
1510 createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1511 createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1512 }
// Same condition as above: in that case the VGPR/SGPR count symbols are
// initialized; in every other case KernelScope is initialized instead.
1513 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1514 initializeGprCountSymbol(IS_VGPR);
1515 initializeGprCountSymbol(IS_SGPR);
1516 } else
1517 KernelScope.initialize(getContext());
1518
// One constant symbol per (Symbol, Code) pair returned by getGFXVersions().
1519 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1520 createConstantSymbol(Symbol, Code);
1521
// Additional UC_VERSION_* constants with fixed values.
1522 createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1523 createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1524 createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1525 }
1526
1527 bool hasMIMG_R128() const {
1528 return AMDGPU::hasMIMG_R128(getSTI());
1529 }
1530
1531 bool hasPackedD16() const {
1532 return AMDGPU::hasPackedD16(getSTI());
1533 }
1534
1535 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1536
1537 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1538
1539 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1540
1541 bool isSI() const {
1542 return AMDGPU::isSI(getSTI());
1543 }
1544
1545 bool isCI() const {
1546 return AMDGPU::isCI(getSTI());
1547 }
1548
1549 bool isVI() const {
1550 return AMDGPU::isVI(getSTI());
1551 }
1552
1553 bool isGFX9() const {
1554 return AMDGPU::isGFX9(getSTI());
1555 }
1556
1557 // TODO: isGFX90A is also true for GFX940. We need to clean it.
1558 bool isGFX90A() const {
1559 return AMDGPU::isGFX90A(getSTI());
1560 }
1561
1562 bool isGFX940() const {
1563 return AMDGPU::isGFX940(getSTI());
1564 }
1565
1566 bool isGFX9Plus() const {
1567 return AMDGPU::isGFX9Plus(getSTI());
1568 }
1569
1570 bool isGFX10() const {
1571 return AMDGPU::isGFX10(getSTI());
1572 }
1573
1574 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1575
1576 bool isGFX11() const {
1577 return AMDGPU::isGFX11(getSTI());
1578 }
1579
1580 bool isGFX11Plus() const {
1581 return AMDGPU::isGFX11Plus(getSTI());
1582 }
1583
1584 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1585
1586 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1587
1588 bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }
1589
1590 bool isGFX1250Plus() const { return AMDGPU::isGFX1250Plus(getSTI()); }
1591
1592 bool isGFX13() const { return AMDGPU::isGFX13(getSTI()); }
1593
1594 bool isGFX13Plus() const { return AMDGPU::isGFX13Plus(getSTI()); }
1595
1596 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1597
1598 bool isGFX10_BEncoding() const {
1599 return AMDGPU::isGFX10_BEncoding(getSTI());
1600 }
1601
1602 bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }
1603
1604 bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }
1605
1606 bool hasInv2PiInlineImm() const {
1607 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1608 }
1609
1610 bool has64BitLiterals() const {
1611 return getFeatureBits()[AMDGPU::Feature64BitLiterals];
1612 }
1613
1614 bool hasFlatOffsets() const {
1615 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1616 }
1617
1618 bool hasTrue16Insts() const {
1619 return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
1620 }
1621
1622 bool hasArchitectedFlatScratch() const {
1623 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1624 }
1625
1626 bool hasSGPR102_SGPR103() const {
1627 return !isVI() && !isGFX9();
1628 }
1629
1630 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1631
1632 bool hasIntClamp() const {
1633 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1634 }
1635
1636 bool hasPartialNSAEncoding() const {
1637 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1638 }
1639
1640 bool hasGloballyAddressableScratch() const {
1641 return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
1642 }
1643
1644 unsigned getNSAMaxSize(bool HasSampler = false) const {
1645 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1646 }
1647
1648 unsigned getMaxNumUserSGPRs() const {
1649 return AMDGPU::getMaxNumUserSGPRs(getSTI());
1650 }
1651
1652 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1653
1654 AMDGPUTargetStreamer &getTargetStreamer() {
1655 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1656 return static_cast<AMDGPUTargetStreamer &>(TS);
1657 }
1658
1659 MCContext &getContext() const {
1660 // We need this const_cast because for some reason getContext() is not const
1661 // in MCAsmParser.
1662 return const_cast<AMDGPUAsmParser *>(this)->MCTargetAsmParser::getContext();
1663 }
1664
1665 const MCRegisterInfo *getMRI() const {
1666 return getContext().getRegisterInfo();
1667 }
1668
1669 const MCInstrInfo *getMII() const {
1670 return &MII;
1671 }
1672
1673 // FIXME: This should not be used. Instead, should use queries derived from
1674 // getAvailableFeatures().
1675 const FeatureBitset &getFeatureBits() const {
1676 return getSTI().getFeatureBits();
1677 }
1678
1679 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1680 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1681 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1682
1683 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1684 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1685 bool isForcedDPP() const { return ForcedDPP; }
1686 bool isForcedSDWA() const { return ForcedSDWA; }
1687 ArrayRef<unsigned> getMatchedVariants() const;
1688 StringRef getMatchedVariantName() const;
1689
1690 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1691 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1692 bool RestoreOnFailure);
1693 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1694 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1695 SMLoc &EndLoc) override;
1696 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1697 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1698 unsigned Kind) override;
1699 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1700 OperandVector &Operands, MCStreamer &Out,
1701 uint64_t &ErrorInfo,
1702 bool MatchingInlineAsm) override;
1703 bool ParseDirective(AsmToken DirectiveID) override;
1704 void onEndOfFile() override;
1705 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1706 OperandMode Mode = OperandMode_Default);
1707 StringRef parseMnemonicSuffix(StringRef Name);
1708 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1709 SMLoc NameLoc, OperandVector &Operands) override;
1710 //bool ProcessInstruction(MCInst &Inst);
1711
1712 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1713
1714 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1715
1716 ParseStatus
1717 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1718 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1719 std::function<bool(int64_t &)> ConvertResult = nullptr);
1720
1721 ParseStatus parseOperandArrayWithPrefix(
1722 const char *Prefix, OperandVector &Operands,
1723 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1724 bool (*ConvertResult)(int64_t &) = nullptr);
1725
1726 ParseStatus
1727 parseNamedBit(StringRef Name, OperandVector &Operands,
1728 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1729 bool IgnoreNegative = false);
1730 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1731 ParseStatus parseCPol(OperandVector &Operands);
1732 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1733 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1734 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1735 SMLoc &StringLoc);
1736 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1737 StringRef Name,
1738 ArrayRef<const char *> Ids,
1739 int64_t &IntVal);
1740 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1741 StringRef Name,
1742 ArrayRef<const char *> Ids,
1743 AMDGPUOperand::ImmTy Type);
1744
1745 bool isModifier();
1746 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1747 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1748 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1749 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1750 bool parseSP3NegModifier();
1751 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1752 LitModifier Lit = LitModifier::None);
1753 ParseStatus parseReg(OperandVector &Operands);
1754 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1755 LitModifier Lit = LitModifier::None);
1756 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1757 bool AllowImm = true);
1758 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1759 bool AllowImm = true);
1760 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1761 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1762 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1763 ParseStatus tryParseIndexKey(OperandVector &Operands,
1764 AMDGPUOperand::ImmTy ImmTy);
1765 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1766 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1767 ParseStatus parseIndexKey32bit(OperandVector &Operands);
1768 ParseStatus tryParseMatrixFMT(OperandVector &Operands, StringRef Name,
1769 AMDGPUOperand::ImmTy Type);
1770 ParseStatus parseMatrixAFMT(OperandVector &Operands);
1771 ParseStatus parseMatrixBFMT(OperandVector &Operands);
1772 ParseStatus tryParseMatrixScale(OperandVector &Operands, StringRef Name,
1773 AMDGPUOperand::ImmTy Type);
1774 ParseStatus parseMatrixAScale(OperandVector &Operands);
1775 ParseStatus parseMatrixBScale(OperandVector &Operands);
1776 ParseStatus tryParseMatrixScaleFmt(OperandVector &Operands, StringRef Name,
1777 AMDGPUOperand::ImmTy Type);
1778 ParseStatus parseMatrixAScaleFmt(OperandVector &Operands);
1779 ParseStatus parseMatrixBScaleFmt(OperandVector &Operands);
1780
1781 ParseStatus parseDfmtNfmt(int64_t &Format);
1782 ParseStatus parseUfmt(int64_t &Format);
1783 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1784 int64_t &Format);
1785 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1786 int64_t &Format);
1787 ParseStatus parseFORMAT(OperandVector &Operands);
1788 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1789 ParseStatus parseNumericFormat(int64_t &Format);
1790 ParseStatus parseFlatOffset(OperandVector &Operands);
1791 ParseStatus parseR128A16(OperandVector &Operands);
1792 ParseStatus parseBLGP(OperandVector &Operands);
1793 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1794 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1795
1796 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1797
1798 bool parseCnt(int64_t &IntVal);
1799 ParseStatus parseSWaitCnt(OperandVector &Operands);
1800
1801 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1802 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1803 ParseStatus parseDepCtr(OperandVector &Operands);
1804
1805 bool parseDelay(int64_t &Delay);
1806 ParseStatus parseSDelayALU(OperandVector &Operands);
1807
1808 ParseStatus parseHwreg(OperandVector &Operands);
1809
1810private:
1811 struct OperandInfoTy {
1812 SMLoc Loc;
1813 int64_t Val;
1814 bool IsSymbolic = false;
1815 bool IsDefined = false;
1816
1817 constexpr OperandInfoTy(int64_t Val) : Val(Val) {}
1818 };
1819
1820 struct StructuredOpField : OperandInfoTy {
1821 StringLiteral Id;
1822 StringLiteral Desc;
1823 unsigned Width;
1824 bool IsDefined = false;
1825
1826 constexpr StructuredOpField(StringLiteral Id, StringLiteral Desc,
1827 unsigned Width, int64_t Default)
1828 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1829 virtual ~StructuredOpField() = default;
1830
1831 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1832 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1833 return false;
1834 }
1835
1836 virtual bool validate(AMDGPUAsmParser &Parser) const {
1837 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1838 return Error(Parser, "not supported on this GPU");
1839 if (!isUIntN(Width, Val))
1840 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1841 return true;
1842 }
1843 };
1844
1845 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1846 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1847
1848 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1849 bool validateSendMsg(const OperandInfoTy &Msg,
1850 const OperandInfoTy &Op,
1851 const OperandInfoTy &Stream);
1852
1853 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1854 OperandInfoTy &Width);
1855
1856 const AMDGPUOperand &findMCOperand(const OperandVector &Operands,
1857 int MCOpIdx) const;
1858
1859 static SMLoc getLaterLoc(SMLoc a, SMLoc b);
1860
1861 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1862 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1863 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1864
1865 SMLoc getOperandLoc(const OperandVector &Operands, int MCOpIdx) const;
1866 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1867 const OperandVector &Operands) const;
1868 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type,
1869 const OperandVector &Operands) const;
1870 SMLoc getInstLoc(const OperandVector &Operands) const;
1871
1872 bool validateInstruction(const MCInst &Inst, SMLoc IDLoc,
1873 const OperandVector &Operands);
1874 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1875 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1876 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1877 bool validateSOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1878 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1879 std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
1880 bool AsVOPD3);
1881 bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
1882 bool tryVOPD(const MCInst &Inst);
1883 bool tryVOPD3(const MCInst &Inst);
1884 bool tryAnotherVOPDEncoding(const MCInst &Inst);
1885
1886 bool validateIntClampSupported(const MCInst &Inst);
1887 bool validateMIMGAtomicDMask(const MCInst &Inst);
1888 bool validateMIMGGatherDMask(const MCInst &Inst);
1889 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1890 bool validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc);
1891 bool validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc);
1892 bool validateMIMGD16(const MCInst &Inst);
1893 bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1894 bool validateTensorR128(const MCInst &Inst);
1895 bool validateMIMGMSAA(const MCInst &Inst);
1896 bool validateOpSel(const MCInst &Inst);
1897 bool validateTrue16OpSel(const MCInst &Inst);
1898 bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
1899 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1900 bool validateVccOperand(MCRegister Reg) const;
1901 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1902 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1903 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1904 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1905 bool validateAGPRLdSt(const MCInst &Inst) const;
1906 bool validateVGPRAlign(const MCInst &Inst) const;
1907 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1908 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1909 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1910 bool validateDivScale(const MCInst &Inst);
1911 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1912 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1913 SMLoc IDLoc);
1914 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1915 const unsigned CPol);
1916 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1917 bool validateLdsDirect(const MCInst &Inst, const OperandVector &Operands);
1918 bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
1919 unsigned getConstantBusLimit(unsigned Opcode) const;
1920 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1921 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1922 MCRegister findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1923
1924 bool isSupportedMnemo(StringRef Mnemo,
1925 const FeatureBitset &FBS);
1926 bool isSupportedMnemo(StringRef Mnemo,
1927 const FeatureBitset &FBS,
1928 ArrayRef<unsigned> Variants);
1929 bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);
1930
1931 bool isId(const StringRef Id) const;
1932 bool isId(const AsmToken &Token, const StringRef Id) const;
1933 bool isToken(const AsmToken::TokenKind Kind) const;
1934 StringRef getId() const;
1935 bool trySkipId(const StringRef Id);
1936 bool trySkipId(const StringRef Pref, const StringRef Id);
1937 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1938 bool trySkipToken(const AsmToken::TokenKind Kind);
1939 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1940 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1941 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1942
1943 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1944 AsmToken::TokenKind getTokenKind() const;
1945 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1946 bool parseExpr(OperandVector &Operands);
1947 StringRef getTokenStr() const;
1948 AsmToken peekToken(bool ShouldSkipSpace = true);
1949 AsmToken getToken() const;
1950 SMLoc getLoc() const;
1951 void lex();
1952
1953public:
1954 void onBeginOfFile() override;
1955 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1956
1957 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1958
1959 ParseStatus parseExpTgt(OperandVector &Operands);
1960 ParseStatus parseSendMsg(OperandVector &Operands);
1961 ParseStatus parseWaitEvent(OperandVector &Operands);
1962 ParseStatus parseInterpSlot(OperandVector &Operands);
1963 ParseStatus parseInterpAttr(OperandVector &Operands);
1964 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1965 ParseStatus parseBoolReg(OperandVector &Operands);
1966
1967 bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1968 const unsigned MaxVal, const Twine &ErrMsg,
1969 SMLoc &Loc);
1970 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1971 const unsigned MinVal,
1972 const unsigned MaxVal,
1973 const StringRef ErrMsg);
1974 ParseStatus parseSwizzle(OperandVector &Operands);
1975 bool parseSwizzleOffset(int64_t &Imm);
1976 bool parseSwizzleMacro(int64_t &Imm);
1977 bool parseSwizzleQuadPerm(int64_t &Imm);
1978 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1979 bool parseSwizzleBroadcast(int64_t &Imm);
1980 bool parseSwizzleSwap(int64_t &Imm);
1981 bool parseSwizzleReverse(int64_t &Imm);
1982 bool parseSwizzleFFT(int64_t &Imm);
1983 bool parseSwizzleRotate(int64_t &Imm);
1984
1985 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1986 int64_t parseGPRIdxMacro();
1987
1988 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1989 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1990
1991 ParseStatus parseOModSI(OperandVector &Operands);
1992
1993 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1994 OptionalImmIndexMap &OptionalIdx);
1995 void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
1996 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1997 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1998 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1999 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
2000
2001 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
2002 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
2003 OptionalImmIndexMap &OptionalIdx);
2004 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
2005 OptionalImmIndexMap &OptionalIdx);
2006
2007 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
2008 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
2009 void cvtOpSelHelper(MCInst &Inst, unsigned OpSel);
2010
2011 bool parseDimId(unsigned &Encoding);
2012 ParseStatus parseDim(OperandVector &Operands);
2013 bool convertDppBoundCtrl(int64_t &BoundCtrl);
2014 ParseStatus parseDPP8(OperandVector &Operands);
2015 ParseStatus parseDPPCtrl(OperandVector &Operands);
2016 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
2017 int64_t parseDPPCtrlSel(StringRef Ctrl);
2018 int64_t parseDPPCtrlPerm();
2019 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
2020 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
2021 cvtDPP(Inst, Operands, true);
2022 }
2023 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
2024 bool IsDPP8 = false);
2025 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
2026 cvtVOP3DPP(Inst, Operands, true);
2027 }
2028
2029 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
2030 AMDGPUOperand::ImmTy Type);
2031 ParseStatus parseSDWADstUnused(OperandVector &Operands);
2032 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
2033 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
2034 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
2035 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
2036 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
2037
/// Basic instruction type passed to cvtSDWA to select the conversion flavour.
enum class SDWAInstType : unsigned {
  VOP1 = 0,
  VOP2 = 1,
  VOPC = 2,
};
2039
2040 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
2041 SDWAInstType BasicInstType, bool SkipDstVcc = false,
2042 bool SkipSrcVcc = false);
2043
2044 ParseStatus parseEndpgm(OperandVector &Operands);
2045
2046 ParseStatus parseVOPD(OperandVector &Operands);
2047};
2048
2049} // end anonymous namespace
2050
2051// May be called with integer type with equivalent bitwidth.
2052static const fltSemantics *getFltSemantics(unsigned Size) {
2053 switch (Size) {
2054 case 4:
2055 return &APFloat::IEEEsingle();
2056 case 8:
2057 return &APFloat::IEEEdouble();
2058 case 2:
2059 return &APFloat::IEEEhalf();
2060 default:
2061 llvm_unreachable("unsupported fp type");
2062 }
2063}
2064
2066 return getFltSemantics(VT.getScalarSizeInBits() / 8);
2067}
2068
2070 switch (OperandType) {
2071 // When floating-point immediate is used as operand of type i16, the 32-bit
2072 // representation of the constant truncated to the 16 LSBs should be used.
2087 return &APFloat::IEEEsingle();
2096 return &APFloat::IEEEdouble();
2104 return &APFloat::IEEEhalf();
2109 return &APFloat::BFloat();
2110 default:
2111 llvm_unreachable("unsupported fp type");
2112 }
2113}
2114
2115//===----------------------------------------------------------------------===//
2116// Operand
2117//===----------------------------------------------------------------------===//
2118
2119static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
2120 bool Lost;
2121
2122 // Convert literal to single precision
2125 &Lost);
2126 // We allow precision loss but not overflow or underflow
2127 if (Status != APFloat::opOK &&
2128 Lost &&
2129 ((Status & APFloat::opOverflow) != 0 ||
2130 (Status & APFloat::opUnderflow) != 0)) {
2131 return false;
2132 }
2133
2134 return true;
2135}
2136
2137static bool isSafeTruncation(int64_t Val, unsigned Size) {
2138 return isUIntN(Size, Val) || isIntN(Size, Val);
2139}
2140
2141static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2142 if (VT.getScalarType() == MVT::i16)
2143 return isInlinableLiteral32(Val, HasInv2Pi);
2144
2145 if (VT.getScalarType() == MVT::f16)
2146 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2147
2148 assert(VT.getScalarType() == MVT::bf16);
2149
2150 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2151}
2152
2153bool AMDGPUOperand::isInlinableImm(MVT type) const {
2154
2155 // This is a hack to enable named inline values like
2156 // shared_base with both 32-bit and 64-bit operands.
2157 // Note that these values are defined as
2158 // 32-bit operands only.
2159 if (isInlineValue()) {
2160 return true;
2161 }
2162
2163 if (!isImmTy(ImmTyNone)) {
2164 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2165 return false;
2166 }
2167
2168 if (getModifiers().Lit != LitModifier::None)
2169 return false;
2170
2171 // TODO: We should avoid using host float here. It would be better to
2172 // check the float bit values which is what a few other places do.
2173 // We've had bot failures before due to weird NaN support on mips hosts.
2174
2175 APInt Literal(64, Imm.Val);
2176
2177 if (Imm.IsFPImm) { // We got fp literal token
2178 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2180 AsmParser->hasInv2PiInlineImm());
2181 }
2182
2183 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2184 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2185 return false;
2186
2187 if (type.getScalarSizeInBits() == 16) {
2188 bool Lost = false;
2189 switch (type.getScalarType().SimpleTy) {
2190 default:
2191 llvm_unreachable("unknown 16-bit type");
2192 case MVT::bf16:
2193 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2194 &Lost);
2195 break;
2196 case MVT::f16:
2197 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2198 &Lost);
2199 break;
2200 case MVT::i16:
2201 FPLiteral.convert(APFloatBase::IEEEsingle(),
2202 APFloat::rmNearestTiesToEven, &Lost);
2203 break;
2204 }
2205 // We need to use 32-bit representation here because when a floating-point
2206 // inline constant is used as an i16 operand, its 32-bit representation
2207 // will be used. We will need the 32-bit value to check if
2208 // it is FP inline constant.
2209 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2210 return isInlineableLiteralOp16(ImmVal, type,
2211 AsmParser->hasInv2PiInlineImm());
2212 }
2213
2214 // Check if single precision literal is inlinable
2216 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2217 AsmParser->hasInv2PiInlineImm());
2218 }
2219
2220 // We got int literal token.
2221 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2223 AsmParser->hasInv2PiInlineImm());
2224 }
2225
2226 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2227 return false;
2228 }
2229
2230 if (type.getScalarSizeInBits() == 16) {
2232 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2233 type, AsmParser->hasInv2PiInlineImm());
2234 }
2235
2237 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2238 AsmParser->hasInv2PiInlineImm());
2239}
2240
2241bool AMDGPUOperand::isLiteralImm(MVT type) const {
2242 // Check that this immediate can be added as literal
2243 if (!isImmTy(ImmTyNone)) {
2244 return false;
2245 }
2246
2247 bool Allow64Bit =
2248 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2249
2250 if (!Imm.IsFPImm) {
2251 // We got int literal token.
2252
2253 if (type == MVT::f64 && hasFPModifiers()) {
2254 // Cannot apply fp modifiers to int literals preserving the same semantics
2255 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
2256 // disable these cases.
2257 return false;
2258 }
2259
2260 unsigned Size = type.getSizeInBits();
2261 if (Size == 64) {
2262 if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Imm.Val, false))
2263 return true;
2264 Size = 32;
2265 }
2266
2267 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2268 // types.
2269 return isSafeTruncation(Imm.Val, Size);
2270 }
2271
2272 // We got fp literal token
2273 if (type == MVT::f64) { // Expected 64-bit fp operand
2274 // We would set low 64-bits of literal to zeroes but we accept this literals
2275 return true;
2276 }
2277
2278 if (type == MVT::i64) { // Expected 64-bit int operand
2279 // We don't allow fp literals in 64-bit integer instructions. It is
2280 // unclear how we should encode them.
2281 return false;
2282 }
2283
2284 // We allow fp literals with f16x2 operands assuming that the specified
2285 // literal goes into the lower half and the upper half is zero. We also
2286 // require that the literal may be losslessly converted to f16.
2287 //
2288 // For i16x2 operands, we assume that the specified literal is encoded as a
2289 // single-precision float. This is pretty odd, but it matches SP3 and what
2290 // happens in hardware.
2291 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2292 : (type == MVT::v2i16) ? MVT::f32
2293 : (type == MVT::v2f32) ? MVT::f32
2294 : type;
2295
2296 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2297 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2298}
2299
2300bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2301 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2302}
2303
2304bool AMDGPUOperand::isVRegWithInputMods() const {
2305 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2306 // GFX90A allows DPP on 64-bit operands.
2307 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2308 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2309}
2310
2311template <bool IsFake16>
2312bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2313 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2314 : AMDGPU::VGPR_16_Lo128RegClassID);
2315}
2316
2317template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2318 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2319 : AMDGPU::VGPR_16RegClassID);
2320}
2321
2322bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2323 if (AsmParser->isVI())
2324 return isVReg32();
2325 if (AsmParser->isGFX9Plus())
2326 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2327 return false;
2328}
2329
2330bool AMDGPUOperand::isSDWAFP16Operand() const {
2331 return isSDWAOperand(MVT::f16);
2332}
2333
2334bool AMDGPUOperand::isSDWAFP32Operand() const {
2335 return isSDWAOperand(MVT::f32);
2336}
2337
2338bool AMDGPUOperand::isSDWAInt16Operand() const {
2339 return isSDWAOperand(MVT::i16);
2340}
2341
2342bool AMDGPUOperand::isSDWAInt32Operand() const {
2343 return isSDWAOperand(MVT::i32);
2344}
2345
2346bool AMDGPUOperand::isBoolReg() const {
2347 return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
2348 (AsmParser->isWave32() && isSCSrc_b32()));
2349}
2350
2351uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2352{
2353 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2354 assert(Size == 2 || Size == 4 || Size == 8);
2355
2356 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2357
2358 if (Imm.Mods.Abs) {
2359 Val &= ~FpSignMask;
2360 }
2361 if (Imm.Mods.Neg) {
2362 Val ^= FpSignMask;
2363 }
2364
2365 return Val;
2366}
2367
2368void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2369 MCOpIdx = Inst.getNumOperands();
2370
2371 if (isExpr()) {
2373 return;
2374 }
2375
2376 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2377 Inst.getNumOperands())) {
2378 addLiteralImmOperand(Inst, Imm.Val,
2379 ApplyModifiers &
2380 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2381 } else {
2382 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2384 }
2385}
2386
2387void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2388 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2389 auto OpNum = Inst.getNumOperands();
2390 // Check that this operand accepts literals
2391 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2392
2393 if (ApplyModifiers) {
2394 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2395 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2396 Val = applyInputFPModifiers(Val, Size);
2397 }
2398
2399 APInt Literal(64, Val);
2400 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2401
2402 bool CanUse64BitLiterals =
2403 AsmParser->has64BitLiterals() &&
2404 !(InstDesc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));
2405 LitModifier Lit = getModifiers().Lit;
2406 MCContext &Ctx = AsmParser->getContext();
2407
2408 if (Imm.IsFPImm) { // We got fp literal token
2409 switch (OpTy) {
2417 if (Lit == LitModifier::None &&
2419 AsmParser->hasInv2PiInlineImm())) {
2420 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2421 return;
2422 }
2423
2424 // Non-inlineable
2425 if (AMDGPU::isSISrcFPOperand(InstDesc,
2426 OpNum)) { // Expected 64-bit fp operand
2427 bool HasMandatoryLiteral =
2428 AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm);
2429 // For fp operands we check if low 32 bits are zeros
2430 if (Literal.getLoBits(32) != 0 &&
2431 (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
2432 !HasMandatoryLiteral) {
2433 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
2434 Inst.getLoc(),
2435 "Can't encode literal as exact 64-bit floating-point operand. "
2436 "Low 32-bits will be set to zero");
2437 Val &= 0xffffffff00000000u;
2438 }
2439
2440 if ((OpTy == AMDGPU::OPERAND_REG_IMM_FP64 ||
2443 if (CanUse64BitLiterals && Lit == LitModifier::None &&
2444 (isInt<32>(Val) || isUInt<32>(Val))) {
2445 // The floating-point operand will be verbalized as an
2446 // integer one. If that integer happens to fit 32 bits, on
2447 // re-assembling it will be interpreted as the high half of
2448 // the actual value, so we have to wrap it into lit64().
2449 Lit = LitModifier::Lit64;
2450 } else if (Lit == LitModifier::Lit) {
2451 // For FP64 operands lit() specifies the high half of the value.
2452 Val = Hi_32(Val);
2453 }
2454 }
2455 break;
2456 }
2457
2458 // We don't allow fp literals in 64-bit integer instructions. It is
2459 // unclear how we should encode them. This case should be checked earlier
2460 // in predicate methods (isLiteralImm())
2461 llvm_unreachable("fp literal in 64-bit integer instruction.");
2462
2464 if (CanUse64BitLiterals && Lit == LitModifier::None &&
2465 (isInt<32>(Val) || isUInt<32>(Val)))
2466 Lit = LitModifier::Lit64;
2467 break;
2468
2473 if (Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
2474 Literal == 0x3fc45f306725feed) {
2475 // This is the 1/(2*pi) which is going to be truncated to bf16 with the
2476 // loss of precision. The constant represents idiomatic fp32 value of
2477 // 1/(2*pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16
2478 // bits. Prevent rounding below.
2479 Inst.addOperand(MCOperand::createImm(0x3e22));
2480 return;
2481 }
2482 [[fallthrough]];
2483
2505 bool lost;
2506 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2507 // Convert literal to single precision
2508 FPLiteral.convert(*getOpFltSemantics(OpTy),
2509 APFloat::rmNearestTiesToEven, &lost);
2510 // We allow precision lost but not overflow or underflow. This should be
2511 // checked earlier in isLiteralImm()
2512
2513 Val = FPLiteral.bitcastToAPInt().getZExtValue();
2514 break;
2515 }
2516 default:
2517 llvm_unreachable("invalid operand size");
2518 }
2519
2520 if (Lit != LitModifier::None) {
2521 Inst.addOperand(
2523 } else {
2525 }
2526 return;
2527 }
2528
2529 // We got int literal token.
2530 // Only sign extend inline immediates.
2531 switch (OpTy) {
2546 break;
2547
2551 if (Lit == LitModifier::None &&
2552 AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2554 return;
2555 }
2556
2557 // When the 32 MSBs are not zero (effectively means it can't be safely
2558 // truncated to uint32_t), if the target doesn't support 64-bit literals, or
2559 // the lit modifier is explicitly used, we need to truncate it to the 32
2560 // LSBs.
2561 if (!AsmParser->has64BitLiterals() || Lit == LitModifier::Lit)
2562 Val = Lo_32(Val);
2563 break;
2564
2569 if (Lit == LitModifier::None &&
2570 AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2572 return;
2573 }
2574
2575 // If the target doesn't support 64-bit literals, we need to use the
2576 // constant as the high 32 MSBs of a double-precision floating point value.
2577 if (!AsmParser->has64BitLiterals()) {
2578 Val = static_cast<uint64_t>(Val) << 32;
2579 } else {
2580 // Now the target does support 64-bit literals, there are two cases
2581 // where we still want to use src_literal encoding:
2582 // 1) explicitly forced by using lit modifier;
2583 // 2) the value is a valid 32-bit representation (signed or unsigned),
2584 // meanwhile not forced by lit64 modifier.
2585 if (Lit == LitModifier::Lit ||
2586 (Lit != LitModifier::Lit64 && (isInt<32>(Val) || isUInt<32>(Val))))
2587 Val = static_cast<uint64_t>(Val) << 32;
2588 }
2589
2590 // For FP64 operands lit() specifies the high half of the value.
2591 if (Lit == LitModifier::Lit)
2592 Val = Hi_32(Val);
2593 break;
2594
2606 break;
2607
2609 if ((isInt<32>(Val) || isUInt<32>(Val)) && Lit != LitModifier::Lit64)
2610 Val <<= 32;
2611 break;
2612
2613 default:
2614 llvm_unreachable("invalid operand type");
2615 }
2616
2617 if (Lit != LitModifier::None) {
2618 Inst.addOperand(
2620 } else {
2622 }
2623}
2624
2625void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2626 MCOpIdx = Inst.getNumOperands();
2627 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2628}
2629
2630bool AMDGPUOperand::isInlineValue() const {
2631 return isRegKind() && ::isInlineValue(getReg());
2632}
2633
2634//===----------------------------------------------------------------------===//
2635// AsmParser
2636//===----------------------------------------------------------------------===//
2637
2638void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2639 // TODO: make those pre-defined variables read-only.
2640 // Currently there is no suitable machinery in the core llvm-mc for this.
2641 // MCSymbol::isRedefinable is intended for another purpose, and
2642 // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
2643 MCContext &Ctx = getContext();
2644 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2646}
2647
2648static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2649 if (Is == IS_VGPR) {
2650 switch (RegWidth) {
2651 default: return -1;
2652 case 32:
2653 return AMDGPU::VGPR_32RegClassID;
2654 case 64:
2655 return AMDGPU::VReg_64RegClassID;
2656 case 96:
2657 return AMDGPU::VReg_96RegClassID;
2658 case 128:
2659 return AMDGPU::VReg_128RegClassID;
2660 case 160:
2661 return AMDGPU::VReg_160RegClassID;
2662 case 192:
2663 return AMDGPU::VReg_192RegClassID;
2664 case 224:
2665 return AMDGPU::VReg_224RegClassID;
2666 case 256:
2667 return AMDGPU::VReg_256RegClassID;
2668 case 288:
2669 return AMDGPU::VReg_288RegClassID;
2670 case 320:
2671 return AMDGPU::VReg_320RegClassID;
2672 case 352:
2673 return AMDGPU::VReg_352RegClassID;
2674 case 384:
2675 return AMDGPU::VReg_384RegClassID;
2676 case 512:
2677 return AMDGPU::VReg_512RegClassID;
2678 case 1024:
2679 return AMDGPU::VReg_1024RegClassID;
2680 }
2681 } else if (Is == IS_TTMP) {
2682 switch (RegWidth) {
2683 default: return -1;
2684 case 32:
2685 return AMDGPU::TTMP_32RegClassID;
2686 case 64:
2687 return AMDGPU::TTMP_64RegClassID;
2688 case 128:
2689 return AMDGPU::TTMP_128RegClassID;
2690 case 256:
2691 return AMDGPU::TTMP_256RegClassID;
2692 case 512:
2693 return AMDGPU::TTMP_512RegClassID;
2694 }
2695 } else if (Is == IS_SGPR) {
2696 switch (RegWidth) {
2697 default: return -1;
2698 case 32:
2699 return AMDGPU::SGPR_32RegClassID;
2700 case 64:
2701 return AMDGPU::SGPR_64RegClassID;
2702 case 96:
2703 return AMDGPU::SGPR_96RegClassID;
2704 case 128:
2705 return AMDGPU::SGPR_128RegClassID;
2706 case 160:
2707 return AMDGPU::SGPR_160RegClassID;
2708 case 192:
2709 return AMDGPU::SGPR_192RegClassID;
2710 case 224:
2711 return AMDGPU::SGPR_224RegClassID;
2712 case 256:
2713 return AMDGPU::SGPR_256RegClassID;
2714 case 288:
2715 return AMDGPU::SGPR_288RegClassID;
2716 case 320:
2717 return AMDGPU::SGPR_320RegClassID;
2718 case 352:
2719 return AMDGPU::SGPR_352RegClassID;
2720 case 384:
2721 return AMDGPU::SGPR_384RegClassID;
2722 case 512:
2723 return AMDGPU::SGPR_512RegClassID;
2724 }
2725 } else if (Is == IS_AGPR) {
2726 switch (RegWidth) {
2727 default: return -1;
2728 case 32:
2729 return AMDGPU::AGPR_32RegClassID;
2730 case 64:
2731 return AMDGPU::AReg_64RegClassID;
2732 case 96:
2733 return AMDGPU::AReg_96RegClassID;
2734 case 128:
2735 return AMDGPU::AReg_128RegClassID;
2736 case 160:
2737 return AMDGPU::AReg_160RegClassID;
2738 case 192:
2739 return AMDGPU::AReg_192RegClassID;
2740 case 224:
2741 return AMDGPU::AReg_224RegClassID;
2742 case 256:
2743 return AMDGPU::AReg_256RegClassID;
2744 case 288:
2745 return AMDGPU::AReg_288RegClassID;
2746 case 320:
2747 return AMDGPU::AReg_320RegClassID;
2748 case 352:
2749 return AMDGPU::AReg_352RegClassID;
2750 case 384:
2751 return AMDGPU::AReg_384RegClassID;
2752 case 512:
2753 return AMDGPU::AReg_512RegClassID;
2754 case 1024:
2755 return AMDGPU::AReg_1024RegClassID;
2756 }
2757 }
2758 return -1;
2759}
2760
2763 .Case("exec", AMDGPU::EXEC)
2764 .Case("vcc", AMDGPU::VCC)
2765 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2766 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2767 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2768 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2769 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2770 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2771 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2772 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2773 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2774 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2775 .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
2776 .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
2777 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2778 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2779 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2780 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2781 .Case("m0", AMDGPU::M0)
2782 .Case("vccz", AMDGPU::SRC_VCCZ)
2783 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2784 .Case("execz", AMDGPU::SRC_EXECZ)
2785 .Case("src_execz", AMDGPU::SRC_EXECZ)
2786 .Case("scc", AMDGPU::SRC_SCC)
2787 .Case("src_scc", AMDGPU::SRC_SCC)
2788 .Case("tba", AMDGPU::TBA)
2789 .Case("tma", AMDGPU::TMA)
2790 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2791 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2792 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2793 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2794 .Case("vcc_lo", AMDGPU::VCC_LO)
2795 .Case("vcc_hi", AMDGPU::VCC_HI)
2796 .Case("exec_lo", AMDGPU::EXEC_LO)
2797 .Case("exec_hi", AMDGPU::EXEC_HI)
2798 .Case("tma_lo", AMDGPU::TMA_LO)
2799 .Case("tma_hi", AMDGPU::TMA_HI)
2800 .Case("tba_lo", AMDGPU::TBA_LO)
2801 .Case("tba_hi", AMDGPU::TBA_HI)
2802 .Case("pc", AMDGPU::PC_REG)
2803 .Case("null", AMDGPU::SGPR_NULL)
2804 .Default(AMDGPU::NoRegister);
2805}
2806
2807bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2808 SMLoc &EndLoc, bool RestoreOnFailure) {
2809 auto R = parseRegister();
2810 if (!R) return true;
2811 assert(R->isReg());
2812 RegNo = R->getReg();
2813 StartLoc = R->getStartLoc();
2814 EndLoc = R->getEndLoc();
2815 return false;
2816}
2817
2818bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2819 SMLoc &EndLoc) {
2820 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2821}
2822
2823ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2824 SMLoc &EndLoc) {
2825 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2826 bool PendingErrors = getParser().hasPendingError();
2827 getParser().clearPendingErrors();
2828 if (PendingErrors)
2829 return ParseStatus::Failure;
2830 if (Result)
2831 return ParseStatus::NoMatch;
2832 return ParseStatus::Success;
2833}
2834
2835bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2836 RegisterKind RegKind,
2837 MCRegister Reg1,
2838 RegisterKind RegKind1, SMLoc Loc) {
2839 // Allow VCC_LO/HI at the end of SGPR lists.
2840 if (RegKind == IS_SGPR) {
2841 unsigned RegIdx = (Reg - AMDGPU::SGPR0) + RegWidth / 32;
2842 if ((RegIdx == 106 && Reg1 == AMDGPU::VCC_LO) ||
2843 (RegIdx == 107 && Reg1 == AMDGPU::VCC_HI)) {
2844 RegWidth += 32;
2845 return true;
2846 }
2847 }
2848
2849 if (RegKind != RegKind1) {
2850 Error(Loc, "registers in a list must be of the same kind");
2851 return MCRegister();
2852 }
2853
2854 switch (RegKind) {
2855 case IS_SPECIAL:
2856 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2857 Reg = AMDGPU::EXEC;
2858 RegWidth = 64;
2859 return true;
2860 }
2861 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2862 Reg = AMDGPU::FLAT_SCR;
2863 RegWidth = 64;
2864 return true;
2865 }
2866 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2867 Reg = AMDGPU::XNACK_MASK;
2868 RegWidth = 64;
2869 return true;
2870 }
2871 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2872 Reg = AMDGPU::VCC;
2873 RegWidth = 64;
2874 return true;
2875 }
2876 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2877 Reg = AMDGPU::TBA;
2878 RegWidth = 64;
2879 return true;
2880 }
2881 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2882 Reg = AMDGPU::TMA;
2883 RegWidth = 64;
2884 return true;
2885 }
2886 Error(Loc, "register does not fit in the list");
2887 return false;
2888 case IS_VGPR:
2889 case IS_SGPR:
2890 case IS_AGPR:
2891 case IS_TTMP:
2892 if (Reg1 != Reg + RegWidth / 32) {
2893 Error(Loc, "registers in a list must have consecutive indices");
2894 return false;
2895 }
2896 RegWidth += 32;
2897 return true;
2898 default:
2899 llvm_unreachable("unexpected register kind");
2900 }
2901}
2902
2903struct RegInfo {
2905 RegisterKind Kind;
2906};
2907
2908static constexpr RegInfo RegularRegisters[] = {
2909 {{"v"}, IS_VGPR},
2910 {{"s"}, IS_SGPR},
2911 {{"ttmp"}, IS_TTMP},
2912 {{"acc"}, IS_AGPR},
2913 {{"a"}, IS_AGPR},
2914};
2915
2916static bool isRegularReg(RegisterKind Kind) {
2917 return Kind == IS_VGPR ||
2918 Kind == IS_SGPR ||
2919 Kind == IS_TTMP ||
2920 Kind == IS_AGPR;
2921}
2922
2924 for (const RegInfo &Reg : RegularRegisters)
2925 if (Str.starts_with(Reg.Name))
2926 return &Reg;
2927 return nullptr;
2928}
2929
2930static bool getRegNum(StringRef Str, unsigned& Num) {
2931 return !Str.getAsInteger(10, Num);
2932}
2933
2934bool
2935AMDGPUAsmParser::isRegister(const AsmToken &Token,
2936 const AsmToken &NextToken) const {
2937
2938 // A list of consecutive registers: [s0,s1,s2,s3]
2939 if (Token.is(AsmToken::LBrac))
2940 return true;
2941
2942 if (!Token.is(AsmToken::Identifier))
2943 return false;
2944
2945 // A single register like s0 or a range of registers like s[0:1]
2946
2947 StringRef Str = Token.getString();
2948 const RegInfo *Reg = getRegularRegInfo(Str);
2949 if (Reg) {
2950 StringRef RegName = Reg->Name;
2951 StringRef RegSuffix = Str.substr(RegName.size());
2952 if (!RegSuffix.empty()) {
2953 RegSuffix.consume_back(".l");
2954 RegSuffix.consume_back(".h");
2955 unsigned Num;
2956 // A single register with an index: rXX
2957 if (getRegNum(RegSuffix, Num))
2958 return true;
2959 } else {
2960 // A range of registers: r[XX:YY].
2961 if (NextToken.is(AsmToken::LBrac))
2962 return true;
2963 }
2964 }
2965
2966 return getSpecialRegForName(Str).isValid();
2967}
2968
2969bool
2970AMDGPUAsmParser::isRegister()
2971{
2972 return isRegister(getToken(), peekToken());
2973}
2974
2975MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2976 unsigned SubReg, unsigned RegWidth,
2977 SMLoc Loc) {
2978 assert(isRegularReg(RegKind));
2979
2980 unsigned AlignSize = 1;
2981 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2982 // SGPR and TTMP registers must be aligned.
2983 // Max required alignment is 4 dwords.
2984 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2985 }
2986
2987 if (RegNum % AlignSize != 0) {
2988 Error(Loc, "invalid register alignment");
2989 return MCRegister();
2990 }
2991
2992 unsigned RegIdx = RegNum / AlignSize;
2993 int RCID = getRegClass(RegKind, RegWidth);
2994 if (RCID == -1) {
2995 Error(Loc, "invalid or unsupported register size");
2996 return MCRegister();
2997 }
2998
2999 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3000 const MCRegisterClass RC = TRI->getRegClass(RCID);
3001 if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
3002 Error(Loc, "register index is out of range");
3003 return AMDGPU::NoRegister;
3004 }
3005
3006 if (RegKind == IS_VGPR && !isGFX1250Plus() && RegIdx + RegWidth / 32 > 256) {
3007 Error(Loc, "register index is out of range");
3008 return MCRegister();
3009 }
3010
3011 MCRegister Reg = RC.getRegister(RegIdx);
3012
3013 if (SubReg) {
3014 Reg = TRI->getSubReg(Reg, SubReg);
3015
3016 // Currently all regular registers have their .l and .h subregisters, so
3017 // we should never need to generate an error here.
3018 assert(Reg && "Invalid subregister!");
3019 }
3020
3021 return Reg;
3022}
3023
3024bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
3025 unsigned &SubReg) {
3026 int64_t RegLo, RegHi;
3027 if (!skipToken(AsmToken::LBrac, "missing register index"))
3028 return false;
3029
3030 SMLoc FirstIdxLoc = getLoc();
3031 SMLoc SecondIdxLoc;
3032
3033 if (!parseExpr(RegLo))
3034 return false;
3035
3036 if (trySkipToken(AsmToken::Colon)) {
3037 SecondIdxLoc = getLoc();
3038 if (!parseExpr(RegHi))
3039 return false;
3040 } else {
3041 RegHi = RegLo;
3042 }
3043
3044 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
3045 return false;
3046
3047 if (!isUInt<32>(RegLo)) {
3048 Error(FirstIdxLoc, "invalid register index");
3049 return false;
3050 }
3051
3052 if (!isUInt<32>(RegHi)) {
3053 Error(SecondIdxLoc, "invalid register index");
3054 return false;
3055 }
3056
3057 if (RegLo > RegHi) {
3058 Error(FirstIdxLoc, "first register index should not exceed second index");
3059 return false;
3060 }
3061
3062 if (RegHi == RegLo) {
3063 StringRef RegSuffix = getTokenStr();
3064 if (RegSuffix == ".l") {
3065 SubReg = AMDGPU::lo16;
3066 lex();
3067 } else if (RegSuffix == ".h") {
3068 SubReg = AMDGPU::hi16;
3069 lex();
3070 }
3071 }
3072
3073 Num = static_cast<unsigned>(RegLo);
3074 RegWidth = 32 * ((RegHi - RegLo) + 1);
3075
3076 return true;
3077}
3078
3079MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
3080 unsigned &RegNum,
3081 unsigned &RegWidth,
3082 SmallVectorImpl<AsmToken> &Tokens) {
3083 assert(isToken(AsmToken::Identifier));
3084 MCRegister Reg = getSpecialRegForName(getTokenStr());
3085 if (Reg) {
3086 RegNum = 0;
3087 RegWidth = 32;
3088 RegKind = IS_SPECIAL;
3089 Tokens.push_back(getToken());
3090 lex(); // skip register name
3091 }
3092 return Reg;
3093}
3094
3095MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3096 unsigned &RegNum,
3097 unsigned &RegWidth,
3098 SmallVectorImpl<AsmToken> &Tokens) {
3099 assert(isToken(AsmToken::Identifier));
3100 StringRef RegName = getTokenStr();
3101 auto Loc = getLoc();
3102
3103 const RegInfo *RI = getRegularRegInfo(RegName);
3104 if (!RI) {
3105 Error(Loc, "invalid register name");
3106 return MCRegister();
3107 }
3108
3109 Tokens.push_back(getToken());
3110 lex(); // skip register name
3111
3112 RegKind = RI->Kind;
3113 StringRef RegSuffix = RegName.substr(RI->Name.size());
3114 unsigned SubReg = NoSubRegister;
3115 bool IsRange = false;
3116 if (!RegSuffix.empty()) {
3117 if (RegSuffix.consume_back(".l"))
3118 SubReg = AMDGPU::lo16;
3119 else if (RegSuffix.consume_back(".h"))
3120 SubReg = AMDGPU::hi16;
3121
3122 // Single 32-bit register: vXX.
3123 if (!getRegNum(RegSuffix, RegNum)) {
3124 Error(Loc, "invalid register index");
3125 return MCRegister();
3126 }
3127 RegWidth = 32;
3128 } else {
3129 // Range of registers: v[XX:YY]. ":YY" is optional.
3130 IsRange = true;
3131 if (!ParseRegRange(RegNum, RegWidth, SubReg))
3132 return MCRegister();
3133 }
3134
3135 // Do not allow vcc_lo/hi be referred as s106/107.
3136 MCRegister Reg = getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
3137 const MCRegisterInfo &TRI = *getContext().getRegisterInfo();
3138 if (RegKind == IS_SGPR && IsRange
3139 ? (TRI.isSubRegister(Reg, VCC_LO) || TRI.isSubRegister(Reg, VCC_HI))
3140 : (Reg == VCC_LO || Reg == VCC_HI)) {
3141 Error(Loc, "register index is out of range");
3142 return MCRegister();
3143 }
3144
3145 return Reg;
3146}
3147
3148MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3149 unsigned &RegNum, unsigned &RegWidth,
3150 SmallVectorImpl<AsmToken> &Tokens) {
3151 MCRegister Reg;
3152 auto ListLoc = getLoc();
3153
3154 if (!skipToken(AsmToken::LBrac,
3155 "expected a register or a list of registers")) {
3156 return MCRegister();
3157 }
3158
3159 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
3160
3161 auto Loc = getLoc();
3162 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
3163 return MCRegister();
3164 if (RegWidth != 32) {
3165 Error(Loc, "expected a single 32-bit register");
3166 return MCRegister();
3167 }
3168
3169 for (; trySkipToken(AsmToken::Comma); ) {
3170 RegisterKind NextRegKind;
3171 MCRegister NextReg;
3172 unsigned NextRegNum, NextRegWidth;
3173 Loc = getLoc();
3174
3175 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3176 NextRegNum, NextRegWidth,
3177 Tokens)) {
3178 return MCRegister();
3179 }
3180 if (NextRegWidth != 32) {
3181 Error(Loc, "expected a single 32-bit register");
3182 return MCRegister();
3183 }
3184 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, NextRegKind,
3185 Loc))
3186 return MCRegister();
3187 }
3188
3189 if (!skipToken(AsmToken::RBrac,
3190 "expected a comma or a closing square bracket")) {
3191 return MCRegister();
3192 }
3193
3194 if (isRegularReg(RegKind))
3195 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3196
3197 return Reg;
3198}
3199
// Parse one register operand and check that it exists on the subtarget.
//
// An identifier is tried first as a special register and then as a regular
// register; any other token is handed to ParseRegList. The results are
// written to RegKind, Reg, RegNum and RegWidth, and Tokens is passed through
// to the helper parsers.
// Returns true on success. Returns false if no register could be parsed or
// if the subtarget does not have the parsed register.
3200bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3201 MCRegister &Reg, unsigned &RegNum,
3202 unsigned &RegWidth,
3203 SmallVectorImpl<AsmToken> &Tokens) {
3204 auto Loc = getLoc();
3205 Reg = MCRegister();
3206
3207 if (isToken(AsmToken::Identifier)) {
3208 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3209 if (!Reg)
3210 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3211 } else {
3212 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3213 }
3214
3215 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3216 if (!Reg) {
// The helper parsers are expected to have reported the failure already.
3217 assert(Parser.hasPendingError());
3218 return false;
3219 }
3220
3221 if (!subtargetHasRegister(*TRI, Reg)) {
3222 if (Reg == AMDGPU::SGPR_NULL) {
3223 Error(Loc, "'null' operand is not supported on this GPU");
3224 } else {
// NOTE(review): the beginning of this error call (listing line 3225) is not
// visible in this listing; only the trailing message fragment remains.
// Confirm against the original source.
3226 " register not available on this GPU");
3227 }
3228 return false;
3229 }
3230
3231 return true;
3232}
3233
// Overload of ParseAMDGPURegister that collects the consumed tokens itself
// and forwards to the token-collecting overload.
// Returns true if a register was parsed, false otherwise.
3234bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3235 MCRegister &Reg, unsigned &RegNum,
3236 unsigned &RegWidth,
3237 bool RestoreOnFailure /*=false*/) {
3238 Reg = MCRegister();
3239
// NOTE(review): the declaration of Tokens (listing line 3240) is not visible
// in this listing - confirm against the original source.
3241 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
// NOTE(review): the collected tokens are pushed back to the lexer on the
// success path when RestoreOnFailure is set, which does not match the
// parameter name - confirm the intent.
3242 if (RestoreOnFailure) {
3243 while (!Tokens.empty()) {
3244 getLexer().UnLex(Tokens.pop_back_val());
3245 }
3246 }
3247 return true;
3248 }
3249 return false;
3250}
3251
3252std::optional<StringRef>
3253AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3254 switch (RegKind) {
3255 case IS_VGPR:
3256 return StringRef(".amdgcn.next_free_vgpr");
3257 case IS_SGPR:
3258 return StringRef(".amdgcn.next_free_sgpr");
3259 default:
3260 return std::nullopt;
3261 }
3262}
3263
// Create (or look up) the GPR-count symbol for RegKind and mark it
// redefinable. RegKind must be a kind for which getGprCountSymbolName()
// returns a name.
3264void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3265 auto SymbolName = getGprCountSymbolName(RegKind);
3266 assert(SymbolName && "initializing invalid register kind");
3267 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
// NOTE(review): listing line 3268 is not visible here; presumably it assigns
// the symbol's initial value - confirm against the original source.
3269 Sym->setRedefinable(true);
3270}
3271
// Account for a register use in the .amdgcn.next_free_{v,s}gpr symbol that
// matches RegKind.
//
// DwordRegIndex is the index of the first 32-bit register used and RegWidth
// is the width of the use in bits.
// Returns true on success or when there is nothing to update (ISA major
// version below 6, or a register kind without a count symbol). Returns false
// after reporting an error if the symbol is not a variable or its value is
// not an absolute expression.
3272bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3273 unsigned DwordRegIndex,
3274 unsigned RegWidth) {
3275 // Symbols are only defined for GCN targets
3276 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3277 return true;
3278
3279 auto SymbolName = getGprCountSymbolName(RegKind);
3280 if (!SymbolName)
3281 return true;
3282 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3283
// Highest 32-bit register index touched by this use.
3284 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3285 int64_t OldCount;
3286
3287 if (!Sym->isVariable())
3288 return !Error(getLoc(),
3289 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3290 if (!Sym->getVariableValue()->evaluateAsAbsolute(OldCount))
3291 return !Error(
3292 getLoc(),
3293 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3294
3295 if (OldCount <= NewMax)
// NOTE(review): the statement controlled by this condition (listing line
// 3296) is not visible in this listing; presumably it raises the symbol's
// value - confirm against the original source.
3297
3298 return true;
3299}
3300
3301std::unique_ptr<AMDGPUOperand>
3302AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3303 const auto &Tok = getToken();
3304 SMLoc StartLoc = Tok.getLoc();
3305 SMLoc EndLoc = Tok.getEndLoc();
3306 RegisterKind RegKind;
3307 MCRegister Reg;
3308 unsigned RegNum, RegWidth;
3309
3310 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3311 return nullptr;
3312 }
3313 if (isHsaAbi(getSTI())) {
3314 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3315 return nullptr;
3316 } else
3317 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3318 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3319}
3320
3321ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3322 bool HasSP3AbsModifier, LitModifier Lit) {
3323 // TODO: add syntactic sugar for 1/(2*PI)
3324
3325 if (isRegister() || isModifier())
3326 return ParseStatus::NoMatch;
3327
3328 if (Lit == LitModifier::None) {
3329 if (trySkipId("lit"))
3330 Lit = LitModifier::Lit;
3331 else if (trySkipId("lit64"))
3332 Lit = LitModifier::Lit64;
3333
3334 if (Lit != LitModifier::None) {
3335 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3336 return ParseStatus::Failure;
3337 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, Lit);
3338 if (S.isSuccess() &&
3339 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3340 return ParseStatus::Failure;
3341 return S;
3342 }
3343 }
3344
3345 const auto& Tok = getToken();
3346 const auto& NextTok = peekToken();
3347 bool IsReal = Tok.is(AsmToken::Real);
3348 SMLoc S = getLoc();
3349 bool Negate = false;
3350
3351 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3352 lex();
3353 IsReal = true;
3354 Negate = true;
3355 }
3356
3357 AMDGPUOperand::Modifiers Mods;
3358 Mods.Lit = Lit;
3359
3360 if (IsReal) {
3361 // Floating-point expressions are not supported.
3362 // Can only allow floating-point literals with an
3363 // optional sign.
3364
3365 StringRef Num = getTokenStr();
3366 lex();
3367
3368 APFloat RealVal(APFloat::IEEEdouble());
3369 auto roundMode = APFloat::rmNearestTiesToEven;
3370 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3371 return ParseStatus::Failure;
3372 if (Negate)
3373 RealVal.changeSign();
3374
3375 Operands.push_back(
3376 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3377 AMDGPUOperand::ImmTyNone, true));
3378 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3379 Op.setModifiers(Mods);
3380
3381 return ParseStatus::Success;
3382
3383 } else {
3384 int64_t IntVal;
3385 const MCExpr *Expr;
3386 SMLoc S = getLoc();
3387
3388 if (HasSP3AbsModifier) {
3389 // This is a workaround for handling expressions
3390 // as arguments of SP3 'abs' modifier, for example:
3391 // |1.0|
3392 // |-1|
3393 // |1+x|
3394 // This syntax is not compatible with syntax of standard
3395 // MC expressions (due to the trailing '|').
3396 SMLoc EndLoc;
3397 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3398 return ParseStatus::Failure;
3399 } else {
3400 if (Parser.parseExpression(Expr))
3401 return ParseStatus::Failure;
3402 }
3403
3404 if (Expr->evaluateAsAbsolute(IntVal)) {
3405 if (Lit == LitModifier::Lit && !isInt<32>(IntVal) && !isUInt<32>(IntVal))
3406 return Error(S, "literal value out of range");
3407 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3408 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3409 Op.setModifiers(Mods);
3410 } else {
3411 if (Lit != LitModifier::None)
3412 return ParseStatus::NoMatch;
3413 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3414 }
3415
3416 return ParseStatus::Success;
3417 }
3418
3419 return ParseStatus::NoMatch;
3420}
3421
3422ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3423 if (!isRegister())
3424 return ParseStatus::NoMatch;
3425
3426 if (auto R = parseRegister()) {
3427 assert(R->isReg());
3428 Operands.push_back(std::move(R));
3429 return ParseStatus::Success;
3430 }
3431 return ParseStatus::Failure;
3432}
3433
3434ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3435 bool HasSP3AbsMod, LitModifier Lit) {
3436 ParseStatus Res = parseReg(Operands);
3437 if (!Res.isNoMatch())
3438 return Res;
3439 if (isModifier())
3440 return ParseStatus::NoMatch;
3441 return parseImm(Operands, HasSP3AbsMod, Lit);
3442}
3443
3444bool
3445AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3446 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3447 const auto &str = Token.getString();
3448 return str == "abs" || str == "neg" || str == "sext";
3449 }
3450 return false;
3451}
3452
3453bool
3454AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3455 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3456}
3457
3458bool
3459AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3460 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3461}
3462
3463bool
3464AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3465 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3466}
3467
3468// Check if this is an operand modifier or an opcode modifier
3469// which may look like an expression but it is not. We should
3470// avoid parsing these modifiers as expressions. Currently
3471// recognized sequences are:
3472// |...|
3473// abs(...)
3474// neg(...)
3475// sext(...)
3476// -reg
3477// -|...|
3478// -abs(...)
3479// name:...
3480//
3481bool
3482AMDGPUAsmParser::isModifier() {
3483
3484 AsmToken Tok = getToken();
3485 AsmToken NextToken[2];
3486 peekTokens(NextToken);
3487
3488 return isOperandModifier(Tok, NextToken[0]) ||
3489 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3490 isOpcodeModifierWithVal(Tok, NextToken[0]);
3491}
3492
3493// Check if the current token is an SP3 'neg' modifier.
3494// Currently this modifier is allowed in the following context:
3495//
3496// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3497// 2. Before an 'abs' modifier: -abs(...)
3498// 3. Before an SP3 'abs' modifier: -|...|
3499//
3500// In all other cases "-" is handled as a part
3501// of an expression that follows the sign.
3502//
3503// Note: When "-" is followed by an integer literal,
3504// this is interpreted as integer negation rather
3505// than a floating-point NEG modifier applied to N.
3506// Beside being contr-intuitive, such use of floating-point
3507// NEG modifier would have resulted in different meaning
3508// of integer literals used with VOP1/2/C and VOP3,
3509// for example:
3510// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3511// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3512// Negative fp literals with preceding "-" are
3513// handled likewise for uniformity
3514//
3515bool
3516AMDGPUAsmParser::parseSP3NegModifier() {
3517
3518 AsmToken NextToken[2];
3519 peekTokens(NextToken);
3520
3521 if (isToken(AsmToken::Minus) &&
3522 (isRegister(NextToken[0], NextToken[1]) ||
3523 NextToken[0].is(AsmToken::Pipe) ||
3524 isId(NextToken[0], "abs"))) {
3525 lex();
3526 return true;
3527 }
3528
3529 return false;
3530}
3531
// Parse a register (or, when AllowImm is set, a register or an immediate)
// that may be wrapped in floating-point input modifiers: SP3 '-' and '|...|',
// neg(...), abs(...), and the literal modifiers lit(...)/lit64(...).
// On success the ABS/NEG/literal modifiers are attached to the operand that
// was appended to Operands.
3532ParseStatus
3533AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3534 bool AllowImm) {
3535 bool Neg, SP3Neg;
3536 bool Abs, SP3Abs;
3537 SMLoc Loc;
3538
3539 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3540 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3541 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3542
3543 SP3Neg = parseSP3NegModifier();
3544
3545 Loc = getLoc();
3546 Neg = trySkipId("neg");
// The SP3 '-' and the named neg(...) form cannot be combined.
3547 if (Neg && SP3Neg)
3548 return Error(Loc, "expected register or immediate");
3549 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3550 return ParseStatus::Failure;
3551
3552 Abs = trySkipId("abs");
3553 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3554 return ParseStatus::Failure;
3555
3556 LitModifier Lit = LitModifier::None;
3557 if (trySkipId("lit")) {
3558 Lit = LitModifier::Lit;
3559 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3560 return ParseStatus::Failure;
3561 } else if (trySkipId("lit64")) {
3562 Lit = LitModifier::Lit64;
3563 if (!skipToken(AsmToken::LParen, "expected left paren after lit64"))
3564 return ParseStatus::Failure;
// NOTE(review): Loc was last assigned before 'neg' was parsed, so this
// diagnostic does not point at the lit64 token itself - confirm intent.
3565 if (!has64BitLiterals())
3566 return Error(Loc, "lit64 is not supported on this GPU");
3567 }
3568
3569 Loc = getLoc();
3570 SP3Abs = trySkipToken(AsmToken::Pipe);
// The SP3 '|' and the named abs(...) form cannot be combined.
3571 if (Abs && SP3Abs)
3572 return Error(Loc, "expected register or immediate");
3573
3574 ParseStatus Res;
3575 if (AllowImm) {
3576 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3577 } else {
3578 Res = parseReg(Operands);
3579 }
// NOTE(review): the middle operand of this conditional (listing line 3582)
// is not visible in this listing - confirm against the original source.
3580 if (!Res.isSuccess())
3581 return (SP3Neg || Neg || SP3Abs || Abs || Lit != LitModifier::None)
3583 : Res;
3584
// NOTE(review): this error is reported but parsing continues without
// returning a failure here - confirm that this is intended.
3585 if (Lit != LitModifier::None && !Operands.back()->isImm())
3586 Error(Loc, "expected immediate with lit modifier");
3587
// Consume the closing delimiter of every modifier that was opened above.
3588 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3589 return ParseStatus::Failure;
3590 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3591 return ParseStatus::Failure;
3592 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3593 return ParseStatus::Failure;
3594 if (Lit != LitModifier::None &&
3595 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3596 return ParseStatus::Failure;
3597
3598 AMDGPUOperand::Modifiers Mods;
3599 Mods.Abs = Abs || SP3Abs;
3600 Mods.Neg = Neg || SP3Neg;
3601 Mods.Lit = Lit;
3602
3603 if (Mods.hasFPModifiers() || Lit != LitModifier::None) {
3604 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
// Modifiers are rejected on operands that are still symbolic expressions.
3605 if (Op.isExpr())
3606 return Error(Op.getStartLoc(), "expected an absolute expression");
3607 Op.setModifiers(Mods);
3608 }
3609 return ParseStatus::Success;
3610}
3611
3612ParseStatus
3613AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3614 bool AllowImm) {
3615 bool Sext = trySkipId("sext");
3616 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3617 return ParseStatus::Failure;
3618
3619 ParseStatus Res;
3620 if (AllowImm) {
3621 Res = parseRegOrImm(Operands);
3622 } else {
3623 Res = parseReg(Operands);
3624 }
3625 if (!Res.isSuccess())
3626 return Sext ? ParseStatus::Failure : Res;
3627
3628 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3629 return ParseStatus::Failure;
3630
3631 AMDGPUOperand::Modifiers Mods;
3632 Mods.Sext = Sext;
3633
3634 if (Mods.hasIntModifiers()) {
3635 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3636 if (Op.isExpr())
3637 return Error(Op.getStartLoc(), "expected an absolute expression");
3638 Op.setModifiers(Mods);
3639 }
3640
3641 return ParseStatus::Success;
3642}
3643
3644ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3645 return parseRegOrImmWithFPInputMods(Operands, false);
3646}
3647
3648ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3649 return parseRegOrImmWithIntInputMods(Operands, false);
3650}
3651
3652ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3653 auto Loc = getLoc();
3654 if (trySkipId("off")) {
3655 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3656 AMDGPUOperand::ImmTyOff, false));
3657 return ParseStatus::Success;
3658 }
3659
3660 if (!isRegister())
3661 return ParseStatus::NoMatch;
3662
3663 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3664 if (Reg) {
3665 Operands.push_back(std::move(Reg));
3666 return ParseStatus::Success;
3667 }
3668
3669 return ParseStatus::Failure;
3670}
3671
3672unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3673 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3674
3675 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3676 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3677 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3678 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3679 return Match_InvalidOperand;
3680
3681 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3682 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3683 // v_mac_f32/16 allow only dst_sel == DWORD;
3684 auto OpNum =
3685 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3686 const auto &Op = Inst.getOperand(OpNum);
3687 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3688 return Match_InvalidOperand;
3689 }
3690 }
3691
3692 // Asm can first try to match VOPD or VOPD3. By failing early here with
3693 // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3694 // Checking later during validateInstruction does not give a chance to retry
3695 // parsing as a different encoding.
3696 if (tryAnotherVOPDEncoding(Inst))
3697 return Match_InvalidOperand;
3698
3699 return Match_Success;
3700}
3701
3711
3712// What asm variants we should check
// Returns the list of assembler variants to try for the current instruction.
// A forced encoding narrows the list; the checks below run in order and the
// first one that applies wins. Without a forced encoding the result of
// getAllVariants() is returned.
3713ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3714 if (isForcedDPP() && isForcedVOP3()) {
3715 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3716 return ArrayRef(Variants);
3717 }
3718 if (getForcedEncodingSize() == 32) {
3719 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3720 return ArrayRef(Variants);
3721 }
3722
3723 if (isForcedVOP3()) {
3724 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3725 return ArrayRef(Variants);
3726 }
3727
3728 if (isForcedSDWA()) {
// NOTE(review): the rest of this initializer (listing line 3730) is not
// visible in this listing - confirm against the original source.
3729 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3731 return ArrayRef(Variants);
3732 }
3733
3734 if (isForcedDPP()) {
3735 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3736 return ArrayRef(Variants);
3737 }
3738
3739 return getAllVariants();
3740}
3741
3742StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3743 if (isForcedDPP() && isForcedVOP3())
3744 return "e64_dpp";
3745
3746 if (getForcedEncodingSize() == 32)
3747 return "e32";
3748
3749 if (isForcedVOP3())
3750 return "e64";
3751
3752 if (isForcedSDWA())
3753 return "sdwa";
3754
3755 if (isForcedDPP())
3756 return "dpp";
3757
3758 return "";
3759}
3760
3761MCRegister
3762AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3763 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3764 for (MCPhysReg Reg : Desc.implicit_uses()) {
3765 switch (Reg) {
3766 case AMDGPU::FLAT_SCR:
3767 case AMDGPU::VCC:
3768 case AMDGPU::VCC_LO:
3769 case AMDGPU::VCC_HI:
3770 case AMDGPU::M0:
3771 return Reg;
3772 default:
3773 break;
3774 }
3775 }
3776 return MCRegister();
3777}
3778
3779// NB: This code is correct only when used to check constant
3780// bus limitations because GFX7 support no f16 inline constants.
3781// Note that there are no cases when a GFX7 opcode violates
3782// constant bus limitations due to the use of an f16 constant.
// Reports whether operand OpIdx of Inst holds a value that can be encoded as
// an inline constant for that operand's size and type.
// NOTE(review): several lines of this function (the opening guard condition
// and the case labels of the inner switch) are not visible in this listing;
// the notes below describe only what can be seen. Confirm against the
// original source before changing anything here.
3783bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3784 unsigned OpIdx) const {
3785 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3786
3789 return false;
3790 }
3791
3792 const MCOperand &MO = Inst.getOperand(OpIdx);
3793
// Immediates are used directly; otherwise the value is taken from the
// operand's expression via getLitValue().
3794 int64_t Val = MO.isImm() ? MO.getImm() : getLitValue(MO.getExpr());
3795 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3796
3797 switch (OpSize) { // expected operand size
3798 case 8:
3799 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3800 case 4:
3801 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3802 case 2: {
// 2-byte operands: the check depends on the operand type.
3803 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3806 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3807
3811
3815
3818
3822
3825 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3826
3829 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3830
3832 return false;
3833
3834 llvm_unreachable("invalid operand type");
3835 }
3836 default:
3837 llvm_unreachable("invalid operand size");
3838 }
3839}
3840
3841unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3842 if (!isGFX10Plus())
3843 return 1;
3844
3845 switch (Opcode) {
3846 // 64-bit shift instructions can use only one scalar value input
3847 case AMDGPU::V_LSHLREV_B64_e64:
3848 case AMDGPU::V_LSHLREV_B64_gfx10:
3849 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3850 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3851 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3852 case AMDGPU::V_LSHRREV_B64_e64:
3853 case AMDGPU::V_LSHRREV_B64_gfx10:
3854 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3855 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3856 case AMDGPU::V_ASHRREV_I64_e64:
3857 case AMDGPU::V_ASHRREV_I64_gfx10:
3858 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3859 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3860 case AMDGPU::V_LSHL_B64_e64:
3861 case AMDGPU::V_LSHR_B64_e64:
3862 case AMDGPU::V_ASHR_I64_e64:
3863 return 1;
3864 default:
3865 return 2;
3866 }
3867}
3868
3869constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
// NOTE(review): listing line 3870 is not visible here; presumably it declares
// the OperandIndices type used below - confirm against the original source.
3871
3872// Get regular operand indices in the same order as specified
3873// in the instruction (but append mandatory literals to the end).
// Named operands that the opcode does not have are looked up with
// getNamedOperandIdx(); the literal slots hold -1 unless AddMandatoryLiterals
// is set.
// NOTE(review): the first line of the signature (listing line 3874) is not
// visible in this listing - confirm against the original source.
3875 bool AddMandatoryLiterals = false) {
3876
3877 int16_t ImmIdx =
3878 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3879
// VOPD opcodes list the X-component sources, then the Y-component sources,
// then the two literal slots.
3880 if (isVOPD(Opcode)) {
3881 int16_t ImmXIdx =
3882 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
3883
3884 return {getNamedOperandIdx(Opcode, OpName::src0X),
3885 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3886 getNamedOperandIdx(Opcode, OpName::vsrc2X),
3887 getNamedOperandIdx(Opcode, OpName::src0Y),
3888 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3889 getNamedOperandIdx(Opcode, OpName::vsrc2Y),
3890 ImmXIdx,
3891 ImmIdx};
3892 }
3893
3894 return {getNamedOperandIdx(Opcode, OpName::src0),
3895 getNamedOperandIdx(Opcode, OpName::src1),
3896 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3897}
3898
3899bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3900 const MCOperand &MO = Inst.getOperand(OpIdx);
3901 if (MO.isImm())
3902 return !isInlineConstant(Inst, OpIdx);
3903 if (MO.isReg()) {
3904 auto Reg = MO.getReg();
3905 if (!Reg)
3906 return false;
3907 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3908 auto PReg = mc2PseudoReg(Reg);
3909 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3910 }
3911 return true;
3912}
3913
3914// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3915// Writelane is special in that it can use SGPR and M0 (which would normally
3916// count as using the constant bus twice - but in this case it is allowed since
3917// the lane selector doesn't count as a use of the constant bus). However, it is
3918// still required to abide by the 1 SGPR rule.
3919static bool checkWriteLane(const MCInst &Inst) {
3920 const unsigned Opcode = Inst.getOpcode();
3921 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3922 return false;
3923 const MCOperand &LaneSelOp = Inst.getOperand(2);
3924 if (!LaneSelOp.isReg())
3925 return false;
3926 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3927 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3928}
3929
// Check that Inst does not use more constant bus slots (distinct SGPRs plus
// literals) than getConstantBusLimit() allows for its opcode.
// Returns true if the instruction is fine or the check does not apply;
// otherwise reports an error at the offending operand and returns false.
3930bool AMDGPUAsmParser::validateConstantBusLimitations(
3931 const MCInst &Inst, const OperandVector &Operands) {
3932 const unsigned Opcode = Inst.getOpcode();
3933 const MCInstrDesc &Desc = MII.get(Opcode);
3934 MCRegister LastSGPR;
3935 unsigned ConstantBusUseCount = 0;
3936 unsigned NumLiterals = 0;
// LiteralSize is only read after NumLiterals has become non-zero, and every
// place that sets NumLiterals to 1 also sets LiteralSize.
3937 unsigned LiteralSize;
3938
// NOTE(review): the flag mask tested here (listing lines 3940-3941) is not
// visible in this listing - confirm against the original source.
3939 if (!(Desc.TSFlags &
3942 !isVOPD(Opcode))
3943 return true;
3944
3945 if (checkWriteLane(Inst))
3946 return true;
3947
3948 // Check special imm operands (used by madmk, etc)
3949 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3950 ++NumLiterals;
3951 LiteralSize = 4;
3952 }
3953
// An implicitly read SGPR takes one constant bus slot up front.
3954 SmallDenseSet<MCRegister> SGPRsUsed;
3955 MCRegister SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3956 if (SGPRUsed) {
3957 SGPRsUsed.insert(SGPRUsed);
3958 ++ConstantBusUseCount;
3959 }
3960
3961 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3962
3963 unsigned ConstantBusLimit = getConstantBusLimit(Opcode);
3964
3965 for (int OpIdx : OpIndices) {
// -1 marks a source operand that this opcode does not have.
3966 if (OpIdx == -1)
3967 continue;
3968
3969 const MCOperand &MO = Inst.getOperand(OpIdx);
3970 if (usesConstantBus(Inst, OpIdx)) {
3971 if (MO.isReg()) {
3972 LastSGPR = mc2PseudoReg(MO.getReg());
3973 // Pairs of registers with a partial intersections like these
3974 // s0, s[0:1]
3975 // flat_scratch_lo, flat_scratch
3976 // flat_scratch_lo, flat_scratch_hi
3977 // are theoretically valid but they are disabled anyway.
3978 // Note that this code mimics SIInstrInfo::verifyInstruction
// Each distinct SGPR is counted once, however often it is used.
3979 if (SGPRsUsed.insert(LastSGPR).second) {
3980 ++ConstantBusUseCount;
3981 }
3982 } else { // Expression or a literal
3983
3984 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3985 continue; // special operand like VINTERP attr_chan
3986
3987 // An instruction may use only one literal.
3988 // This has been validated on the previous step.
3989 // See validateVOPLiteral.
3990 // This literal may be used as more than one operand.
3991 // If all these operands are of the same size,
3992 // this literal counts as one scalar value.
3993 // Otherwise it counts as 2 scalar values.
3994 // See "GFX10 Shader Programming", section 3.6.2.3.
3995
// NOTE(review): the declaration of Size (listing line 3996) is not visible
// in this listing - confirm against the original source.
3997 if (Size < 4)
3998 Size = 4;
3999
4000 if (NumLiterals == 0) {
4001 NumLiterals = 1;
4002 LiteralSize = Size;
4003 } else if (LiteralSize != Size) {
4004 NumLiterals = 2;
4005 }
4006 }
4007 }
4008
// The limit is checked after every operand so that the error points at the
// first operand that exceeds it.
4009 if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
4010 Error(getOperandLoc(Operands, OpIdx),
4011 "invalid operand (violates constant bus restrictions)");
4012 return false;
4013 }
4014 }
4015 return true;
4016}
4017
4018std::optional<unsigned>
4019AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
4020
4021 const unsigned Opcode = Inst.getOpcode();
4022 if (!isVOPD(Opcode))
4023 return {};
4024
4025 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4026
4027 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
4028 const MCOperand &Opr = Inst.getOperand(OperandIdx);
4029 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
4030 ? Opr.getReg()
4031 : MCRegister();
4032 };
4033
4034 // On GFX1170+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
4035 // source-cache.
4036 bool SkipSrc =
4037 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1170 ||
4038 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
4039 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
4040 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx13 ||
4041 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250 ||
4042 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx13;
4043 bool AllowSameVGPR = isGFX12Plus();
4044
4045 if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
4046 for (auto OpName : {OpName::src0X, OpName::src0Y}) {
4047 int I = getNamedOperandIdx(Opcode, OpName);
4048 const MCOperand &Op = Inst.getOperand(I);
4049 if (!Op.isImm())
4050 continue;
4051 int64_t Imm = Op.getImm();
4052 if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
4053 !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
4054 return (unsigned)I;
4055 }
4056
4057 for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
4058 OpName::vsrc2Y, OpName::imm}) {
4059 int I = getNamedOperandIdx(Opcode, OpName);
4060 if (I == -1)
4061 continue;
4062 const MCOperand &Op = Inst.getOperand(I);
4063 if (Op.isImm())
4064 return (unsigned)I;
4065 }
4066 }
4067
4068 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
4069 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
4070 getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
4071
4072 return InvalidCompOprIdx;
4073}
4074
4075bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
4076 const OperandVector &Operands) {
4077
4078 unsigned Opcode = Inst.getOpcode();
4079 bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
4080
4081 if (AsVOPD3) {
4082 for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
4083 AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
4084 if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
4085 (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
4086 Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
4087 }
4088 }
4089
4090 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
4091 if (!InvalidCompOprIdx.has_value())
4092 return true;
4093
4094 auto CompOprIdx = *InvalidCompOprIdx;
4095 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
4096 auto ParsedIdx =
4097 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
4098 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
4099 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
4100
4101 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
4102 if (CompOprIdx == VOPD::Component::DST) {
4103 if (AsVOPD3)
4104 Error(Loc, "dst registers must be distinct");
4105 else
4106 Error(Loc, "one dst register must be even and the other odd");
4107 } else {
4108 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
4109 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
4110 " operands must use different VGPR banks");
4111 }
4112
4113 return false;
4114}
4115
4116// \returns true if \p Inst does not satisfy VOPD constraints, but can be
4117// potentially used as VOPD3 with the same operands.
4118bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
4119 // First check if it fits VOPD
4120 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
4121 if (!InvalidCompOprIdx.has_value())
4122 return false;
4123
4124 // Then if it fits VOPD3
4125 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
4126 if (InvalidCompOprIdx.has_value()) {
4127 // If failed operand is dst it is better to show error about VOPD3
4128 // instruction as it has more capabilities and error message will be
4129 // more informative. If the dst is not legal for VOPD3, then it is not
4130 // legal for VOPD either.
4131 if (*InvalidCompOprIdx == VOPD::Component::DST)
4132 return true;
4133
4134 // Otherwise prefer VOPD as we may find ourselves in an awkward situation
4135 // with a conflict in tied implicit src2 of fmac and no asm operand to
4136 // to point to.
4137 return false;
4138 }
4139 return true;
4140}
4141
4142// \returns true is a VOPD3 instruction can be also represented as a shorter
4143// VOPD encoding.
4144bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
4145 const unsigned Opcode = Inst.getOpcode();
4146 const auto &II = getVOPDInstInfo(Opcode, &MII);
4147 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
4148 if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
4149 !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
4150 return false;
4151
4152 // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
4153 // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
4154 // be parsed as VOPD which does not accept src2.
4155 if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
4156 II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
4157 return false;
4158
4159 // If any modifiers are set this cannot be VOPD.
4160 for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4161 OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4162 OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4163 int I = getNamedOperandIdx(Opcode, OpName);
4164 if (I == -1)
4165 continue;
4166 if (Inst.getOperand(I).getImm())
4167 return false;
4168 }
4169
4170 return !tryVOPD3(Inst);
4171}
4172
4173// VOPD3 has more relaxed register constraints than VOPD. We prefer shorter VOPD
4174// form but switch to VOPD3 otherwise.
4175bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4176 const unsigned Opcode = Inst.getOpcode();
4177 if (!isGFX1250Plus() || !isVOPD(Opcode))
4178 return false;
4179
4180 if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4181 return tryVOPD(Inst);
4182 return tryVOPD3(Inst);
4183}
4184
4185bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
4186
4187 const unsigned Opc = Inst.getOpcode();
4188 const MCInstrDesc &Desc = MII.get(Opc);
4189
4190 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
4191 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
4192 assert(ClampIdx != -1);
4193 return Inst.getOperand(ClampIdx).getImm() == 0;
4194 }
4195
4196 return true;
4197}
4198
4201
4202bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc) {
4203
4204 const unsigned Opc = Inst.getOpcode();
4205 const MCInstrDesc &Desc = MII.get(Opc);
4206
4207 if ((Desc.TSFlags & MIMGFlags) == 0)
4208 return true;
4209
4210 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
4211 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4212 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
4213
4214 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
4215 return true;
4216
4217 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
4218 return true;
4219
4220 unsigned VDataSize = getRegOperandSize(Desc, VDataIdx);
4221 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
4222 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4223 if (DMask == 0)
4224 DMask = 1;
4225
4226 bool IsPackedD16 = false;
4227 unsigned DataSize =
4228 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
4229 if (hasPackedD16()) {
4230 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4231 IsPackedD16 = D16Idx >= 0;
4232 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
4233 DataSize = (DataSize + 1) / 2;
4234 }
4235
4236 if ((VDataSize / 4) == DataSize + TFESize)
4237 return true;
4238
4239 StringRef Modifiers;
4240 if (isGFX90A())
4241 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
4242 else
4243 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
4244
4245 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
4246 return false;
4247}
4248
// Checks, on GFX10+ only, that the address operands of an image instruction
// have the size expected for its dim and a16 operands. For base opcodes with
// the BVH flag only the a16 operand is compared against BaseOpcode->A16.
// Emits an error at \p IDLoc and returns false on a mismatch; returns true for
// non-image instructions and for targets before GFX10.
4249bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc) {
4250 const unsigned Opc = Inst.getOpcode();
4251 const MCInstrDesc &Desc = MII.get(Opc);
4252
4253 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
4254 return true;
4255
4256 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4257
// NOTE(review): the initializer of BaseOpcode is absent from this listing (the
// numbering skips line 4259); restore it from the upstream source.
4258 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4260 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
// The resource operand is looked up as srsrc when the MIMG flag is set and as
// rsrc otherwise.
4261 AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
4262 ? AMDGPU::OpName::srsrc
4263 : AMDGPU::OpName::rsrc;
4264 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
4265 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4266 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
4267
4268 assert(VAddr0Idx != -1);
4269 assert(SrsrcIdx != -1);
4270 assert(SrsrcIdx > VAddr0Idx);
4271
4272 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
4273 if (BaseOpcode->BVH) {
4274 if (IsA16 == BaseOpcode->A16)
4275 return true;
4276 Error(IDLoc, "image address size does not match a16");
4277 return false;
4278 }
4279
4280 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4281 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
// More than one operand slot between vaddr0 and the resource operand is
// treated as NSA; the address size is then the number of those slots,
// otherwise it is the size of the single vaddr0 register operand divided by 4.
4282 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4283 unsigned ActualAddrSize =
4284 IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(Desc, VAddr0Idx) / 4;
4285
4286 unsigned ExpectedAddrSize =
4287 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
4288
4289 if (IsNSA) {
// NOTE(review): the right-hand side of this comparison is absent from this
// listing (the numbering skips line 4292); restore it from the upstream
// source. When the condition holds, the size of the last address operand is
// added in place of counting it as one slot.
4290 if (hasPartialNSAEncoding() &&
4291 ExpectedAddrSize >
4293 int VAddrLastIdx = SrsrcIdx - 1;
4294 unsigned VAddrLastSize = getRegOperandSize(Desc, VAddrLastIdx) / 4;
4295
4296 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4297 }
4298 } else {
// Expected sizes above 12 are rounded up to 16 for the non-NSA form.
4299 if (ExpectedAddrSize > 12)
4300 ExpectedAddrSize = 16;
4301
4302 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
4303 // This provides backward compatibility for assembly created
4304 // before 160b/192b/224b types were directly supported.
4305 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4306 return true;
4307 }
4308
4309 if (ActualAddrSize == ExpectedAddrSize)
4310 return true;
4311
4312 Error(IDLoc, "image address size does not match dim and a16");
4313 return false;
4314}
4315
4316bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4317
4318 const unsigned Opc = Inst.getOpcode();
4319 const MCInstrDesc &Desc = MII.get(Opc);
4320
4321 if ((Desc.TSFlags & MIMGFlags) == 0)
4322 return true;
4323 if (!Desc.mayLoad() || !Desc.mayStore())
4324 return true; // Not atomic
4325
4326 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4327 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4328
4329 // This is an incomplete check because image_atomic_cmpswap
4330 // may only use 0x3 and 0xf while other atomic operations
4331 // may use 0x1 and 0x3. However these limitations are
4332 // verified when we check that dmask matches dst size.
4333 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4334}
4335
4336bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4337
4338 const unsigned Opc = Inst.getOpcode();
4339 const MCInstrDesc &Desc = MII.get(Opc);
4340
4341 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4342 return true;
4343
4344 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4345 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4346
4347 // GATHER4 instructions use dmask in a different fashion compared to
4348 // other MIMG instructions. The only useful DMASK values are
4349 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4350 // (red,red,red,red) etc.) The ISA document doesn't mention
4351 // this.
4352 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4353}
4354
// On GFX10+, checks that an image instruction was written with a dim operand
// by scanning the parsed operands (starting at index 1) for one that reports
// isDim(). Returns true for targets before GFX10 and for non-image
// instructions.
4355bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4356 const OperandVector &Operands) {
4357 if (!isGFX10Plus())
4358 return true;
4359
4360 const unsigned Opc = Inst.getOpcode();
4361 const MCInstrDesc &Desc = MII.get(Opc);
4362
4363 if ((Desc.TSFlags & MIMGFlags) == 0)
4364 return true;
4365
// NOTE(review): the condition guarding the following early return is absent
// from this listing (the numbering skips line 4367); restore it from the
// upstream source.
4366 // image_bvh_intersect_ray instructions do not have dim
4368 return true;
4369
4370 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4371 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4372 if (Op.isDim())
4373 return true;
4374 }
4375 return false;
4376}
4377
// For image instructions whose base opcode has the MSAA flag set, returns the
// MSAA flag of the dim selected by the dim operand. Returns true for
// non-image instructions and for base opcodes without the MSAA flag.
4378bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4379 const unsigned Opc = Inst.getOpcode();
4380 const MCInstrDesc &Desc = MII.get(Opc);
4381
4382 if ((Desc.TSFlags & MIMGFlags) == 0)
4383 return true;
4384
4385 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
// NOTE(review): the initializer of BaseOpcode is absent from this listing (the
// numbering skips line 4387); restore it from the upstream source.
4386 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4388
4389 if (!BaseOpcode->MSAA)
4390 return true;
4391
4392 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4393 assert(DimIdx != -1);
4394
4395 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4396 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4397
4398 return DimInfo->MSAA;
4399}
4400
4401static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4402{
4403 switch (Opcode) {
4404 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4405 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4406 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4407 return true;
4408 default:
4409 return false;
4410 }
4411}
4412
4413// movrels* opcodes should only allow VGPRS as src0.
4414// This is specified in .td description for vop1/vop3,
4415// but sdwa is handled differently. See isSDWAOperand.
4416bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4417 const OperandVector &Operands) {
4418
4419 const unsigned Opc = Inst.getOpcode();
4420 const MCInstrDesc &Desc = MII.get(Opc);
4421
4422 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4423 return true;
4424
4425 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4426 assert(Src0Idx != -1);
4427
4428 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4429 if (Src0.isReg()) {
4430 auto Reg = mc2PseudoReg(Src0.getReg());
4431 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4432 if (!isSGPR(Reg, TRI))
4433 return true;
4434 }
4435
4436 Error(getOperandLoc(Operands, Src0Idx), "source operand must be a VGPR");
4437 return false;
4438}
4439
4440bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4441 const OperandVector &Operands) {
4442
4443 const unsigned Opc = Inst.getOpcode();
4444
4445 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4446 return true;
4447
4448 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4449 assert(Src0Idx != -1);
4450
4451 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4452 if (!Src0.isReg())
4453 return true;
4454
4455 auto Reg = mc2PseudoReg(Src0.getReg());
4456 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4457 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4458 Error(getOperandLoc(Operands, Src0Idx),
4459 "source operand must be either a VGPR or an inline constant");
4460 return false;
4461 }
4462
4463 return true;
4464}
4465
4466bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4467 const OperandVector &Operands) {
4468 unsigned Opcode = Inst.getOpcode();
4469 const MCInstrDesc &Desc = MII.get(Opcode);
4470
4471 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4472 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4473 return true;
4474
4475 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4476 if (Src2Idx == -1)
4477 return true;
4478
4479 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4480 Error(getOperandLoc(Operands, Src2Idx),
4481 "inline constants are not allowed for this operand");
4482 return false;
4483 }
4484
4485 return true;
4486}
4487
4488bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4489 const OperandVector &Operands) {
4490 const unsigned Opc = Inst.getOpcode();
4491 const MCInstrDesc &Desc = MII.get(Opc);
4492
4493 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4494 return true;
4495
4496 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4497 if (BlgpIdx != -1) {
4498 if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
4499 int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
4500
4501 unsigned CBSZ = Inst.getOperand(CbszIdx).getImm();
4502 unsigned BLGP = Inst.getOperand(BlgpIdx).getImm();
4503
4504 // Validate the correct register size was used for the floating point
4505 // format operands
4506
4507 bool Success = true;
4508 if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) {
4509 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4510 Error(getOperandLoc(Operands, Src0Idx),
4511 "wrong register tuple size for cbsz value " + Twine(CBSZ));
4512 Success = false;
4513 }
4514
4515 if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(BLGP)) {
4516 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4517 Error(getOperandLoc(Operands, Src1Idx),
4518 "wrong register tuple size for blgp value " + Twine(BLGP));
4519 Success = false;
4520 }
4521
4522 return Success;
4523 }
4524 }
4525
4526 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4527 if (Src2Idx == -1)
4528 return true;
4529
4530 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4531 if (!Src2.isReg())
4532 return true;
4533
4534 MCRegister Src2Reg = Src2.getReg();
4535 MCRegister DstReg = Inst.getOperand(0).getReg();
4536 if (Src2Reg == DstReg)
4537 return true;
4538
4539 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4540 if (TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[0], HwMode))
4541 .getSizeInBits() <= 128)
4542 return true;
4543
4544 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4545 Error(getOperandLoc(Operands, Src2Idx),
4546 "source 2 operand must not partially overlap with dst");
4547 return false;
4548 }
4549
4550 return true;
4551}
4552
4553bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4554 switch (Inst.getOpcode()) {
4555 default:
4556 return true;
4557 case V_DIV_SCALE_F32_gfx6_gfx7:
4558 case V_DIV_SCALE_F32_vi:
4559 case V_DIV_SCALE_F32_gfx10:
4560 case V_DIV_SCALE_F64_gfx6_gfx7:
4561 case V_DIV_SCALE_F64_vi:
4562 case V_DIV_SCALE_F64_gfx10:
4563 break;
4564 }
4565
4566 // TODO: Check that src0 = src1 or src2.
4567
4568 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4569 AMDGPU::OpName::src2_modifiers,
4570 AMDGPU::OpName::src2_modifiers}) {
4571 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4572 .getImm() &
4574 return false;
4575 }
4576 }
4577
4578 return true;
4579}
4580
4581bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4582
4583 const unsigned Opc = Inst.getOpcode();
4584 const MCInstrDesc &Desc = MII.get(Opc);
4585
4586 if ((Desc.TSFlags & MIMGFlags) == 0)
4587 return true;
4588
4589 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4590 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4591 if (isCI() || isSI())
4592 return false;
4593 }
4594
4595 return true;
4596}
4597
4598bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4599 const unsigned Opc = Inst.getOpcode();
4600 const MCInstrDesc &Desc = MII.get(Opc);
4601
4602 if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0)
4603 return true;
4604
4605 int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
4606
4607 return R128Idx < 0 || !Inst.getOperand(R128Idx).getImm();
4608}
4609
4610static bool IsRevOpcode(const unsigned Opcode)
4611{
4612 switch (Opcode) {
4613 case AMDGPU::V_SUBREV_F32_e32:
4614 case AMDGPU::V_SUBREV_F32_e64:
4615 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4616 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4617 case AMDGPU::V_SUBREV_F32_e32_vi:
4618 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4619 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4620 case AMDGPU::V_SUBREV_F32_e64_vi:
4621
4622 case AMDGPU::V_SUBREV_CO_U32_e32:
4623 case AMDGPU::V_SUBREV_CO_U32_e64:
4624 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4625 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4626
4627 case AMDGPU::V_SUBBREV_U32_e32:
4628 case AMDGPU::V_SUBBREV_U32_e64:
4629 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4630 case AMDGPU::V_SUBBREV_U32_e32_vi:
4631 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4632 case AMDGPU::V_SUBBREV_U32_e64_vi:
4633
4634 case AMDGPU::V_SUBREV_U32_e32:
4635 case AMDGPU::V_SUBREV_U32_e64:
4636 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4637 case AMDGPU::V_SUBREV_U32_e32_vi:
4638 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4639 case AMDGPU::V_SUBREV_U32_e64_vi:
4640
4641 case AMDGPU::V_SUBREV_F16_e32:
4642 case AMDGPU::V_SUBREV_F16_e64:
4643 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4644 case AMDGPU::V_SUBREV_F16_e32_vi:
4645 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4646 case AMDGPU::V_SUBREV_F16_e64_vi:
4647
4648 case AMDGPU::V_SUBREV_U16_e32:
4649 case AMDGPU::V_SUBREV_U16_e64:
4650 case AMDGPU::V_SUBREV_U16_e32_vi:
4651 case AMDGPU::V_SUBREV_U16_e64_vi:
4652
4653 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4654 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4655 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4656
4657 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4658 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4659
4660 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4661 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4662
4663 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4664 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4665
4666 case AMDGPU::V_LSHRREV_B32_e32:
4667 case AMDGPU::V_LSHRREV_B32_e64:
4668 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4669 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4670 case AMDGPU::V_LSHRREV_B32_e32_vi:
4671 case AMDGPU::V_LSHRREV_B32_e64_vi:
4672 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4673 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4674
4675 case AMDGPU::V_ASHRREV_I32_e32:
4676 case AMDGPU::V_ASHRREV_I32_e64:
4677 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4678 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4679 case AMDGPU::V_ASHRREV_I32_e32_vi:
4680 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4681 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4682 case AMDGPU::V_ASHRREV_I32_e64_vi:
4683
4684 case AMDGPU::V_LSHLREV_B32_e32:
4685 case AMDGPU::V_LSHLREV_B32_e64:
4686 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4687 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4688 case AMDGPU::V_LSHLREV_B32_e32_vi:
4689 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4690 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4691 case AMDGPU::V_LSHLREV_B32_e64_vi:
4692
4693 case AMDGPU::V_LSHLREV_B16_e32:
4694 case AMDGPU::V_LSHLREV_B16_e64:
4695 case AMDGPU::V_LSHLREV_B16_e32_vi:
4696 case AMDGPU::V_LSHLREV_B16_e64_vi:
4697 case AMDGPU::V_LSHLREV_B16_gfx10:
4698
4699 case AMDGPU::V_LSHRREV_B16_e32:
4700 case AMDGPU::V_LSHRREV_B16_e64:
4701 case AMDGPU::V_LSHRREV_B16_e32_vi:
4702 case AMDGPU::V_LSHRREV_B16_e64_vi:
4703 case AMDGPU::V_LSHRREV_B16_gfx10:
4704
4705 case AMDGPU::V_ASHRREV_I16_e32:
4706 case AMDGPU::V_ASHRREV_I16_e64:
4707 case AMDGPU::V_ASHRREV_I16_e32_vi:
4708 case AMDGPU::V_ASHRREV_I16_e64_vi:
4709 case AMDGPU::V_ASHRREV_I16_gfx10:
4710
4711 case AMDGPU::V_LSHLREV_B64_e64:
4712 case AMDGPU::V_LSHLREV_B64_gfx10:
4713 case AMDGPU::V_LSHLREV_B64_vi:
4714
4715 case AMDGPU::V_LSHRREV_B64_e64:
4716 case AMDGPU::V_LSHRREV_B64_gfx10:
4717 case AMDGPU::V_LSHRREV_B64_vi:
4718
4719 case AMDGPU::V_ASHRREV_I64_e64:
4720 case AMDGPU::V_ASHRREV_I64_gfx10:
4721 case AMDGPU::V_ASHRREV_I64_vi:
4722
4723 case AMDGPU::V_PK_LSHLREV_B16:
4724 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4725 case AMDGPU::V_PK_LSHLREV_B16_vi:
4726
4727 case AMDGPU::V_PK_LSHRREV_B16:
4728 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4729 case AMDGPU::V_PK_LSHRREV_B16_vi:
4730 case AMDGPU::V_PK_ASHRREV_I16:
4731 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4732 case AMDGPU::V_PK_ASHRREV_I16_vi:
4733 return true;
4734 default:
4735 return false;
4736 }
4737}
4738
4739bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst,
4740 const OperandVector &Operands) {
4741 using namespace SIInstrFlags;
4742 const unsigned Opcode = Inst.getOpcode();
4743 const MCInstrDesc &Desc = MII.get(Opcode);
4744
4745 // lds_direct register is defined so that it can be used
4746 // with 9-bit operands only. Ignore encodings which do not accept these.
4747 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4748 if ((Desc.TSFlags & Enc) == 0)
4749 return true;
4750
4751 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4752 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4753 if (SrcIdx == -1)
4754 break;
4755 const auto &Src = Inst.getOperand(SrcIdx);
4756 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4757
4758 if (isGFX90A() || isGFX11Plus()) {
4759 Error(getOperandLoc(Operands, SrcIdx),
4760 "lds_direct is not supported on this GPU");
4761 return false;
4762 }
4763
4764 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) {
4765 Error(getOperandLoc(Operands, SrcIdx),
4766 "lds_direct cannot be used with this instruction");
4767 return false;
4768 }
4769
4770 if (SrcName != OpName::src0) {
4771 Error(getOperandLoc(Operands, SrcIdx),
4772 "lds_direct may be used as src0 only");
4773 return false;
4774 }
4775 }
4776 }
4777
4778 return true;
4779}
4780
4781SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4782 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4783 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4784 if (Op.isFlatOffset())
4785 return Op.getStartLoc();
4786 }
4787 return getLoc();
4788}
4789
4790bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4791 const OperandVector &Operands) {
4792 auto Opcode = Inst.getOpcode();
4793 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4794 if (OpNum == -1)
4795 return true;
4796
4797 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4798 if ((TSFlags & SIInstrFlags::FLAT))
4799 return validateFlatOffset(Inst, Operands);
4800
4801 if ((TSFlags & SIInstrFlags::SMRD))
4802 return validateSMEMOffset(Inst, Operands);
4803
4804 const auto &Op = Inst.getOperand(OpNum);
4805 // GFX12+ buffer ops: InstOffset is signed 24, but must not be a negative.
4806 if (isGFX12Plus() &&
4807 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4808 const unsigned OffsetSize = 24;
4809 if (!isUIntN(OffsetSize - 1, Op.getImm())) {
4810 Error(getFlatOffsetLoc(Operands),
4811 Twine("expected a ") + Twine(OffsetSize - 1) +
4812 "-bit unsigned offset for buffer ops");
4813 return false;
4814 }
4815 } else {
4816 const unsigned OffsetSize = 16;
4817 if (!isUIntN(OffsetSize, Op.getImm())) {
4818 Error(getFlatOffsetLoc(Operands),
4819 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4820 return false;
4821 }
4822 }
4823 return true;
4824}
4825
// Validates the offset operand of a FLAT instruction. A non-zero offset is
// rejected when hasFlatOffsets() is false; otherwise the offset must fit the
// number of bits reported by getNumFlatOffsetBits() and must be non-negative
// unless negative offsets are allowed. Emits an error and returns false on
// failure; returns true for instructions without the FLAT flag.
4826bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4827 const OperandVector &Operands) {
4828 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4829 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4830 return true;
4831
4832 auto Opcode = Inst.getOpcode();
4833 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4834 assert(OpNum != -1);
4835
4836 const auto &Op = Inst.getOperand(OpNum);
4837 if (!hasFlatOffsets() && Op.getImm() != 0) {
4838 Error(getFlatOffsetLoc(Operands),
4839 "flat offset modifier is not supported on this GPU");
4840 return false;
4841 }
4842
4843 // For pre-GFX12 FLAT instructions the offset must be positive;
4844 // MSB is ignored and forced to zero.
4845 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
// NOTE(review): the first part of the AllowNegative initializer is absent from
// this listing (the numbering skips line 4847); restore it from the upstream
// source.
4846 bool AllowNegative =
4848 isGFX12Plus();
// The diagnostic reports the full signed width when negative offsets are
// allowed, and one bit less as an unsigned width otherwise.
4849 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4850 Error(getFlatOffsetLoc(Operands),
4851 Twine("expected a ") +
4852 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4853 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4854 return false;
4855 }
4856
4857 return true;
4858}
4859
4860SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4861 // Start with second operand because SMEM Offset cannot be dst or src0.
4862 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4863 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4864 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4865 return Op.getStartLoc();
4866 }
4867 return getLoc();
4868}
4869
// Validates an immediate offset operand of an SMRD instruction. Nothing is
// checked on SI/CI, for instructions without the SMRD flag, when there is no
// offset operand, or when the offset is not an immediate. On failure an error
// naming the expected offset width is emitted at the offset operand and false
// is returned.
4870bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4871 const OperandVector &Operands) {
4872 if (isCI() || isSI())
4873 return true;
4874
4875 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4876 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4877 return true;
4878
4879 auto Opcode = Inst.getOpcode();
4880 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4881 if (OpNum == -1)
4882 return true;
4883
4884 const auto &Op = Inst.getOperand(OpNum);
4885 if (!Op.isImm())
4886 return true;
4887
4888 uint64_t Offset = Op.getImm();
4889 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
// NOTE(review): the legality condition guarding the following early return is
// absent from this listing (the numbering skips lines 4890-4891); restore it
// from the upstream source.
4892 return true;
4893
// The message is chosen by target and by whether this is a buffer opcode.
4894 Error(getSMEMOffsetLoc(Operands),
4895 isGFX12Plus() && IsBuffer
4896 ? "expected a 23-bit unsigned offset for buffer ops"
4897 : isGFX12Plus() ? "expected a 24-bit signed offset"
4898 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4899 : "expected a 21-bit signed offset");
4900
4901 return false;
4902}
4903
// For SOP2/SOPC instructions, counts the distinct literal values and the
// non-literal expressions used by src0 and src1 and requires that their sum
// does not exceed one. On failure an error is emitted at the src1 operand and
// false is returned.
4904bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
4905 const OperandVector &Operands) {
4906 unsigned Opcode = Inst.getOpcode();
4907 const MCInstrDesc &Desc = MII.get(Opcode);
4908 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4909 return true;
4910
4911 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4912 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4913
4914 const int OpIndices[] = { Src0Idx, Src1Idx };
4915
4916 unsigned NumExprs = 0;
4917 unsigned NumLiterals = 0;
// LiteralValue is intentionally left uninitialized: it is only read once
// NumLiterals is non-zero.
4918 int64_t LiteralValue;
4919
4920 for (int OpIdx : OpIndices) {
4921 if (OpIdx == -1) break;
4922
4923 const MCOperand &MO = Inst.getOperand(OpIdx);
// NOTE(review): the condition opening the block below is absent from this
// listing (the numbering skips line 4925); restore it from the upstream
// source.
4924 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4926 bool IsLit = false;
// Imm stays empty for an expression that is not a lit expression; such an
// operand is counted in NumExprs below. Operands that are neither immediates
// nor expressions are skipped.
4927 std::optional<int64_t> Imm;
4928 if (MO.isImm()) {
4929 Imm = MO.getImm();
4930 } else if (MO.isExpr()) {
4931 if (isLitExpr(MO.getExpr())) {
4932 IsLit = true;
4933 Imm = getLitValue(MO.getExpr());
4934 }
4935 } else {
4936 continue;
4937 }
4938
4939 if (!Imm.has_value()) {
4940 ++NumExprs;
4941 } else if (!isInlineConstant(Inst, OpIdx)) {
4942 auto OpType = static_cast<AMDGPU::OperandType>(
4943 Desc.operands()[OpIdx].OperandType);
4944 int64_t Value = encode32BitLiteral(*Imm, OpType, IsLit);
// NOTE(review): a statement is absent from this listing here (the numbering
// skips line 4946); nothing visible assigns LiteralValue, so it is presumably
// the assignment of Value to it. Confirm against the upstream source.
4945 if (NumLiterals == 0 || LiteralValue != Value) {
4947 ++NumLiterals;
4948 }
4949 }
4950 }
4951 }
4952
4953 if (NumLiterals + NumExprs <= 1)
4954 return true;
4955
4956 Error(getOperandLoc(Operands, Src1Idx),
4957 "only one unique literal operand is allowed");
4958 return false;
4959}
4960
4961bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4962 const unsigned Opc = Inst.getOpcode();
4963 if (isPermlane16(Opc)) {
4964 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4965 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4966
4967 if (OpSel & ~3)
4968 return false;
4969 }
4970
4971 uint64_t TSFlags = MII.get(Opc).TSFlags;
4972
4973 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4974 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4975 if (OpSelIdx != -1) {
4976 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4977 return false;
4978 }
4979 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4980 if (OpSelHiIdx != -1) {
4981 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4982 return false;
4983 }
4984 }
4985
4986 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4987 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4988 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4989 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4990 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4991 if (OpSel & 3)
4992 return false;
4993 }
4994
4995 // Packed math FP32 instructions typically accept SGPRs or VGPRs as source
4996 // operands. On gfx12+, if a source operand uses SGPRs, the HW can only read
4997 // the first SGPR and use it for both the low and high operations.
4998 if (isPackedFP32Inst(Opc) && isGFX12Plus()) {
4999 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
5000 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
5001 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5002 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
5003
5004 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
5005 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
5006 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5007 unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
5008
5009 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5010
5011 auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
5012 unsigned Mask = 1U << Index;
5013 return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
5014 };
5015
5016 if (Src0.isReg() && isSGPR(Src0.getReg(), TRI) &&
5017 !VerifyOneSGPR(/*Index=*/0))
5018 return false;
5019 if (Src1.isReg() && isSGPR(Src1.getReg(), TRI) &&
5020 !VerifyOneSGPR(/*Index=*/1))
5021 return false;
5022
5023 int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
5024 if (Src2Idx != -1) {
5025 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
5026 if (Src2.isReg() && isSGPR(Src2.getReg(), TRI) &&
5027 !VerifyOneSGPR(/*Index=*/2))
5028 return false;
5029 }
5030 }
5031
5032 return true;
5033}
5034
5035bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
5036 if (!hasTrue16Insts())
5037 return true;
5038 const MCRegisterInfo *MRI = getMRI();
5039 const unsigned Opc = Inst.getOpcode();
5040 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5041 if (OpSelIdx == -1)
5042 return true;
5043 unsigned OpSelOpValue = Inst.getOperand(OpSelIdx).getImm();
5044 // If the value is 0 we could have a default OpSel Operand, so conservatively
5045 // allow it.
5046 if (OpSelOpValue == 0)
5047 return true;
5048 unsigned OpCount = 0;
5049 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
5050 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
5051 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), OpName);
5052 if (OpIdx == -1)
5053 continue;
5054 const MCOperand &Op = Inst.getOperand(OpIdx);
5055 if (Op.isReg() &&
5056 MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(Op.getReg())) {
5057 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Op.getReg(), *MRI);
5058 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
5059 if (OpSelOpIsHi != VGPRSuffixIsHi)
5060 return false;
5061 }
5062 ++OpCount;
5063 }
5064
5065 return true;
5066}
5067
5068bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
5069 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
5070
5071 const unsigned Opc = Inst.getOpcode();
5072 uint64_t TSFlags = MII.get(Opc).TSFlags;
5073
5074 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
5075 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
5076 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
5077 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
5078 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
5079 !(TSFlags & SIInstrFlags::IsSWMMAC))
5080 return true;
5081
5082 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
5083 if (NegIdx == -1)
5084 return true;
5085
5086 unsigned Neg = Inst.getOperand(NegIdx).getImm();
5087
5088 // Instructions that have neg_lo or neg_hi operand but neg modifier is allowed
5089 // on some src operands but not allowed on other.
5090 // It is convenient that such instructions don't have src_modifiers operand
5091 // for src operands that don't allow neg because they also don't allow opsel.
5092
5093 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
5094 AMDGPU::OpName::src1_modifiers,
5095 AMDGPU::OpName::src2_modifiers};
5096
5097 for (unsigned i = 0; i < 3; ++i) {
5098 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
5099 if (Neg & (1 << i))
5100 return false;
5101 }
5102 }
5103
5104 return true;
5105}
5106
5107bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
5108 const OperandVector &Operands) {
5109 const unsigned Opc = Inst.getOpcode();
5110 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
5111 if (DppCtrlIdx >= 0) {
5112 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
5113
5114 if (!AMDGPU::isLegalDPALU_DPPControl(getSTI(), DppCtrl) &&
5115 AMDGPU::isDPALU_DPP(MII.get(Opc), MII, getSTI())) {
5116 // DP ALU DPP is supported for row_newbcast only on GFX9* and row_share
5117 // only on GFX12.
5118 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
5119 Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
5120 : "DP ALU dpp only supports row_newbcast");
5121 return false;
5122 }
5123 }
5124
5125 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
5126 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5127
5128 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
5129 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
5130 if (Src1Idx >= 0) {
5131 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
5132 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5133 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
5134 Error(getOperandLoc(Operands, Src1Idx),
5135 "invalid operand for instruction");
5136 return false;
5137 }
5138 if (Src1.isImm()) {
5139 Error(getInstLoc(Operands),
5140 "src1 immediate operand invalid for instruction");
5141 return false;
5142 }
5143 }
5144 }
5145
5146 return true;
5147}
5148
5149// Check if VCC register matches wavefront size
5150bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
5151 return (Reg == AMDGPU::VCC && isWave64()) ||
5152 (Reg == AMDGPU::VCC_LO && isWave32());
5153}
5154
// Validates literal (non-inline constant) usage in the source operands of
// \p Inst. Returns true if the literals are acceptable; otherwise a diagnostic
// is emitted through Error() and false is returned.
//
// NOTE(review): this listing appears to have lost a few lines - the
// initialisers of IsForcedFP64 and IsFP64 below are unbalanced, and
// LiteralValue is never assigned in the visible text. Compare against the
// upstream file before editing the logic.
5155// One unique literal can be used. VOP3 literal is only allowed in GFX10+
5156bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
5157 const OperandVector &Operands) {
5158 unsigned Opcode = Inst.getOpcode();
5159 const MCInstrDesc &Desc = MII.get(Opcode);
// An opcode with a named `imm` operand is treated as carrying a mandatory
// literal.
5160 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
// Only VOP3/VOP3P encodings, instructions with a mandatory literal and VOPD
// instructions are checked; everything else is accepted as is.
5161 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
5162 !HasMandatoryLiteral && !isVOPD(Opcode))
5163 return true;
5164
5165 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
5166
// Index of the operand that introduced the literal seen so far, and (when
// known) that literal's value.
5167 std::optional<unsigned> LiteralOpIdx;
5168 std::optional<uint64_t> LiteralValue;
5169
5170 for (int OpIdx : OpIndices) {
5171 if (OpIdx == -1)
5172 continue;
5173
// Only immediate or expression operands that are SI source operands can be
// literals.
5174 const MCOperand &MO = Inst.getOperand(OpIdx);
5175 if (!MO.isImm() && !MO.isExpr())
5176 continue;
5177 if (!isSISrcOperand(Desc, OpIdx))
5178 continue;
5179
// Imm stays empty for an expression that is not a literal expression.
5180 std::optional<int64_t> Imm;
5181 if (MO.isImm())
5182 Imm = MO.getImm();
5183 else if (MO.isExpr() && isLitExpr(MO.getExpr()))
5184 Imm = getLitValue(MO.getExpr());
5185
5186 bool IsAnotherLiteral = false;
5187 bool IsForcedLit = findMCOperand(Operands, OpIdx).isForcedLit();
5188 bool IsForcedLit64 = findMCOperand(Operands, OpIdx).isForcedLit64();
5189 if (!Imm.has_value()) {
5190 // Literal value not known, so we conservatively assume it's different.
5191 IsAnotherLiteral = true;
5192 } else if (IsForcedLit || IsForcedLit64 || !isInlineConstant(Inst, OpIdx)) {
5193 uint64_t Value = *Imm;
5194 bool IsForcedFP64 =
5195 Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
5197 HasMandatoryLiteral);
5198 unsigned OpTy = Desc.operands()[OpIdx].OperandType;
5199 bool IsFP64 =
5200 (IsForcedFP64 || (AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
5202 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
5203 bool IsValid32Op =
5204 IsForcedLit || AMDGPU::isValid32BitLiteral(Value, IsFP64);
5205
// Reject a value that cannot be encoded as a 32-bit literal (or an explicit
// lit64 without a mandatory literal) unless the subtarget has 64-bit literals
// and the instruction size is 4.
5206 if (((!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) &&
5207 !IsForcedFP64) ||
5208 (IsForcedLit64 && !HasMandatoryLiteral)) &&
5209 (!has64BitLiterals() || Desc.getSize() != 4)) {
5210 Error(getOperandLoc(Operands, OpIdx),
5211 "invalid operand for instruction");
5212 return false;
5213 }
5214
5215 // Only src0 can use lit64 in VOP* encoding.
5216 if (!IsForcedFP64 && (IsForcedLit64 || !IsValid32Op) &&
5217 OpIdx != getNamedOperandIdx(Opcode, OpName::src0)) {
5218 Error(getOperandLoc(Operands, OpIdx),
5219 "invalid operand for instruction");
5220 return false;
5221 }
5222
// For an FP64 operand whose literal fits the 32-bit form, only the high
// 32 bits take part in the uniqueness comparison.
5223 if (IsFP64 && IsValid32Op && !IsForcedFP64)
5224 Value = Hi_32(Value);
5225
5226 IsAnotherLiteral = !LiteralValue || *LiteralValue != Value;
5228 }
5229
// Without FeatureVOP3Literal a literal is accepted only on instructions with
// a mandatory literal.
5230 if (IsAnotherLiteral && !HasMandatoryLiteral &&
5231 !getFeatureBits()[FeatureVOP3Literal]) {
5232 Error(getOperandLoc(Operands, OpIdx),
5233 "literal operands are not supported");
5234 return false;
5235 }
5236
// A second, different literal is rejected; the diagnostic location is chosen
// by getLaterLoc from the two operand locations.
5237 if (LiteralOpIdx && IsAnotherLiteral) {
5238 Error(getLaterLoc(getOperandLoc(Operands, OpIdx),
5239 getOperandLoc(Operands, *LiteralOpIdx)),
5240 "only one unique literal operand is allowed");
5241 return false;
5242 }
5243
5244 if (IsAnotherLiteral)
5245 LiteralOpIdx = OpIdx;
5246 }
5247
5248 return true;
5249}
5250
5251// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
5252static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
5253 const MCRegisterInfo *MRI) {
5254 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name);
5255 if (OpIdx < 0)
5256 return -1;
5257
5258 const MCOperand &Op = Inst.getOperand(OpIdx);
5259 if (!Op.isReg())
5260 return -1;
5261
5262 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5263 auto Reg = Sub ? Sub : Op.getReg();
5264 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5265 return AGPR32.contains(Reg) ? 1 : 0;
5266}
5267
// Checks the VGPR/AGPR register class of the destination and data operands of
// a memory instruction. Returns false on a violation; no diagnostic is emitted
// here.
//
// NOTE(review): the flag mask below is missing a line in this listing (between
// FLAT | MUBUF and DS) - confirm the full set of instruction kinds upstream.
5268bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
5269 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5270 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
5272 SIInstrFlags::DS)) == 0)
5273 return true;
5274
// DS instructions name their data operand data0; the others use vdata.
5275 AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
5276 ? AMDGPU::OpName::data0
5277 : AMDGPU::OpName::vdata;
5278
// IsAGPROperand yields -1 (not a register), 0 (not an AGPR) or 1 (AGPR).
5279 const MCRegisterInfo *MRI = getMRI();
5280 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
5281 int DataAreg = IsAGPROperand(Inst, DataName, MRI);
5282
// For DS, data1 (when it is a register) must have the same classification as
// data0.
5283 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
5284 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
5285 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5286 return false;
5287 }
5288
// With FeatureGFX90AInsts, dst and data must agree when both are registers.
5289 auto FB = getFeatureBits();
5290 if (FB[AMDGPU::FeatureGFX90AInsts]) {
5291 if (DataAreg < 0 || DstAreg < 0)
5292 return true;
5293 return DstAreg == DataAreg;
5294 }
5295
// Otherwise neither operand may be an AGPR.
5296 return DstAreg < 1 && DataAreg < 1;
5297}
5298
5299bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
5300 auto FB = getFeatureBits();
5301 if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
5302 return true;
5303
5304 unsigned Opc = Inst.getOpcode();
5305 const MCRegisterInfo *MRI = getMRI();
5306 // DS_READ_B96_TR_B6 is the only DS instruction in GFX950, that allows
5307 // unaligned VGPR. All others only allow even aligned VGPRs.
5308 if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5309 return true;
5310
5311 if (FB[AMDGPU::FeatureGFX1250Insts]) {
5312 switch (Opc) {
5313 default:
5314 break;
5315 case AMDGPU::DS_LOAD_TR6_B96:
5316 case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5317 // DS_LOAD_TR6_B96 is the only DS instruction in GFX1250, that
5318 // allows unaligned VGPR. All others only allow even aligned VGPRs.
5319 return true;
5320 case AMDGPU::GLOBAL_LOAD_TR6_B96:
5321 case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5322 // GLOBAL_LOAD_TR6_B96 is the only GLOBAL instruction in GFX1250, that
5323 // allows unaligned VGPR for vdst, but other operands still only allow
5324 // even aligned VGPRs.
5325 int VAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
5326 if (VAddrIdx != -1) {
5327 const MCOperand &Op = Inst.getOperand(VAddrIdx);
5328 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5329 if ((Sub - AMDGPU::VGPR0) & 1)
5330 return false;
5331 }
5332 return true;
5333 }
5334 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5335 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
5336 return true;
5337 }
5338 }
5339
5340 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5341 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5342 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
5343 const MCOperand &Op = Inst.getOperand(I);
5344 if (!Op.isReg())
5345 continue;
5346
5347 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5348 if (!Sub)
5349 continue;
5350
5351 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
5352 return false;
5353 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
5354 return false;
5355 }
5356
5357 return true;
5358}
5359
5360SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
5361 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5362 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5363 if (Op.isBLGP())
5364 return Op.getStartLoc();
5365 }
5366 return SMLoc();
5367}
5368
5369bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
5370 const OperandVector &Operands) {
5371 unsigned Opc = Inst.getOpcode();
5372 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
5373 if (BlgpIdx == -1)
5374 return true;
5375 SMLoc BLGPLoc = getBLGPLoc(Operands);
5376 if (!BLGPLoc.isValid())
5377 return true;
5378 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
5379 auto FB = getFeatureBits();
5380 bool UsesNeg = false;
5381 if (FB[AMDGPU::FeatureGFX940Insts]) {
5382 switch (Opc) {
5383 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5384 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5385 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5386 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5387 UsesNeg = true;
5388 }
5389 }
5390
5391 if (IsNeg == UsesNeg)
5392 return true;
5393
5394 Error(BLGPLoc,
5395 UsesNeg ? "invalid modifier: blgp is not supported"
5396 : "invalid modifier: neg is not supported");
5397
5398 return false;
5399}
5400
5401bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
5402 const OperandVector &Operands) {
5403 if (!isGFX11Plus())
5404 return true;
5405
5406 unsigned Opc = Inst.getOpcode();
5407 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5408 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5409 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5410 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5411 return true;
5412
5413 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
5414 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
5415 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
5416 if (Reg == AMDGPU::SGPR_NULL)
5417 return true;
5418
5419 Error(getOperandLoc(Operands, Src0Idx), "src0 must be null");
5420 return false;
5421}
5422
5423bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
5424 const OperandVector &Operands) {
5425 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5426 if ((TSFlags & SIInstrFlags::DS) == 0)
5427 return true;
5428 if (TSFlags & SIInstrFlags::GWS)
5429 return validateGWS(Inst, Operands);
5430 // Only validate GDS for non-GWS instructions.
5431 if (hasGDS())
5432 return true;
5433 int GDSIdx =
5434 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
5435 if (GDSIdx < 0)
5436 return true;
5437 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
5438 if (GDS) {
5439 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5440 Error(S, "gds modifier is not supported on this GPU");
5441 return false;
5442 }
5443 return true;
5444}
5445
5446// gfx90a has an undocumented limitation:
5447// DS_GWS opcodes must use even aligned registers.
5448bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
5449 const OperandVector &Operands) {
5450 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5451 return true;
5452
5453 int Opc = Inst.getOpcode();
5454 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5455 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5456 return true;
5457
5458 const MCRegisterInfo *MRI = getMRI();
5459 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5460 int Data0Pos =
5461 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
5462 assert(Data0Pos != -1);
5463 auto Reg = Inst.getOperand(Data0Pos).getReg();
5464 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5465 if (RegIdx & 1) {
5466 Error(getOperandLoc(Operands, Data0Pos), "vgpr must be even aligned");
5467 return false;
5468 }
5469
5470 return true;
5471}
5472
// Validates the cache-policy (cpol) operand of Inst. Returns true when the
// opcode has no cpol operand or the bits are acceptable; most failures emit a
// diagnostic and return false. On GFX12+ the final verdict is delegated to
// validateTHAndScopeBits.
//
// NOTE(review): this listing is missing lines (the remainder of the
// AllowSCCModifier initialiser, the condition guarding the bare `return true;`
// before the atomic checks, and the first half of one
// SMLoc::getFromPointer call) - compare against the upstream file.
5473bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
5474 const OperandVector &Operands,
5475 SMLoc IDLoc) {
5476 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
5477 AMDGPU::OpName::cpol);
5478 if (CPolPos == -1)
5479 return true;
5480
5481 unsigned CPol = Inst.getOperand(CPolPos).getImm();
5482
// Before GFX1250, scale_offset and nv are diagnosed. The diagnostic location
// is found by searching the source text starting at the cpol operand for the
// modifier name.
// NOTE(review): these two errors, and the scale_offset one that follows, are
// emitted without returning false, so validation continues - confirm this is
// intended.
5483 if (!isGFX1250Plus()) {
5484 if (CPol & CPol::SCAL) {
5485 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5486 StringRef CStr(S.getPointer());
5487 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5488 Error(S, "scale_offset is not supported on this GPU");
5489 }
5490 if (CPol & CPol::NV) {
5491 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5492 StringRef CStr(S.getPointer());
5493 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
5494 Error(S, "nv is not supported on this GPU");
5495 }
5496 }
5497
5498 if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Inst.getOpcode())) {
5499 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5500 StringRef CStr(S.getPointer());
5501 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5502 Error(S, "scale_offset is not supported for this instruction");
5503 }
5504
// GFX12+ uses the th/scope checks instead of everything below.
5505 if (isGFX12Plus())
5506 return validateTHAndScopeBits(Inst, Operands, CPol);
5507
// SMRD: no cache policy at all on SI/CI; elsewhere only GLC and DLC bits.
5508 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5509 if (TSFlags & SIInstrFlags::SMRD) {
5510 if (CPol && (isSI() || isCI())) {
5511 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5512 Error(S, "cache policy is not supported for SMRD instructions");
5513 return false;
5514 }
5515 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
5516 Error(IDLoc, "invalid cache policy for SMEM instruction");
5517 return false;
5518 }
5519 }
5520
// On GFX90A (but not GFX940) the scc modifier is limited to the instruction
// kinds collected in AllowSCCModifier.
5521 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
5522 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
5525 if (!(TSFlags & AllowSCCModifier)) {
5526 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5527 StringRef CStr(S.getPointer());
5528 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
5529 Error(S,
5530 "scc modifier is not supported for this instruction on this GPU");
5531 return false;
5532 }
5533 }
5534
5536 return true;
5537
// Returning atomics (other than MIMG) must set GLC; all remaining instructions
// reaching this point must not. The modifier is named sc0 in GFX940
// diagnostics.
5538 if (TSFlags & SIInstrFlags::IsAtomicRet) {
5539 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
5540 Error(IDLoc, isGFX940() ? "instruction must use sc0"
5541 : "instruction must use glc");
5542 return false;
5543 }
5544 } else {
5545 if (CPol & CPol::GLC) {
5546 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5547 StringRef CStr(S.getPointer());
5549 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
5550 Error(S, isGFX940() ? "instruction must not use sc0"
5551 : "instruction must not use glc");
5552 return false;
5553 }
5554 }
5555
5556 return true;
5557}
5558
// Validates the temporal-hint (th) and scope fields extracted from CPol for
// Inst. Returns true if the combination is accepted; otherwise emits a
// diagnostic at the cpol operand location and returns false.
//
// NOTE(review): several condition lines are missing from this listing (the
// second halves of the TH_ATOMIC_RETURN checks and of the TH_BYPASS scope
// check) - compare against the upstream file before changing the logic.
5559bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
5560 const OperandVector &Operands,
5561 const unsigned CPol) {
5562 const unsigned TH = CPol & AMDGPU::CPol::TH;
5563 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
5564
5565 const unsigned Opcode = Inst.getOpcode();
5566 const MCInstrDesc &TID = MII.get(Opcode);
5567
// Reports Msg at the cpol operand and yields false, so callers can write
// `return PrintError(...)`.
5568 auto PrintError = [&](StringRef Msg) {
5569 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5570 Error(S, Msg);
5571 return false;
5572 };
5573
5574 if ((TH & AMDGPU::CPol::TH_ATOMIC_RETURN) &&
5576 return PrintError("th:TH_ATOMIC_RETURN requires a destination operand");
5577
5578 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5581 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5582
// A zero th value needs no further checking.
5583 if (TH == 0)
5584 return true;
5585
// SMRD instructions reject the TH_NT_RT, TH_RT_NT and TH_NT_HT values.
5586 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5587 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5588 (TH == AMDGPU::CPol::TH_NT_HT)))
5589 return PrintError("invalid th value for SMEM instruction");
5590
5591 if (TH == AMDGPU::CPol::TH_BYPASS) {
5592 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5594 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5596 return PrintError("scope and th combination is not valid");
5597 }
5598
// The th value must carry the type bit matching the instruction's temporal
// hint type; anything that is neither atomic nor store is checked as a load.
5599 unsigned THType = AMDGPU::getTemporalHintType(TID);
5600 if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
5601 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5602 return PrintError("invalid th value for atomic instructions");
5603 } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
5604 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5605 return PrintError("invalid th value for store instructions");
5606 } else {
5607 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5608 return PrintError("invalid th value for load instructions");
5609 }
5610
5611 return true;
5612}
5613
// Rejects an explicitly written TFE modifier on store instructions. Returns
// true if acceptable; otherwise emits a diagnostic and returns false.
//
// NOTE(review): the second half of the `if` condition is missing from this
// listing - confirm which instruction kinds are covered upstream.
5614bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5615 const OperandVector &Operands) {
5616 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5617 if (Desc.mayStore() &&
5619 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
// A location different from the instruction location is taken to mean that a
// TFE operand was actually found among the parsed operands.
5620 if (Loc != getInstLoc(Operands)) {
5621 Error(Loc, "TFE modifier has no meaning for store instructions");
5622 return false;
5623 }
5624 }
5625
5626 return true;
5627}
5628
// Validates the matrix format operands of a WMMA instruction: the register
// tuple size of src0/src1 against matrix_a_fmt/matrix_b_fmt and, when present,
// the matrix/scale format combination. Opcodes without a matrix_a_fmt operand
// are accepted unchanged. Returns true if valid; otherwise emits a diagnostic
// and returns false.
//
// NOTE(review): the size comparison inside validateFmt is missing from this
// listing (only its `return true;` is visible) - compare against upstream.
5629bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
5630 const OperandVector &Operands) {
5631 unsigned Opc = Inst.getOpcode();
5632 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5633 const MCInstrDesc &Desc = MII.get(Opc);
5634
5635 int AFmtIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
5636 if (AFmtIdx == -1)
5637 return true;
5638 unsigned AFmt = Inst.getOperand(AFmtIdx).getImm();
// matrix_b_fmt is read without a -1 check.
// NOTE(review): presumably it always accompanies matrix_a_fmt - confirm.
5639 int BFmtIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
5640 unsigned BFmt = Inst.getOperand(BFmtIdx).getImm();
5641
// Checks the register-class size of operand SrcOp against format Fmt and
// reports a mismatch at that operand's location.
5642 auto validateFmt = [&](unsigned Fmt, AMDGPU::OpName SrcOp) -> bool {
5643 int SrcIdx = AMDGPU::getNamedOperandIdx(Opc, SrcOp);
5644 unsigned RegSize =
5645 TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[SrcIdx], HwMode))
5646 .getSizeInBits();
5647
5649 return true;
5650
5651 Error(getOperandLoc(Operands, SrcIdx),
5652 "wrong register tuple size for " +
5653 Twine(WMMAMods::ModMatrixFmt[Fmt]));
5654 return false;
5655 };
5656
5657 if (!validateFmt(AFmt, AMDGPU::OpName::src0) ||
5658 !validateFmt(BFmt, AMDGPU::OpName::src1))
5659 return false;
5660
// Scale formats are validated only for opcodes that have a matrix_a_scale_fmt
// operand.
5661 int AScaleIdx =
5662 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
5663 if (AScaleIdx == -1)
5664 return true;
5665 unsigned AScale = Inst.getOperand(AScaleIdx).getImm();
5666 int BScaleIdx =
5667 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
5668 unsigned BScale = Inst.getOperand(BScaleIdx).getImm();
5669 if (!isValidWMMAScaleFmtCombination(AFmt, AScale, BFmt, BScale)) {
5670 Error(getImmLoc(AMDGPUOperand::ImmTyMatrixAFMT, Operands),
5671 "invalid matrix and scale format combination");
5672 return false;
5673 }
5674
5675 return true;
5676}
5677
5678bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, SMLoc IDLoc,
5679 const OperandVector &Operands) {
5680 if (!validateLdsDirect(Inst, Operands))
5681 return false;
5682 if (!validateTrue16OpSel(Inst)) {
5683 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5684 "op_sel operand conflicts with 16-bit operand suffix");
5685 return false;
5686 }
5687 if (!validateSOPLiteral(Inst, Operands))
5688 return false;
5689 if (!validateVOPLiteral(Inst, Operands)) {
5690 return false;
5691 }
5692 if (!validateConstantBusLimitations(Inst, Operands)) {
5693 return false;
5694 }
5695 if (!validateVOPD(Inst, Operands)) {
5696 return false;
5697 }
5698 if (!validateIntClampSupported(Inst)) {
5699 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5700 "integer clamping is not supported on this GPU");
5701 return false;
5702 }
5703 if (!validateOpSel(Inst)) {
5704 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5705 "invalid op_sel operand");
5706 return false;
5707 }
5708 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5709 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5710 "invalid neg_lo operand");
5711 return false;
5712 }
5713 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5714 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5715 "invalid neg_hi operand");
5716 return false;
5717 }
5718 if (!validateDPP(Inst, Operands)) {
5719 return false;
5720 }
5721 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5722 if (!validateMIMGD16(Inst)) {
5723 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5724 "d16 modifier is not supported on this GPU");
5725 return false;
5726 }
5727 if (!validateMIMGDim(Inst, Operands)) {
5728 Error(IDLoc, "missing dim operand");
5729 return false;
5730 }
5731 if (!validateTensorR128(Inst)) {
5732 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5733 "instruction must set modifier r128=0");
5734 return false;
5735 }
5736 if (!validateMIMGMSAA(Inst)) {
5737 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5738 "invalid dim; must be MSAA type");
5739 return false;
5740 }
5741 if (!validateMIMGDataSize(Inst, IDLoc)) {
5742 return false;
5743 }
5744 if (!validateMIMGAddrSize(Inst, IDLoc))
5745 return false;
5746 if (!validateMIMGAtomicDMask(Inst)) {
5747 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5748 "invalid atomic image dmask");
5749 return false;
5750 }
5751 if (!validateMIMGGatherDMask(Inst)) {
5752 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5753 "invalid image_gather dmask: only one bit must be set");
5754 return false;
5755 }
5756 if (!validateMovrels(Inst, Operands)) {
5757 return false;
5758 }
5759 if (!validateOffset(Inst, Operands)) {
5760 return false;
5761 }
5762 if (!validateMAIAccWrite(Inst, Operands)) {
5763 return false;
5764 }
5765 if (!validateMAISrc2(Inst, Operands)) {
5766 return false;
5767 }
5768 if (!validateMFMA(Inst, Operands)) {
5769 return false;
5770 }
5771 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5772 return false;
5773 }
5774
5775 if (!validateAGPRLdSt(Inst)) {
5776 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5777 ? "invalid register class: data and dst should be all VGPR or AGPR"
5778 : "invalid register class: agpr loads and stores not supported on this GPU"
5779 );
5780 return false;
5781 }
5782 if (!validateVGPRAlign(Inst)) {
5783 Error(IDLoc,
5784 "invalid register class: vgpr tuples must be 64 bit aligned");
5785 return false;
5786 }
5787 if (!validateDS(Inst, Operands)) {
5788 return false;
5789 }
5790
5791 if (!validateBLGP(Inst, Operands)) {
5792 return false;
5793 }
5794
5795 if (!validateDivScale(Inst)) {
5796 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5797 return false;
5798 }
5799 if (!validateWaitCnt(Inst, Operands)) {
5800 return false;
5801 }
5802 if (!validateTFE(Inst, Operands)) {
5803 return false;
5804 }
5805 if (!validateWMMA(Inst, Operands)) {
5806 return false;
5807 }
5808
5809 return true;
5810}
5811
5813 const FeatureBitset &FBS,
5814 unsigned VariantID = 0);
5815
// Forward declaration; the definition is not part of this section of the file.
// Used by isSupportedMnemo to test whether Mnemonic is available for the given
// feature set and assembler variant.
5816static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5817 const FeatureBitset &AvailableFeatures,
5818 unsigned VariantID);
5819
5820bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5821 const FeatureBitset &FBS) {
5822 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5823}
5824
5825bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5826 const FeatureBitset &FBS,
5827 ArrayRef<unsigned> Variants) {
5828 for (auto Variant : Variants) {
5829 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5830 return true;
5831 }
5832
5833 return false;
5834}
5835
5836bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5837 SMLoc IDLoc) {
5838 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5839
5840 // Check if requested instruction variant is supported.
5841 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5842 return false;
5843
5844 // This instruction is not supported.
5845 // Clear any other pending errors because they are no longer relevant.
5846 getParser().clearPendingErrors();
5847
5848 // Requested instruction variant is not supported.
5849 // Check if any other variants are supported.
5850 StringRef VariantName = getMatchedVariantName();
5851 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5852 return Error(IDLoc,
5853 Twine(VariantName,
5854 " variant of this instruction is not supported"));
5855 }
5856
5857 // Check if this instruction may be used with a different wavesize.
5858 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5859 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5860 // FIXME: Use getAvailableFeatures, and do not manually recompute
5861 FeatureBitset FeaturesWS32 = getFeatureBits();
5862 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5863 .flip(AMDGPU::FeatureWavefrontSize32);
5864 FeatureBitset AvailableFeaturesWS32 =
5865 ComputeAvailableFeatures(FeaturesWS32);
5866
5867 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5868 return Error(IDLoc, "instruction requires wavesize=32");
5869 }
5870
5871 // Finally check if this instruction is supported on any other GPU.
5872 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5873 return Error(IDLoc, "instruction not supported on this GPU (" +
5874 getSTI().getCPU() + ")" + ": " + Mnemo);
5875 }
5876
5877 // Instruction not supported on any GPU. Probably a typo.
5878 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5879 return Error(IDLoc, "invalid instruction" + Suggestion);
5880}
5881
5882static bool isInvalidVOPDY(const OperandVector &Operands,
5883 uint64_t InvalidOprIdx) {
5884 assert(InvalidOprIdx < Operands.size());
5885 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5886 if (Op.isToken() && InvalidOprIdx > 1) {
5887 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5888 return PrevOp.isToken() && PrevOp.getToken() == "::";
5889 }
5890 return false;
5891}
5892
5893bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5894 OperandVector &Operands,
5895 MCStreamer &Out,
5896 uint64_t &ErrorInfo,
5897 bool MatchingInlineAsm) {
5898 MCInst Inst;
5899 Inst.setLoc(IDLoc);
5900 unsigned Result = Match_Success;
5901 for (auto Variant : getMatchedVariants()) {
5902 uint64_t EI;
5903 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5904 Variant);
5905 // We order match statuses from least to most specific. We use most specific
5906 // status as resulting
5907 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5908 if (R == Match_Success || R == Match_MissingFeature ||
5909 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5910 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5911 Result != Match_MissingFeature)) {
5912 Result = R;
5913 ErrorInfo = EI;
5914 }
5915 if (R == Match_Success)
5916 break;
5917 }
5918
5919 if (Result == Match_Success) {
5920 if (!validateInstruction(Inst, IDLoc, Operands)) {
5921 return true;
5922 }
5923 Out.emitInstruction(Inst, getSTI());
5924 return false;
5925 }
5926
5927 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5928 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5929 return true;
5930 }
5931
5932 switch (Result) {
5933 default: break;
5934 case Match_MissingFeature:
5935 // It has been verified that the specified instruction
5936 // mnemonic is valid. A match was found but it requires
5937 // features which are not supported on this GPU.
5938 return Error(IDLoc, "operands are not valid for this GPU or mode");
5939
5940 case Match_InvalidOperand: {
5941 SMLoc ErrorLoc = IDLoc;
5942 if (ErrorInfo != ~0ULL) {
5943 if (ErrorInfo >= Operands.size()) {
5944 return Error(IDLoc, "too few operands for instruction");
5945 }
5946 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5947 if (ErrorLoc == SMLoc())
5948 ErrorLoc = IDLoc;
5949
5950 if (isInvalidVOPDY(Operands, ErrorInfo))
5951 return Error(ErrorLoc, "invalid VOPDY instruction");
5952 }
5953 return Error(ErrorLoc, "invalid operand for instruction");
5954 }
5955
5956 case Match_MnemonicFail:
5957 llvm_unreachable("Invalid instructions should have been handled already");
5958 }
5959 llvm_unreachable("Implement any new match types added!");
5960}
5961
5962bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5963 int64_t Tmp = -1;
5964 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5965 return true;
5966 }
5967 if (getParser().parseAbsoluteExpression(Tmp)) {
5968 return true;
5969 }
5970 Ret = static_cast<uint32_t>(Tmp);
5971 return false;
5972}
5973
5974bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5975 if (!getSTI().getTargetTriple().isAMDGCN())
5976 return TokError("directive only supported for amdgcn architecture");
5977
5978 std::string TargetIDDirective;
5979 SMLoc TargetStart = getTok().getLoc();
5980 if (getParser().parseEscapedString(TargetIDDirective))
5981 return true;
5982
5983 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5984 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5985 return getParser().Error(TargetRange.Start,
5986 (Twine(".amdgcn_target directive's target id ") +
5987 Twine(TargetIDDirective) +
5988 Twine(" does not match the specified target id ") +
5989 Twine(getTargetStreamer().getTargetID()->toString())).str());
5990
5991 return false;
5992}
5993
5994bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5995 return Error(Range.Start, "value out of range", Range);
5996}
5997
/// Build the MCExprs for the granulated VGPR and SGPR block counts and store
/// them in \p VGPRBlocks and \p SGPRBlocks.
///
/// \p NextFreeVGPR / \p NextFreeSGPR are the register-count expressions the
/// block counts are derived from. \p VCCUsed, \p FlatScrUsed and \p XNACKUsed
/// feed the "extra SGPRs" expression on the pre-gfx10 path.
/// \p EnableWavefrontSize32 is forwarded to the VGPR encoding-granule query.
/// \p SGPRRange is the source range used when reporting an out-of-range SGPR
/// count; \p VGPRRange is not referenced in the visible body.
///
/// \returns true after reporting an out-of-range error, false otherwise.
///
/// NOTE(review): in this listing the statement controlled by
/// `if (Version.Major >= 10)` and the right-hand side of the `NumSGPRs =`
/// assignment under FeatureSGPRInitBug are not present; they appear to have
/// been dropped and must be restored from the canonical source.
5998bool AMDGPUAsmParser::calculateGPRBlocks(
5999 const FeatureBitset &Features, const MCExpr *VCCUsed,
6000 const MCExpr *FlatScrUsed, bool XNACKUsed,
6001 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
6002 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
6003 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
6004 // TODO(scott.linder): These calculations are duplicated from
6005 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
6006 IsaVersion Version = getIsaVersion(getSTI().getCPU());
6007 MCContext &Ctx = getContext();
6008
6009 const MCExpr *NumSGPRs = NextFreeSGPR;
6010 int64_t EvaluatedSGPRs;
6011
6012 if (Version.Major >= 10)
6014 else {
6015 unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(getSTI());
6016
// On gfx8+ without the SGPR init bug the addressable limit is checked
// against the count before the extra SGPRs are added. The check only fires
// when the expression can be evaluated now.
6017 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
6018 !Features.test(FeatureSGPRInitBug) &&
6019 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
6020 return OutOfRangeError(SGPRRange);
6021
6022 const MCExpr *ExtraSGPRs =
6023 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
6024 NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
6025
// On gfx7 and earlier, or with the SGPR init bug, the limit is checked
// against the count after the extra SGPRs have been added.
6026 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
6027 (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
6028 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
6029 return OutOfRangeError(SGPRRange);
6030
6031 if (Features.test(FeatureSGPRInitBug))
6032 NumSGPRs =
6034 }
6035
6036 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
6037 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
6038 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
6039 unsigned Granule) -> const MCExpr * {
6040 const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
6041 const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
6042 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
6043 const MCExpr *AlignToGPR =
6044 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
6045 const MCExpr *DivGPR =
6046 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
6047 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
6048 return SubGPR;
6049 };
6050
// Both results stay symbolic; callers decide whether they can be evaluated.
6051 VGPRBlocks = GetNumGPRBlocks(
6052 NextFreeVGPR,
6053 IsaInfo::getVGPREncodingGranule(getSTI(), EnableWavefrontSize32));
6054 SGPRBlocks =
6055 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(getSTI()));
6056
6057 return false;
6058}
6059
/// Parse a `.amdhsa_kernel <name>` ... `.end_amdhsa_kernel` block, fill in a
/// kernel descriptor from the `.amdhsa_*` directives inside it, validate the
/// result and hand it to the target streamer.
///
/// \returns true if an error was reported, false on success.
///
/// NOTE(review): this listing is missing a number of lines (gaps in the
/// embedded line numbers). The missing lines are mostly the first line of a
/// multi-line condition or of a macro/function invocation, so several
/// statements below appear without their opener. Restore them from the
/// canonical source before relying on this text.
6060bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
6061 if (!getSTI().getTargetTriple().isAMDGCN())
6062 return TokError("directive only supported for amdgcn architecture");
6063
6064 if (!isHsaAbi(getSTI()))
6065 return TokError("directive only supported for amdhsa OS");
6066
6067 StringRef KernelName;
6068 if (getParser().parseIdentifier(KernelName))
6069 return true;
6070
// NOTE(review): the initializer expression of KD is partly missing here.
6071 AMDGPU::MCKernelDescriptor KD =
6073 &getSTI(), getContext());
6074
// Names of the directives already seen in this block; used both to reject
// repeats and, after the loop, to check for required directives.
6075 StringSet<> Seen;
6076
6077 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
6078
6079 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
6080 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
6081
6082 SMRange VGPRRange;
6083 const MCExpr *NextFreeVGPR = ZeroExpr;
6084 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
6085 const MCExpr *NamedBarCnt = ZeroExpr;
6086 uint64_t SharedVGPRCount = 0;
6087 uint64_t PreloadLength = 0;
6088 uint64_t PreloadOffset = 0;
6089 SMRange SGPRRange;
6090 const MCExpr *NextFreeSGPR = ZeroExpr;
6091
6092 // Count the number of user SGPRs implied from the enabled feature bits.
6093 unsigned ImpliedUserSGPRCount = 0;
6094
6095 // Track if the asm explicitly contains the directive for the user SGPR
6096 // count.
6097 std::optional<unsigned> ExplicitUserSGPRCount;
6098 const MCExpr *ReserveVCC = OneExpr;
6099 const MCExpr *ReserveFlatScr = OneExpr;
6100 std::optional<bool> EnableWavefrontSize32;
6101
// One directive (identifier followed by an expression) is handled per
// iteration; the loop ends at `.end_amdhsa_kernel`.
6102 while (true) {
6103 while (trySkipToken(AsmToken::EndOfStatement));
6104
6105 StringRef ID;
6106 SMRange IDRange = getTok().getLocRange();
6107 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
6108 return true;
6109
6110 if (ID == ".end_amdhsa_kernel")
6111 break;
6112
6113 if (!Seen.insert(ID).second)
6114 return TokError(".amdhsa_ directives cannot be repeated");
6115
6116 SMLoc ValStart = getLoc();
6117 const MCExpr *ExprVal;
6118 if (getParser().parseExpression(ExprVal))
6119 return true;
6120 SMLoc ValEnd = getLoc();
6121 SMRange ValRange = SMRange(ValStart, ValEnd);
6122
// Try to fold the expression right away. Val stays 0 when the expression
// cannot be evaluated yet; negative evaluated values are rejected.
6123 int64_t IVal = 0;
6124 uint64_t Val = IVal;
6125 bool EvaluatableExpr;
6126 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
6127 if (IVal < 0)
6128 return OutOfRangeError(ValRange);
6129 Val = IVal;
6130 }
6131
// PARSE_BITS_ENTRY range-checks the folded value `Val` (not VALUE) against
// the width of ENTRY and then stores the expression VALUE into FIELD.
6132#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
6133 if (!isUInt<ENTRY##_WIDTH>(Val)) \
6134 return OutOfRangeError(RANGE); \
6135 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
6136 getContext());
6137
6138// Some fields use the parsed value immediately which requires the expression to
6139// be solvable.
6140#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6141 if (!(RESOLVED)) \
6142 return Error(IDRange.Start, "directive should have resolvable expression", \
6143 IDRange);
6144
// Dispatch on the directive name. Directives that contribute user SGPRs also
// bump ImpliedUserSGPRCount when their value is non-zero.
6145 if (ID == ".amdhsa_group_segment_fixed_size") {
6147 CHAR_BIT>(Val))
6148 return OutOfRangeError(ValRange);
6149 KD.group_segment_fixed_size = ExprVal;
6150 } else if (ID == ".amdhsa_private_segment_fixed_size") {
6152 CHAR_BIT>(Val))
6153 return OutOfRangeError(ValRange);
6154 KD.private_segment_fixed_size = ExprVal;
6155 } else if (ID == ".amdhsa_kernarg_size") {
6156 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
6157 return OutOfRangeError(ValRange);
6158 KD.kernarg_size = ExprVal;
6159 } else if (ID == ".amdhsa_user_sgpr_count") {
6160 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6161 ExplicitUserSGPRCount = Val;
6162 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
6163 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6165 return Error(IDRange.Start,
6166 "directive is not supported with architected flat scratch",
6167 IDRange);
6169 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6170 ExprVal, ValRange);
6171 if (Val)
6172 ImpliedUserSGPRCount += 4;
6173 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
6174 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6175 if (!hasKernargPreload())
6176 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6177
6178 if (Val > getMaxNumUserSGPRs())
6179 return OutOfRangeError(ValRange);
6180 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
6181 ValRange);
6182 if (Val) {
6183 ImpliedUserSGPRCount += Val;
6184 PreloadLength = Val;
6185 }
6186 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
6187 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6188 if (!hasKernargPreload())
6189 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6190
6191 if (Val >= 1024)
6192 return OutOfRangeError(ValRange);
6193 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
6194 ValRange);
6195 if (Val)
6196 PreloadOffset = Val;
6197 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
6198 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6200 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6201 ValRange);
6202 if (Val)
6203 ImpliedUserSGPRCount += 2;
6204 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
6205 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6207 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6208 ValRange);
6209 if (Val)
6210 ImpliedUserSGPRCount += 2;
6211 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
6212 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6214 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6215 ExprVal, ValRange);
6216 if (Val)
6217 ImpliedUserSGPRCount += 2;
6218 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
6219 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6221 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6222 ValRange);
6223 if (Val)
6224 ImpliedUserSGPRCount += 2;
6225 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
6227 return Error(IDRange.Start,
6228 "directive is not supported with architected flat scratch",
6229 IDRange);
6230 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6232 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6233 ExprVal, ValRange);
6234 if (Val)
6235 ImpliedUserSGPRCount += 2;
6236 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
6237 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6239 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6240 ExprVal, ValRange);
6241 if (Val)
6242 ImpliedUserSGPRCount += 1;
6243 } else if (ID == ".amdhsa_wavefront_size32") {
6244 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6245 if (IVersion.Major < 10)
6246 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6247 EnableWavefrontSize32 = Val;
6249 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6250 ValRange);
6251 } else if (ID == ".amdhsa_uses_dynamic_stack") {
6253 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6254 ValRange);
6255 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6257 return Error(IDRange.Start,
6258 "directive is not supported with architected flat scratch",
6259 IDRange);
6261 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6262 ValRange);
6263 } else if (ID == ".amdhsa_enable_private_segment") {
6265 return Error(
6266 IDRange.Start,
6267 "directive is not supported without architected flat scratch",
6268 IDRange);
6270 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6271 ValRange);
6272 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
6274 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6275 ValRange);
6276 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
6278 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6279 ValRange);
6280 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
6282 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6283 ValRange);
6284 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
6286 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6287 ValRange);
6288 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
6290 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6291 ValRange);
6292 } else if (ID == ".amdhsa_next_free_vgpr") {
6293 VGPRRange = ValRange;
6294 NextFreeVGPR = ExprVal;
6295 } else if (ID == ".amdhsa_next_free_sgpr") {
6296 SGPRRange = ValRange;
6297 NextFreeSGPR = ExprVal;
6298 } else if (ID == ".amdhsa_accum_offset") {
6299 if (!isGFX90A())
6300 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6301 AccumOffset = ExprVal;
6302 } else if (ID == ".amdhsa_named_barrier_count") {
6303 if (!isGFX1250Plus())
6304 return Error(IDRange.Start, "directive requires gfx1250+", IDRange);
6305 NamedBarCnt = ExprVal;
6306 } else if (ID == ".amdhsa_reserve_vcc") {
6307 if (EvaluatableExpr && !isUInt<1>(Val))
6308 return OutOfRangeError(ValRange);
6309 ReserveVCC = ExprVal;
6310 } else if (ID == ".amdhsa_reserve_flat_scratch") {
6311 if (IVersion.Major < 7)
6312 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
6314 return Error(IDRange.Start,
6315 "directive is not supported with architected flat scratch",
6316 IDRange);
6317 if (EvaluatableExpr && !isUInt<1>(Val))
6318 return OutOfRangeError(ValRange);
6319 ReserveFlatScr = ExprVal;
6320 } else if (ID == ".amdhsa_reserve_xnack_mask") {
6321 if (IVersion.Major < 8)
6322 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
6323 if (!isUInt<1>(Val))
6324 return OutOfRangeError(ValRange);
6325 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6326 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
6327 IDRange);
6328 } else if (ID == ".amdhsa_float_round_mode_32") {
6330 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6331 ValRange);
6332 } else if (ID == ".amdhsa_float_round_mode_16_64") {
6334 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6335 ValRange);
6336 } else if (ID == ".amdhsa_float_denorm_mode_32") {
6338 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6339 ValRange);
6340 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
6342 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6343 ValRange);
6344 } else if (ID == ".amdhsa_dx10_clamp") {
6345 if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
6346 return Error(IDRange.Start, "directive unsupported on gfx1170+",
6347 IDRange);
6349 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6350 ValRange);
6351 } else if (ID == ".amdhsa_ieee_mode") {
6352 if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
6353 return Error(IDRange.Start, "directive unsupported on gfx1170+",
6354 IDRange);
6356 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6357 ValRange);
6358 } else if (ID == ".amdhsa_fp16_overflow") {
6359 if (IVersion.Major < 9)
6360 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
6362 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6363 ValRange);
6364 } else if (ID == ".amdhsa_tg_split") {
6365 if (!isGFX90A())
6366 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6367 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
6368 ExprVal, ValRange);
6369 } else if (ID == ".amdhsa_workgroup_processor_mode") {
6370 if (!supportsWGP(getSTI()))
6371 return Error(IDRange.Start,
6372 "directive unsupported on " + getSTI().getCPU(), IDRange);
6374 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6375 ValRange);
6376 } else if (ID == ".amdhsa_memory_ordered") {
6377 if (IVersion.Major < 10)
6378 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6380 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6381 ValRange);
6382 } else if (ID == ".amdhsa_forward_progress") {
6383 if (IVersion.Major < 10)
6384 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6386 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6387 ValRange);
6388 } else if (ID == ".amdhsa_shared_vgpr_count") {
6389 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6390 if (IVersion.Major < 10 || IVersion.Major >= 12)
6391 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
6392 IDRange);
6393 SharedVGPRCount = Val;
6395 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6396 ValRange);
6397 } else if (ID == ".amdhsa_inst_pref_size") {
6398 if (IVersion.Major < 11)
6399 return Error(IDRange.Start, "directive requires gfx11+", IDRange);
6400 if (IVersion.Major == 11) {
6402 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6403 ValRange);
6404 } else {
6406 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6407 ValRange);
6408 }
6409 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
6412 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6413 ExprVal, ValRange);
6414 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
6416 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6417 ExprVal, ValRange);
6418 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
6421 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6422 ExprVal, ValRange);
6423 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
6425 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6426 ExprVal, ValRange);
6427 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
6429 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6430 ExprVal, ValRange);
6431 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
6433 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6434 ExprVal, ValRange);
6435 } else if (ID == ".amdhsa_exception_int_div_zero") {
6437 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6438 ExprVal, ValRange);
6439 } else if (ID == ".amdhsa_round_robin_scheduling") {
6440 if (IVersion.Major < 12)
6441 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
6443 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6444 ValRange);
6445 } else {
6446 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
6447 }
6448
// Only PARSE_BITS_ENTRY is undefined here; no #undef for
// EXPR_RESOLVE_OR_ERROR is visible in this block.
6449#undef PARSE_BITS_ENTRY
6450 }
6451
// Post-loop validation: required directives, user SGPR limits, register
// block counts, kernarg preload bounds and target-specific fields.
6452 if (!Seen.contains(".amdhsa_next_free_vgpr"))
6453 return TokError(".amdhsa_next_free_vgpr directive is required");
6454
6455 if (!Seen.contains(".amdhsa_next_free_sgpr"))
6456 return TokError(".amdhsa_next_free_sgpr directive is required");
6457
// An explicit .amdhsa_user_sgpr_count takes precedence over the implied one.
6458 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6459 if (UserSGPRCount > getMaxNumUserSGPRs())
6460 return TokError("too many user SGPRs enabled, found " +
6461 Twine(UserSGPRCount) + ", but only " +
6462 Twine(getMaxNumUserSGPRs()) + " are supported.");
6463
6464 // Consider the case where the total number of UserSGPRs with trailing
6465 // allocated preload SGPRs, is greater than the number of explicitly
6466 // referenced SGPRs.
6467 if (PreloadLength) {
6468 MCContext &Ctx = getContext();
6469 NextFreeSGPR = AMDGPUMCExpr::createMax(
6470 {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx);
6471 }
6472
6473 const MCExpr *VGPRBlocks;
6474 const MCExpr *SGPRBlocks;
6475 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6476 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6477 EnableWavefrontSize32, NextFreeVGPR,
6478 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6479 SGPRBlocks))
6480 return true;
6481
// The block counts are range-checked only when they can be evaluated now.
6482 int64_t EvaluatedVGPRBlocks;
6483 bool VGPRBlocksEvaluatable =
6484 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6485 if (VGPRBlocksEvaluatable &&
6487 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
6488 return OutOfRangeError(VGPRRange);
6489 }
6491 KD.compute_pgm_rsrc1, VGPRBlocks,
6492 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6493 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
6494
6495 int64_t EvaluatedSGPRBlocks;
6496 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6498 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
6499 return OutOfRangeError(SGPRRange);
6501 KD.compute_pgm_rsrc1, SGPRBlocks,
6502 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6503 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
6504
// An explicit user SGPR count may exceed, but never undercut, the count
// implied by the enabled user SGPR directives.
6505 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6506 return TokError("amdgpu_user_sgpr_count smaller than implied by "
6507 "enabled user SGPRs");
6508
6509 if (isGFX1250Plus()) {
6512 MCConstantExpr::create(UserSGPRCount, getContext()),
6513 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6514 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());
6515 } else {
6518 MCConstantExpr::create(UserSGPRCount, getContext()),
6519 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6520 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());
6521 }
6522
// The kernarg size must be resolvable here because it bounds the preload
// window (length and offset are each multiplied by 4 before the comparison).
6523 int64_t IVal = 0;
6524 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
6525 return TokError("Kernarg size should be resolvable");
6526 uint64_t kernarg_size = IVal;
6527 if (PreloadLength && kernarg_size &&
6528 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6529 return TokError("Kernarg preload length + offset is larger than the "
6530 "kernarg segment size");
6531
6532 if (isGFX90A()) {
6533 if (!Seen.contains(".amdhsa_accum_offset"))
6534 return TokError(".amdhsa_accum_offset directive is required");
6535 int64_t EvaluatedAccum;
6536 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
// NOTE(review): EvaluatedAccum is read on the next line even when the
// evaluation above failed; the copy is only used under AccumEvaluatable.
6537 uint64_t UEvaluatedAccum = EvaluatedAccum;
6538 if (AccumEvaluatable &&
6539 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6540 return TokError("accum_offset should be in range [4..256] in "
6541 "increments of 4");
6542
6543 int64_t EvaluatedNumVGPR;
6544 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
6545 AccumEvaluatable &&
6546 UEvaluatedAccum >
6547 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
6548 return TokError("accum_offset exceeds total VGPR allocation");
6549 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
6551 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
6554 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6555 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6556 getContext());
6557 }
6558
6559 if (isGFX1250Plus())
6561 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6562 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
6563 getContext());
6564
6565 if (IVersion.Major >= 10 && IVersion.Major < 12) {
6566 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
6567 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6568 return TokError("shared_vgpr_count directive not valid on "
6569 "wavefront size 32");
6570 }
6571
6572 if (VGPRBlocksEvaluatable &&
6573 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
6574 63)) {
6575 return TokError("shared_vgpr_count*2 + "
6576 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
6577 "exceed 63\n");
6578 }
6579 }
6580
// All checks passed: emit the descriptor through the target streamer.
6581 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
6582 NextFreeVGPR, NextFreeSGPR,
6583 ReserveVCC, ReserveFlatScr);
6584 return false;
6585}
6586
6587bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6588 uint32_t Version;
6589 if (ParseAsAbsoluteExpression(Version))
6590 return true;
6591
6592 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
6593 return false;
6594}
6595
6596bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6597 AMDGPUMCKernelCodeT &C) {
6598 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6599 // assembly for backwards compatibility.
6600 if (ID == "max_scratch_backing_memory_byte_size") {
6601 Parser.eatToEndOfStatement();
6602 return false;
6603 }
6604
6605 SmallString<40> ErrStr;
6606 raw_svector_ostream Err(ErrStr);
6607 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
6608 return TokError(Err.str());
6609 }
6610 Lex();
6611
6612 if (ID == "enable_wavefront_size32") {
6613 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6614 if (!isGFX10Plus())
6615 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
6616 if (!isWave32())
6617 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
6618 } else {
6619 if (!isWave64())
6620 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
6621 }
6622 }
6623
6624 if (ID == "wavefront_size") {
6625 if (C.wavefront_size == 5) {
6626 if (!isGFX10Plus())
6627 return TokError("wavefront_size=5 is only allowed on GFX10+");
6628 if (!isWave32())
6629 return TokError("wavefront_size=5 requires +WavefrontSize32");
6630 } else if (C.wavefront_size == 6) {
6631 if (!isWave64())
6632 return TokError("wavefront_size=6 requires +WavefrontSize64");
6633 }
6634 }
6635
6636 return false;
6637}
6638
6639bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6640 AMDGPUMCKernelCodeT KernelCode;
6641 KernelCode.initDefault(getSTI(), getContext());
6642
6643 while (true) {
6644 // Lex EndOfStatement. This is in a while loop, because lexing a comment
6645 // will set the current token to EndOfStatement.
6646 while(trySkipToken(AsmToken::EndOfStatement));
6647
6648 StringRef ID;
6649 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
6650 return true;
6651
6652 if (ID == ".end_amd_kernel_code_t")
6653 break;
6654
6655 if (ParseAMDKernelCodeTValue(ID, KernelCode))
6656 return true;
6657 }
6658
6659 KernelCode.validate(&getSTI(), getContext());
6660 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6661
6662 return false;
6663}
6664
/// Parse a directive whose operand is a kernel symbol name, pass that name to
/// the target streamer's EmitAMDGPUSymbolType, and call
/// KernelScope.initialize() with the current context.
///
/// \returns true if the symbol name could not be parsed, false otherwise.
///
/// NOTE(review): the remaining argument(s) and the closing of the
/// EmitAMDGPUSymbolType call are absent from this listing; they appear to
/// have been dropped and must be restored from the canonical source.
6665bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6666 StringRef KernelName;
6667 if (!parseId(KernelName, "expected symbol name"))
6668 return true;
6669
6670 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6672
6673 KernelScope.initialize(getContext());
6674 return false;
6675}
6676
6677bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6678 if (!getSTI().getTargetTriple().isAMDGCN()) {
6679 return Error(getLoc(),
6680 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6681 "architectures");
6682 }
6683
6684 auto TargetIDDirective = getLexer().getTok().getStringContents();
6685 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6686 return Error(getParser().getTok().getLoc(), "target id must match options");
6687
6688 getTargetStreamer().EmitISAVersion();
6689 Lex();
6690
6691 return false;
6692}
6693
6694bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6695 assert(isHsaAbi(getSTI()));
6696
6697 std::string HSAMetadataString;
6698 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6699 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6700 return true;
6701
6702 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6703 return Error(getLoc(), "invalid HSA metadata");
6704
6705 return false;
6706}
6707
6708/// Common code to parse out a block of text (typically YAML) between start and
6709/// end directives.
6710bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6711 const char *AssemblerDirectiveEnd,
6712 std::string &CollectString) {
6713
6714 raw_string_ostream CollectStream(CollectString);
6715
6716 getLexer().setSkipSpace(false);
6717
6718 bool FoundEnd = false;
6719 while (!isToken(AsmToken::Eof)) {
6720 while (isToken(AsmToken::Space)) {
6721 CollectStream << getTokenStr();
6722 Lex();
6723 }
6724
6725 if (trySkipId(AssemblerDirectiveEnd)) {
6726 FoundEnd = true;
6727 break;
6728 }
6729
6730 CollectStream << Parser.parseStringToEndOfStatement()
6731 << getContext().getAsmInfo().getSeparatorString();
6732
6733 Parser.eatToEndOfStatement();
6734 }
6735
6736 getLexer().setSkipSpace(true);
6737
6738 if (isToken(AsmToken::Eof) && !FoundEnd) {
6739 return TokError(Twine("expected directive ") +
6740 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6741 }
6742
6743 return false;
6744}
6745
6746/// Parse the assembler directive for new MsgPack-format PAL metadata.
///
/// The text up to the end directive is collected with ParseToEndDirective and
/// passed to the PAL metadata object's setFromString().
///
/// \returns true if an error was reported, false otherwise.
///
/// NOTE(review): the second line of the ParseToEndDirective call (its
/// remaining arguments) is absent from this listing; it appears to have been
/// dropped and must be restored from the canonical source.
6747bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6748 std::string String;
6749 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6751 return true;
6752
6753 auto *PALMetadata = getTargetStreamer().getPALMetadata();
// setFromString() returning false is reported as invalid metadata.
6754 if (!PALMetadata->setFromString(String))
6755 return Error(getLoc(), "invalid PAL metadata");
6756 return false;
6757}
6758
6759/// Parse the assembler directive for old linear-format PAL metadata.
///
/// The operand list is a comma-separated sequence of key/value pairs, each
/// parsed with ParseAsAbsoluteExpression and recorded through setRegister().
///
/// \returns true if an error was reported, false otherwise.
///
/// NOTE(review): the continuation lines of the three multi-line TokError()
/// calls are absent from this listing; they appear to have been dropped and
/// must be restored from the canonical source.
6760bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6761 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6762 return Error(getLoc(),
6763 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6764 "not available on non-amdpal OSes")).str());
6765 }
6766
6767 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6768 PALMetadata->setLegacy();
// Each iteration consumes one "key, value" pair; the loop ends when the pair
// is not followed by another comma.
6769 for (;;) {
6770 uint32_t Key, Value;
6771 if (ParseAsAbsoluteExpression(Key)) {
6772 return TokError(Twine("invalid value in ") +
6774 }
// A key must be followed by a comma and its value.
6775 if (!trySkipToken(AsmToken::Comma)) {
6776 return TokError(Twine("expected an even number of values in ") +
6778 }
6779 if (ParseAsAbsoluteExpression(Value)) {
6780 return TokError(Twine("invalid value in ") +
6782 }
6783 PALMetadata->setRegister(Key, Value);
6784 if (!trySkipToken(AsmToken::Comma))
6785 break;
6786 }
6787 return false;
6788}
6789
6790/// ParseDirectiveAMDGPULDS
6791/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
6792bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6793 if (getParser().checkForValidSection())
6794 return true;
6795
6796 StringRef Name;
6797 SMLoc NameLoc = getLoc();
6798 if (getParser().parseIdentifier(Name))
6799 return TokError("expected identifier in directive");
6800
6801 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6802 if (getParser().parseComma())
6803 return true;
6804
6805 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(getSTI());
6806
6807 int64_t Size;
6808 SMLoc SizeLoc = getLoc();
6809 if (getParser().parseAbsoluteExpression(Size))
6810 return true;
6811 if (Size < 0)
6812 return Error(SizeLoc, "size must be non-negative");
6813 if (Size > LocalMemorySize)
6814 return Error(SizeLoc, "size is too large");
6815
6816 int64_t Alignment = 4;
6817 if (trySkipToken(AsmToken::Comma)) {
6818 SMLoc AlignLoc = getLoc();
6819 if (getParser().parseAbsoluteExpression(Alignment))
6820 return true;
6821 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6822 return Error(AlignLoc, "alignment must be a power of two");
6823
6824 // Alignment larger than the size of LDS is possible in theory, as long
6825 // as the linker manages to place to symbol at address 0, but we do want
6826 // to make sure the alignment fits nicely into a 32-bit integer.
6827 if (Alignment >= 1u << 31)
6828 return Error(AlignLoc, "alignment is too large");
6829 }
6830
6831 if (parseEOL())
6832 return true;
6833
6834 Symbol->redefineIfPossible();
6835 if (!Symbol->isUndefined())
6836 return Error(NameLoc, "invalid symbol redefinition");
6837
6838 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6839 return false;
6840}
6841
6842bool AMDGPUAsmParser::ParseDirectiveAMDGPUInfo() {
6843 if (getParser().checkForValidSection())
6844 return true;
6845
6846 StringRef FuncName;
6847 if (getParser().parseIdentifier(FuncName))
6848 return TokError("expected symbol name after .amdgpu_info");
6849
6850 MCSymbol *FuncSym = getContext().getOrCreateSymbol(FuncName);
6851 AMDGPU::InfoSectionData ParsedInfoData;
6852 AMDGPU::FuncInfo FI;
6853 FI.Sym = FuncSym;
6854 bool HasScalarAttrs = false;
6855
6856 while (true) {
6857 while (trySkipToken(AsmToken::EndOfStatement))
6858 ;
6859
6860 StringRef ID;
6861 SMLoc IDLoc = getLoc();
6862 if (!parseId(ID, "expected directive or .end_amdgpu_info"))
6863 return true;
6864
6865 if (ID == ".end_amdgpu_info")
6866 break;
6867
6868 // Every per-entry directive shares the `.amdgpu_` namespace prefix; strip
6869 // it once and dispatch on the distinguishing suffix below. The unstripped
6870 // ID is preserved for diagnostics.
6871 StringRef Dir = ID;
6872 if (!Dir.consume_front(".amdgpu_"))
6873 return Error(IDLoc, "unknown .amdgpu_info directive '" + ID + "'");
6874
6875 if (Dir == "flags") {
6876 int64_t Val;
6877 if (getParser().parseAbsoluteExpression(Val))
6878 return true;
6879 auto Flags = static_cast<AMDGPU::FuncInfoFlags>(Val);
6880 FI.UsesVCC = !!(Flags & AMDGPU::FuncInfoFlags::FUNC_USES_VCC);
6881 FI.UsesFlatScratch =
6882 !!(Flags & AMDGPU::FuncInfoFlags::FUNC_USES_FLAT_SCRATCH);
6883 FI.HasDynStack = !!(Flags & AMDGPU::FuncInfoFlags::FUNC_HAS_DYN_STACK);
6884 HasScalarAttrs = true;
6885 } else if (Dir == "num_sgpr") {
6886 int64_t Val;
6887 if (getParser().parseAbsoluteExpression(Val))
6888 return true;
6889 FI.NumSGPR = static_cast<uint32_t>(Val);
6890 HasScalarAttrs = true;
6891 } else if (Dir == "num_vgpr") {
6892 int64_t Val;
6893 if (getParser().parseAbsoluteExpression(Val))
6894 return true;
6895 FI.NumArchVGPR = static_cast<uint32_t>(Val);
6896 HasScalarAttrs = true;
6897 } else if (Dir == "num_agpr") {
6898 int64_t Val;
6899 if (getParser().parseAbsoluteExpression(Val))
6900 return true;
6901 FI.NumAccVGPR = static_cast<uint32_t>(Val);
6902 HasScalarAttrs = true;
6903 } else if (Dir == "private_segment_size") {
6904 int64_t Val;
6905 if (getParser().parseAbsoluteExpression(Val))
6906 return true;
6907 FI.PrivateSegmentSize = static_cast<uint32_t>(Val);
6908 HasScalarAttrs = true;
6909 } else if (Dir == "use") {
6910 StringRef ResName;
6911 if (getParser().parseIdentifier(ResName))
6912 return TokError("expected resource symbol for .amdgpu_use");
6913 ParsedInfoData.Uses.push_back(
6914 {FuncSym, getContext().getOrCreateSymbol(ResName)});
6915 } else if (Dir == "call") {
6916 StringRef DstName;
6917 if (getParser().parseIdentifier(DstName))
6918 return TokError("expected callee symbol for .amdgpu_call");
6919 ParsedInfoData.Calls.push_back(
6920 {FuncSym, getContext().getOrCreateSymbol(DstName)});
6921 } else if (Dir == "indirect_call") {
6922 std::string TypeId;
6923 if (getParser().parseEscapedString(TypeId))
6924 return TokError("expected type ID string for .amdgpu_indirect_call");
6925 ParsedInfoData.IndirectCalls.push_back({FuncSym, std::move(TypeId)});
6926 } else if (Dir == "typeid") {
6927 std::string TypeId;
6928 if (getParser().parseEscapedString(TypeId))
6929 return TokError("expected type ID string for .amdgpu_typeid");
6930 ParsedInfoData.TypeIds.push_back({FuncSym, std::move(TypeId)});
6931 } else {
6932 return Error(IDLoc, "unknown .amdgpu_info directive '" + ID + "'");
6933 }
6934 }
6935
6936 if (HasScalarAttrs)
6937 ParsedInfoData.Funcs.push_back(std::move(FI));
6938
6939 AMDGPU::InfoSectionData &Data = InfoData ? *InfoData : InfoData.emplace();
6940 for (AMDGPU::FuncInfo &Func : ParsedInfoData.Funcs)
6941 Data.Funcs.push_back(std::move(Func));
6942 for (std::pair<MCSymbol *, MCSymbol *> &Use : ParsedInfoData.Uses)
6943 Data.Uses.push_back(Use);
6944 for (std::pair<MCSymbol *, MCSymbol *> &Call : ParsedInfoData.Calls)
6945 Data.Calls.push_back(Call);
6946 for (std::pair<MCSymbol *, std::string> &IndirectCall :
6947 ParsedInfoData.IndirectCalls)
6948 Data.IndirectCalls.push_back(std::move(IndirectCall));
6949 for (std::pair<MCSymbol *, std::string> &TypeId : ParsedInfoData.TypeIds)
6950 Data.TypeIds.push_back(std::move(TypeId));
6951
6952 return false;
6953}
6954
6955void AMDGPUAsmParser::onEndOfFile() {
6956 if (InfoData)
6957 getTargetStreamer().emitAMDGPUInfo(*InfoData);
6958}
6959
/// Dispatch a target-specific assembler directive to its dedicated parser.
///
/// The directive spelling is taken from \p DirectiveID. HSA-ABI-only and
/// non-HSA-only directives are checked first (selected by isHsaAbi()), then
/// the directives that are accepted regardless of ABI.
///
/// \returns the result of the selected directive parser, or true when no
/// name matched. NOTE(review): true presumably tells the generic parser the
/// directive was not handled here -- confirm against the MCTargetAsmParser
/// contract.
///
/// NOTE(review): this listing skips source lines 6971 and 6983, so the
/// conditions guarding the HSA-metadata return and the "not available on
/// non-amdhsa OSes" error are not visible and the braces below do not
/// balance as shown.
6960bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6961 StringRef IDVal = DirectiveID.getString();
6962
6963 if (isHsaAbi(getSTI())) {
6964 if (IDVal == ".amdhsa_kernel")
6965 return ParseDirectiveAMDHSAKernel();
6966
6967 if (IDVal == ".amdhsa_code_object_version")
6968 return ParseDirectiveAMDHSACodeObjectVersion();
6969
6970 // TODO: Restructure/combine with PAL metadata directive.
// NOTE(review): the condition for this return (listing line 6971) is missing.
6972 return ParseDirectiveHSAMetadata();
6973 } else {
6974 if (IDVal == ".amd_kernel_code_t")
6975 return ParseDirectiveAMDKernelCodeT();
6976
6977 if (IDVal == ".amdgpu_hsa_kernel")
6978 return ParseDirectiveAMDGPUHsaKernel();
6979
6980 if (IDVal == ".amd_amdgpu_isa")
6981 return ParseDirectiveISAVersion();
6982
// NOTE(review): the `if (...) {` opening this error branch (listing line
// 6983) is missing; the message suggests it matches the HSA metadata begin
// directive -- confirm.
6984 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6985 Twine(" directive is "
6986 "not available on non-amdhsa OSes"))
6987 .str());
6988 }
6989 }
6990
// Directives below are recognized independently of the HSA ABI check above.
6991 if (IDVal == ".amdgcn_target")
6992 return ParseDirectiveAMDGCNTarget();
6993
6994 if (IDVal == ".amdgpu_lds")
6995 return ParseDirectiveAMDGPULDS();
6996
6997 if (IDVal == ".amdgpu_info")
6998 return ParseDirectiveAMDGPUInfo();
6999
7000 if (IDVal == PALMD::AssemblerDirectiveBegin)
7001 return ParseDirectivePALMetadataBegin();
7002
7003 if (IDVal == PALMD::AssemblerDirective)
7004 return ParseDirectivePALMetadata();
7005
// No directive name matched.
7006 return true;
7007}
7008
7009bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
7010 MCRegister Reg) {
7011 if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
7012 return isGFX9Plus();
7013
7014 // GFX10+ has 2 more SGPRs 104 and 105.
7015 if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
7016 return hasSGPR104_SGPR105();
7017
7018 switch (Reg.id()) {
7019 case SRC_SHARED_BASE_LO:
7020 case SRC_SHARED_BASE:
7021 case SRC_SHARED_LIMIT_LO:
7022 case SRC_SHARED_LIMIT:
7023 case SRC_PRIVATE_BASE_LO:
7024 case SRC_PRIVATE_BASE:
7025 case SRC_PRIVATE_LIMIT_LO:
7026 case SRC_PRIVATE_LIMIT:
7027 return isGFX9Plus();
7028 case SRC_FLAT_SCRATCH_BASE_LO:
7029 case SRC_FLAT_SCRATCH_BASE_HI:
7030 return hasGloballyAddressableScratch();
7031 case SRC_POPS_EXITING_WAVE_ID:
7032 return isGFX9Plus() && !isGFX11Plus();
7033 case TBA:
7034 case TBA_LO:
7035 case TBA_HI:
7036 case TMA:
7037 case TMA_LO:
7038 case TMA_HI:
7039 return !isGFX9Plus();
7040 case XNACK_MASK:
7041 case XNACK_MASK_LO:
7042 case XNACK_MASK_HI:
7043 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
7044 case SGPR_NULL:
7045 return isGFX10Plus();
7046 case SRC_EXECZ:
7047 case SRC_VCCZ:
7048 return !isGFX11Plus();
7049 default:
7050 break;
7051 }
7052
7053 if (isCI())
7054 return true;
7055
7056 if (isSI() || isGFX10Plus()) {
7057 // No flat_scr on SI.
7058 // On GFX10Plus flat scratch is not a valid register operand and can only be
7059 // accessed with s_setreg/s_getreg.
7060 switch (Reg.id()) {
7061 case FLAT_SCR:
7062 case FLAT_SCR_LO:
7063 case FLAT_SCR_HI:
7064 return false;
7065 default:
7066 return true;
7067 }
7068 }
7069
7070 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
7071 // SI/CI have.
7072 if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
7073 return hasSGPR102_SGPR103();
7074
7075 return true;
7076}
7077
7078ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
7079 StringRef Mnemonic,
7080 OperandMode Mode) {
7081 ParseStatus Res = parseVOPD(Operands);
7082 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
7083 return Res;
7084
7085 // Try to parse with a custom parser
7086 Res = MatchOperandParserImpl(Operands, Mnemonic);
7087
7088 // If we successfully parsed the operand or if there as an error parsing,
7089 // we are done.
7090 //
7091 // If we are parsing after we reach EndOfStatement then this means we
7092 // are appending default values to the Operands list. This is only done
7093 // by custom parser, so we shouldn't continue on to the generic parsing.
7094 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
7095 return Res;
7096
7097 SMLoc RBraceLoc;
7098 SMLoc LBraceLoc = getLoc();
7099 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
7100 unsigned Prefix = Operands.size();
7101
7102 for (;;) {
7103 auto Loc = getLoc();
7104 Res = parseReg(Operands);
7105 if (Res.isNoMatch())
7106 Error(Loc, "expected a register");
7107 if (!Res.isSuccess())
7108 return ParseStatus::Failure;
7109
7110 RBraceLoc = getLoc();
7111 if (trySkipToken(AsmToken::RBrac))
7112 break;
7113
7114 if (!skipToken(AsmToken::Comma,
7115 "expected a comma or a closing square bracket"))
7116 return ParseStatus::Failure;
7117 }
7118
7119 if (Operands.size() - Prefix > 1) {
7120 Operands.insert(Operands.begin() + Prefix,
7121 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
7122 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
7123 }
7124
7125 return ParseStatus::Success;
7126 }
7127
7128 return parseRegOrImm(Operands);
7129}
7130
7131StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
7132 // Clear any forced encodings from the previous instruction.
7133 setForcedEncodingSize(0);
7134 setForcedDPP(false);
7135 setForcedSDWA(false);
7136
7137 if (Name.consume_back("_e64_dpp")) {
7138 setForcedDPP(true);
7139 setForcedEncodingSize(64);
7140 return Name;
7141 }
7142 if (Name.consume_back("_e64")) {
7143 setForcedEncodingSize(64);
7144 return Name;
7145 }
7146 if (Name.consume_back("_e32")) {
7147 setForcedEncodingSize(32);
7148 return Name;
7149 }
7150 if (Name.consume_back("_dpp")) {
7151 setForcedDPP(true);
7152 return Name;
7153 }
7154 if (Name.consume_back("_sdwa")) {
7155 setForcedSDWA(true);
7156 return Name;
7157 }
7158 return Name;
7159}
7160
// Forward declaration only; the definition is not part of this section of the
// file. parseInstruction() calls it to rewrite the mnemonic in place before
// the mnemonic token is created.
7161static void applyMnemonicAliases(StringRef &Mnemonic,
7162 const FeatureBitset &Features,
7163 unsigned VariantID);
7164
7165bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
7166 StringRef Name, SMLoc NameLoc,
7167 OperandVector &Operands) {
7168 // Add the instruction mnemonic
7169 Name = parseMnemonicSuffix(Name);
7170
7171 // If the target architecture uses MnemonicAlias, call it here to parse
7172 // operands correctly.
7173 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
7174
7175 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
7176
7177 bool IsMIMG = Name.starts_with("image_");
7178
7179 while (!trySkipToken(AsmToken::EndOfStatement)) {
7180 OperandMode Mode = OperandMode_Default;
7181 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
7182 Mode = OperandMode_NSA;
7183 ParseStatus Res = parseOperand(Operands, Name, Mode);
7184
7185 if (!Res.isSuccess()) {
7186 checkUnsupportedInstruction(Name, NameLoc);
7187 if (!Parser.hasPendingError()) {
7188 // FIXME: use real operand location rather than the current location.
7189 StringRef Msg = Res.isFailure() ? "failed parsing operand."
7190 : "not a valid operand.";
7191 Error(getLoc(), Msg);
7192 }
7193 while (!trySkipToken(AsmToken::EndOfStatement)) {
7194 lex();
7195 }
7196 return true;
7197 }
7198
7199 // Eat the comma or space if there is one.
7200 trySkipToken(AsmToken::Comma);
7201 }
7202
7203 return false;
7204}
7205
7206//===----------------------------------------------------------------------===//
7207// Utility functions
7208//===----------------------------------------------------------------------===//
7209
7210ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
7211 OperandVector &Operands) {
7212 SMLoc S = getLoc();
7213 if (!trySkipId(Name))
7214 return ParseStatus::NoMatch;
7215
7216 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
7217 return ParseStatus::Success;
7218}
7219
7220ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
7221 int64_t &IntVal) {
7222
7223 if (!trySkipId(Prefix, AsmToken::Colon))
7224 return ParseStatus::NoMatch;
7225
7227}
7228
7229ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
7230 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7231 std::function<bool(int64_t &)> ConvertResult) {
7232 SMLoc S = getLoc();
7233 int64_t Value = 0;
7234
7235 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
7236 if (!Res.isSuccess())
7237 return Res;
7238
7239 if (ConvertResult && !ConvertResult(Value)) {
7240 Error(S, "invalid " + StringRef(Prefix) + " value.");
7241 }
7242
7243 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
7244 return ParseStatus::Success;
7245}
7246
7247ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
7248 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7249 bool (*ConvertResult)(int64_t &)) {
7250 SMLoc S = getLoc();
7251 if (!trySkipId(Prefix, AsmToken::Colon))
7252 return ParseStatus::NoMatch;
7253
7254 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
7255 return ParseStatus::Failure;
7256
7257 unsigned Val = 0;
7258 const unsigned MaxSize = 4;
7259
7260 // FIXME: How to verify the number of elements matches the number of src
7261 // operands?
7262 for (int I = 0; ; ++I) {
7263 int64_t Op;
7264 SMLoc Loc = getLoc();
7265 if (!parseExpr(Op))
7266 return ParseStatus::Failure;
7267
7268 if (Op != 0 && Op != 1)
7269 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
7270
7271 Val |= (Op << I);
7272
7273 if (trySkipToken(AsmToken::RBrac))
7274 break;
7275
7276 if (I + 1 == MaxSize)
7277 return Error(getLoc(), "expected a closing square bracket");
7278
7279 if (!skipToken(AsmToken::Comma, "expected a comma"))
7280 return ParseStatus::Failure;
7281 }
7282
7283 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
7284 return ParseStatus::Success;
7285}
7286
7287ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
7288 OperandVector &Operands,
7289 AMDGPUOperand::ImmTy ImmTy,
7290 bool IgnoreNegative) {
7291 int64_t Bit;
7292 SMLoc S = getLoc();
7293
7294 if (trySkipId(Name)) {
7295 Bit = 1;
7296 } else if (trySkipId("no", Name)) {
7297 if (IgnoreNegative)
7298 return ParseStatus::Success;
7299 Bit = 0;
7300 } else {
7301 return ParseStatus::NoMatch;
7302 }
7303
7304 if (Name == "r128" && !hasMIMG_R128())
7305 return Error(S, "r128 modifier is not supported on this GPU");
7306 if (Name == "a16" && !hasA16())
7307 return Error(S, "a16 modifier is not supported on this GPU");
7308
7309 if (Bit == 0 && Name == "gds") {
7310 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7311 if (Mnemo.starts_with("ds_gws"))
7312 return Error(S, "nogds is not allowed");
7313 }
7314
7315 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
7316 ImmTy = AMDGPUOperand::ImmTyR128A16;
7317
7318 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
7319 return ParseStatus::Success;
7320}
7321
7322unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7323 bool &Disabling) const {
7324 Disabling = Id.consume_front("no");
7325
7326 if (isGFX940() && !Mnemo.starts_with("s_")) {
7327 return StringSwitch<unsigned>(Id)
7328 .Case("nt", AMDGPU::CPol::NT)
7329 .Case("sc0", AMDGPU::CPol::SC0)
7330 .Case("sc1", AMDGPU::CPol::SC1)
7331 .Default(0);
7332 }
7333
7334 return StringSwitch<unsigned>(Id)
7335 .Case("dlc", AMDGPU::CPol::DLC)
7336 .Case("glc", AMDGPU::CPol::GLC)
7337 .Case("scc", AMDGPU::CPol::SCC)
7338 .Case("slc", AMDGPU::CPol::SLC)
7339 .Default(0);
7340}
7341
/// Parse the cache-policy modifiers of an instruction and append them to
/// \p Operands as a single ImmTyCPol immediate.
///
/// GFX12+ accepts the th, scope, nv/nonv and scale_offset/noscale_offset
/// components, each at most once and in any order. Older targets accept a
/// sequence of named bits (optionally prefixed with "no") recognized by
/// getCPolKind().
///
/// \returns NoMatch when no cache-policy modifier is present, Failure (via
/// Error or a failing sub-parser) on invalid input, Success otherwise.
7342ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
7343 if (isGFX12Plus()) {
7344 SMLoc StringLoc = getLoc();
7345
7346 int64_t CPolVal = 0;
// Each status stays NoMatch until its component has been parsed; that also
// keeps a component from being parsed twice.
7347 ParseStatus ResTH = ParseStatus::NoMatch;
7348 ParseStatus ResScope = ParseStatus::NoMatch;
7349 ParseStatus ResNV = ParseStatus::NoMatch;
7350 ParseStatus ResScal = ParseStatus::NoMatch;
7351
// Keep retrying the not-yet-seen components; `continue` restarts the scan
// after each match so the components may appear in any order.
7352 for (;;) {
7353 if (ResTH.isNoMatch()) {
7354 int64_t TH;
7355 ResTH = parseTH(Operands, TH);
7356 if (ResTH.isFailure())
7357 return ResTH;
7358 if (ResTH.isSuccess()) {
7359 CPolVal |= TH;
7360 continue;
7361 }
7362 }
7363
7364 if (ResScope.isNoMatch()) {
7365 int64_t Scope;
7366 ResScope = parseScope(Operands, Scope);
7367 if (ResScope.isFailure())
7368 return ResScope;
7369 if (ResScope.isSuccess()) {
7370 CPolVal |= Scope;
7371 continue;
7372 }
7373 }
7374
7375 // NV bit exists on GFX12+, but does something starting from GFX1250.
7376 // Allow parsing on all GFX12 and fail on validation for better
7377 // diagnostics.
7378 if (ResNV.isNoMatch()) {
7379 if (trySkipId("nv")) {
7380 ResNV = ParseStatus::Success;
7381 CPolVal |= CPol::NV;
7382 continue;
7383 } else if (trySkipId("no", "nv")) {
// "nonv" is accepted but leaves the NV bit clear.
7384 ResNV = ParseStatus::Success;
7385 continue;
7386 }
7387 }
7388
7389 if (ResScal.isNoMatch()) {
7390 if (trySkipId("scale_offset")) {
7391 ResScal = ParseStatus::Success;
7392 CPolVal |= CPol::SCAL;
7393 continue;
7394 } else if (trySkipId("no", "scale_offset")) {
// "noscale_offset" is accepted but leaves the SCAL bit clear.
7395 ResScal = ParseStatus::Success;
7396 continue;
7397 }
7398 }
7399
// Nothing new matched in this pass: the modifier list is finished.
7400 break;
7401 }
7402
7403 if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
7404 ResScal.isNoMatch())
7405 return ParseStatus::NoMatch;
7406
7407 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
7408 AMDGPUOperand::ImmTyCPol));
7409 return ParseStatus::Success;
7410 }
7411
// Pre-GFX12 path: named bits, each optionally prefixed with "no".
7412 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7413 SMLoc OpLoc = getLoc();
// Enabled collects the bits to set; Seen collects every bit mentioned,
// whether enabled or disabled, so duplicates can be diagnosed.
7414 unsigned Enabled = 0, Seen = 0;
7415 for (;;) {
7416 SMLoc S = getLoc();
7417 bool Disabling;
7418 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
7419 if (!CPol)
7420 break;
7421
// The identifier was only inspected above; consume it now.
7422 lex();
7423
7424 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
7425 return Error(S, "dlc modifier is not supported on this GPU");
7426
7427 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
7428 return Error(S, "scc modifier is not supported on this GPU");
7429
7430 if (Seen & CPol)
7431 return Error(S, "duplicate cache policy modifier");
7432
7433 if (!Disabling)
7434 Enabled |= CPol;
7435
7436 Seen |= CPol;
7437 }
7438
7439 if (!Seen)
7440 return ParseStatus::NoMatch;
7441
7442 Operands.push_back(
7443 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
7444 return ParseStatus::Success;
7445}
7446
7447ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
7448 int64_t &Scope) {
7449 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
7451
7452 ParseStatus Res = parseStringOrIntWithPrefix(
7453 Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
7454 Scope);
7455
7456 if (Res.isSuccess())
7457 Scope = Scopes[Scope];
7458
7459 return Res;
7460}
7461
/// Parse a "th:<TH_...>" modifier and store its CPol encoding in \p TH.
///
/// \returns the status of parseStringWithPrefix() when the prefix is absent
/// or malformed, an error for unknown or explicitly rejected names, Success
/// otherwise.
///
/// NOTE(review): this listing skips source lines 7472, 7477, 7479, 7481,
/// 7487, 7490, 7497 and 7500. The bodies of several branches and the
/// condition selecting between the two StringSwitch tables are therefore not
/// visible, and the code below is incomplete as shown.
7462ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
7463 TH = AMDGPU::CPol::TH_RT; // default
7464
7465 StringRef Value;
7466 SMLoc StringLoc;
7467 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
7468 if (!Res.isSuccess())
7469 return Res;
7470
// Classify by the TH_<kind>_ prefix; consume_front() leaves only the policy
// suffix in Value for the lookup tables below.
7471 if (Value == "TH_DEFAULT")
7473 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
7474 Value == "TH_LOAD_NT_WB") {
// These spellings are rejected outright.
7475 return Error(StringLoc, "invalid th value");
7476 } else if (Value.consume_front("TH_ATOMIC_")) {
7478 } else if (Value.consume_front("TH_LOAD_")) {
7480 } else if (Value.consume_front("TH_STORE_")) {
7482 } else {
7483 return Error(StringLoc, "invalid th value");
7484 }
7485
7486 if (Value == "BYPASS")
7488
7489 if (TH != 0) {
// First table: suffixes that use the TH_ATOMIC_* encodings.
7491 TH |= StringSwitch<int64_t>(Value)
7492 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7493 .Case("RT", AMDGPU::CPol::TH_RT)
7494 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7495 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
7496 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
7498 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
7499 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
7501 .Default(0xffffffff);
7502 else
// Second table: the remaining (non-atomic) policy suffixes.
7503 TH |= StringSwitch<int64_t>(Value)
7504 .Case("RT", AMDGPU::CPol::TH_RT)
7505 .Case("NT", AMDGPU::CPol::TH_NT)
7506 .Case("HT", AMDGPU::CPol::TH_HT)
7507 .Case("LU", AMDGPU::CPol::TH_LU)
7508 .Case("WB", AMDGPU::CPol::TH_WB)
7509 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
7510 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
7511 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
7512 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
7513 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
7514 .Default(0xffffffff);
7515 }
7516
// 0xffffffff is the Default() of both tables, i.e. an unknown suffix.
7517 if (TH == 0xffffffff)
7518 return Error(StringLoc, "invalid th value");
7519
7520 return ParseStatus::Success;
7521}
7522
7523static void
7525 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7526 AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
7527 std::optional<unsigned> InsertAt = std::nullopt) {
7528 auto i = OptionalIdx.find(ImmT);
7529 if (i != OptionalIdx.end()) {
7530 unsigned Idx = i->second;
7531 const AMDGPUOperand &Op =
7532 static_cast<const AMDGPUOperand &>(*Operands[Idx]);
7533 if (InsertAt)
7534 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
7535 else
7536 Op.addImmOperands(Inst, 1);
7537 } else {
7538 if (InsertAt.has_value())
7539 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
7540 else
7542 }
7543}
7544
7545ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7546 StringRef &Value,
7547 SMLoc &StringLoc) {
7548 if (!trySkipId(Prefix, AsmToken::Colon))
7549 return ParseStatus::NoMatch;
7550
7551 StringLoc = getLoc();
7552 return parseId(Value, "expected an identifier") ? ParseStatus::Success
7554}
7555
7556ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7557 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7558 int64_t &IntVal) {
7559 if (!trySkipId(Name, AsmToken::Colon))
7560 return ParseStatus::NoMatch;
7561
7562 SMLoc StringLoc = getLoc();
7563
7564 StringRef Value;
7565 if (isToken(AsmToken::Identifier)) {
7566 Value = getTokenStr();
7567 lex();
7568
7569 for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
7570 if (Value == Ids[IntVal])
7571 break;
7572 } else if (!parseExpr(IntVal))
7573 return ParseStatus::Failure;
7574
7575 if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
7576 return Error(StringLoc, "invalid " + Twine(Name) + " value");
7577
7578 return ParseStatus::Success;
7579}
7580
7581ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7582 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7583 AMDGPUOperand::ImmTy Type) {
7584 SMLoc S = getLoc();
7585 int64_t IntVal;
7586
7587 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7588 if (Res.isSuccess())
7589 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
7590
7591 return Res;
7592}
7593
7594//===----------------------------------------------------------------------===//
7595// MTBUF format
7596//===----------------------------------------------------------------------===//
7597
7598bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
7599 int64_t MaxVal,
7600 int64_t &Fmt) {
7601 int64_t Val;
7602 SMLoc Loc = getLoc();
7603
7604 auto Res = parseIntWithPrefix(Pref, Val);
7605 if (Res.isFailure())
7606 return false;
7607 if (Res.isNoMatch())
7608 return true;
7609
7610 if (Val < 0 || Val > MaxVal) {
7611 Error(Loc, Twine("out of range ", StringRef(Pref)));
7612 return false;
7613 }
7614
7615 Fmt = Val;
7616 return true;
7617}
7618
7619ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
7620 AMDGPUOperand::ImmTy ImmTy) {
7621 const char *Pref = "index_key";
7622 int64_t ImmVal = 0;
7623 SMLoc Loc = getLoc();
7624 auto Res = parseIntWithPrefix(Pref, ImmVal);
7625 if (!Res.isSuccess())
7626 return Res;
7627
7628 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7629 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7630 (ImmVal < 0 || ImmVal > 1))
7631 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7632
7633 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7634 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7635
7636 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
7637 return ParseStatus::Success;
7638}
7639
7640ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
7641 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
7642}
7643
7644ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
7645 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
7646}
7647
7648ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
7649 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
7650}
7651
7652ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(OperandVector &Operands,
7653 StringRef Name,
7654 AMDGPUOperand::ImmTy Type) {
7655 return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixFmt,
7656 Type);
7657}
7658
7659ParseStatus AMDGPUAsmParser::parseMatrixAFMT(OperandVector &Operands) {
7660 return tryParseMatrixFMT(Operands, "matrix_a_fmt",
7661 AMDGPUOperand::ImmTyMatrixAFMT);
7662}
7663
7664ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
7665 return tryParseMatrixFMT(Operands, "matrix_b_fmt",
7666 AMDGPUOperand::ImmTyMatrixBFMT);
7667}
7668
7669ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
7670 StringRef Name,
7671 AMDGPUOperand::ImmTy Type) {
7672 return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixScale,
7673 Type);
7674}
7675
7676ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
7677 return tryParseMatrixScale(Operands, "matrix_a_scale",
7678 AMDGPUOperand::ImmTyMatrixAScale);
7679}
7680
7681ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
7682 return tryParseMatrixScale(Operands, "matrix_b_scale",
7683 AMDGPUOperand::ImmTyMatrixBScale);
7684}
7685
7686ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
7687 StringRef Name,
7688 AMDGPUOperand::ImmTy Type) {
7689 return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixScaleFmt,
7690 Type);
7691}
7692
7693ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
7694 return tryParseMatrixScaleFmt(Operands, "matrix_a_scale_fmt",
7695 AMDGPUOperand::ImmTyMatrixAScaleFmt);
7696}
7697
7698ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
7699 return tryParseMatrixScaleFmt(Operands, "matrix_b_scale_fmt",
7700 AMDGPUOperand::ImmTyMatrixBScaleFmt);
7701}
7702
7703// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
7704// values to live in a joint format operand in the MCInst encoding.
7705ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
7706 using namespace llvm::AMDGPU::MTBUFFormat;
7707
7708 int64_t Dfmt = DFMT_UNDEF;
7709 int64_t Nfmt = NFMT_UNDEF;
7710
7711 // dfmt and nfmt can appear in either order, and each is optional.
7712 for (int I = 0; I < 2; ++I) {
7713 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
7714 return ParseStatus::Failure;
7715
7716 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
7717 return ParseStatus::Failure;
7718
7719 // Skip optional comma between dfmt/nfmt
7720 // but guard against 2 commas following each other.
7721 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7722 !peekToken().is(AsmToken::Comma)) {
7723 trySkipToken(AsmToken::Comma);
7724 }
7725 }
7726
7727 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7728 return ParseStatus::NoMatch;
7729
7730 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7731 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7732
7733 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7734 return ParseStatus::Success;
7735}
7736
7737ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
7738 using namespace llvm::AMDGPU::MTBUFFormat;
7739
7740 int64_t Fmt = UFMT_UNDEF;
7741
7742 if (!tryParseFmt("format", UFMT_MAX, Fmt))
7743 return ParseStatus::Failure;
7744
7745 if (Fmt == UFMT_UNDEF)
7746 return ParseStatus::NoMatch;
7747
7748 Format = Fmt;
7749 return ParseStatus::Success;
7750}
7751
7752bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7753 int64_t &Nfmt,
7754 StringRef FormatStr,
7755 SMLoc Loc) {
7756 using namespace llvm::AMDGPU::MTBUFFormat;
7757 int64_t Format;
7758
7759 Format = getDfmt(FormatStr);
7760 if (Format != DFMT_UNDEF) {
7761 Dfmt = Format;
7762 return true;
7763 }
7764
7765 Format = getNfmt(FormatStr, getSTI());
7766 if (Format != NFMT_UNDEF) {
7767 Nfmt = Format;
7768 return true;
7769 }
7770
7771 Error(Loc, "unsupported format");
7772 return false;
7773}
7774
7775ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7776 SMLoc FormatLoc,
7777 int64_t &Format) {
7778 using namespace llvm::AMDGPU::MTBUFFormat;
7779
7780 int64_t Dfmt = DFMT_UNDEF;
7781 int64_t Nfmt = NFMT_UNDEF;
7782 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7783 return ParseStatus::Failure;
7784
7785 if (trySkipToken(AsmToken::Comma)) {
7786 StringRef Str;
7787 SMLoc Loc = getLoc();
7788 if (!parseId(Str, "expected a format string") ||
7789 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7790 return ParseStatus::Failure;
7791 if (Dfmt == DFMT_UNDEF)
7792 return Error(Loc, "duplicate numeric format");
7793 if (Nfmt == NFMT_UNDEF)
7794 return Error(Loc, "duplicate data format");
7795 }
7796
7797 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7798 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7799
7800 if (isGFX10Plus()) {
7801 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
7802 if (Ufmt == UFMT_UNDEF)
7803 return Error(FormatLoc, "unsupported format");
7804 Format = Ufmt;
7805 } else {
7806 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7807 }
7808
7809 return ParseStatus::Success;
7810}
7811
7812ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7813 SMLoc Loc,
7814 int64_t &Format) {
7815 using namespace llvm::AMDGPU::MTBUFFormat;
7816
7817 auto Id = getUnifiedFormat(FormatStr, getSTI());
7818 if (Id == UFMT_UNDEF)
7819 return ParseStatus::NoMatch;
7820
7821 if (!isGFX10Plus())
7822 return Error(Loc, "unified format is not supported on this GPU");
7823
7824 Format = Id;
7825 return ParseStatus::Success;
7826}
7827
7828ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7829 using namespace llvm::AMDGPU::MTBUFFormat;
7830 SMLoc Loc = getLoc();
7831
7832 if (!parseExpr(Format))
7833 return ParseStatus::Failure;
7834 if (!isValidFormatEncoding(Format, getSTI()))
7835 return Error(Loc, "out of range format");
7836
7837 return ParseStatus::Success;
7838}
7839
7840ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
7841 using namespace llvm::AMDGPU::MTBUFFormat;
7842
7843 if (!trySkipId("format", AsmToken::Colon))
7844 return ParseStatus::NoMatch;
7845
7846 if (trySkipToken(AsmToken::LBrac)) {
7847 StringRef FormatStr;
7848 SMLoc Loc = getLoc();
7849 if (!parseId(FormatStr, "expected a format string"))
7850 return ParseStatus::Failure;
7851
7852 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
7853 if (Res.isNoMatch())
7854 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
7855 if (!Res.isSuccess())
7856 return Res;
7857
7858 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7859 return ParseStatus::Failure;
7860
7861 return ParseStatus::Success;
7862 }
7863
7864 return parseNumericFormat(Format);
7865}
7866
/// Parse the format operand together with the soffset operand that
/// accompanies it.
///
/// The format may be written before soffset in the legacy numeric syntax
/// ("format:<n>" on GFX10+, "dfmt:<n>"/"nfmt:<n>" otherwise) or after soffset
/// in the symbolic-or-numeric syntax. A FORMAT immediate is always pushed
/// first, holding the default encoding if nothing was parsed yet, and is
/// patched in place when the format turns up after soffset.
7867ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
7868 using namespace llvm::AMDGPU::MTBUFFormat;
7869
7870 int64_t Format = getDefaultFormatEncoding(getSTI());
7871 ParseStatus Res;
7872 SMLoc Loc = getLoc();
7873
7874 // Parse legacy format syntax.
7875 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
7876 if (Res.isFailure())
7877 return Res;
7878
7879 bool FormatFound = Res.isSuccess();
7880
// Pushed unconditionally: either the parsed legacy format or the default
// encoding as a placeholder.
7881 Operands.push_back(
7882 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7883
7884 if (FormatFound)
7885 trySkipToken(AsmToken::Comma);
7886
7887 if (isToken(AsmToken::EndOfStatement)) {
7888 // We are expecting an soffset operand,
7889 // but let matcher handle the error.
7890 return ParseStatus::Success;
7891 }
7892
7893 // Parse soffset.
7894 Res = parseRegOrImm(Operands);
7895 if (!Res.isSuccess())
7896 return Res;
7897
7898 trySkipToken(AsmToken::Comma);
7899
7900 if (!FormatFound) {
// No legacy format preceded soffset; look for a format after it.
7901 Res = parseSymbolicOrNumericFormat(Format);
7902 if (Res.isFailure())
7903 return Res;
7904 if (Res.isSuccess()) {
// soffset is now the last operand, so the FORMAT placeholder pushed above
// sits at Size - 2.
7905 auto Size = Operands.size();
7906 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
7907 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7908 Op.setImm(Format);
7909 }
7910 return ParseStatus::Success;
7911 }
7912
// A legacy format was already given; a second "format:" is a duplicate.
7913 if (isId("format") && peekToken().is(AsmToken::Colon))
7914 return Error(getLoc(), "duplicate format");
7915 return ParseStatus::Success;
7916}
7917
7918ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7919 ParseStatus Res =
7920 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7921 if (Res.isNoMatch()) {
7922 Res = parseIntWithPrefix("inst_offset", Operands,
7923 AMDGPUOperand::ImmTyInstOffset);
7924 }
7925 return Res;
7926}
7927
7928ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7929 ParseStatus Res =
7930 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7931 if (Res.isNoMatch())
7932 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7933 return Res;
7934}
7935
7936ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7937 ParseStatus Res =
7938 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7939 if (Res.isNoMatch()) {
7940 Res =
7941 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7942 }
7943 return Res;
7944}
7945
7946//===----------------------------------------------------------------------===//
7947// Exp
7948//===----------------------------------------------------------------------===//
7949
7950void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7951 OptionalImmIndexMap OptionalIdx;
7952
7953 unsigned OperandIdx[4];
7954 unsigned EnMask = 0;
7955 int SrcIdx = 0;
7956
7957 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7958 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7959
7960 // Add the register arguments
7961 if (Op.isReg()) {
7962 assert(SrcIdx < 4);
7963 OperandIdx[SrcIdx] = Inst.size();
7964 Op.addRegOperands(Inst, 1);
7965 ++SrcIdx;
7966 continue;
7967 }
7968
7969 if (Op.isOff()) {
7970 assert(SrcIdx < 4);
7971 OperandIdx[SrcIdx] = Inst.size();
7972 Inst.addOperand(MCOperand::createReg(MCRegister()));
7973 ++SrcIdx;
7974 continue;
7975 }
7976
7977 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7978 Op.addImmOperands(Inst, 1);
7979 continue;
7980 }
7981
7982 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7983 continue;
7984
7985 // Handle optional arguments
7986 OptionalIdx[Op.getImmTy()] = i;
7987 }
7988
7989 assert(SrcIdx == 4);
7990
7991 bool Compr = false;
7992 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7993 Compr = true;
7994 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7995 Inst.getOperand(OperandIdx[2]).setReg(MCRegister());
7996 Inst.getOperand(OperandIdx[3]).setReg(MCRegister());
7997 }
7998
7999 for (auto i = 0; i < SrcIdx; ++i) {
8000 if (Inst.getOperand(OperandIdx[i]).getReg()) {
8001 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
8002 }
8003 }
8004
8005 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
8006 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
8007
8008 Inst.addOperand(MCOperand::createImm(EnMask));
8009}
8010
8011//===----------------------------------------------------------------------===//
8012// s_waitcnt
8013//===----------------------------------------------------------------------===//
8014
8015static bool
8017 const AMDGPU::IsaVersion ISA,
8018 int64_t &IntVal,
8019 int64_t CntVal,
8020 bool Saturate,
8021 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
8022 unsigned (*decode)(const IsaVersion &Version, unsigned))
8023{
8024 bool Failed = false;
8025
8026 IntVal = encode(ISA, IntVal, CntVal);
8027 if (CntVal != decode(ISA, IntVal)) {
8028 if (Saturate) {
8029 IntVal = encode(ISA, IntVal, -1);
8030 } else {
8031 Failed = true;
8032 }
8033 }
8034 return Failed;
8035}
8036
8037bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
8038
8039 SMLoc CntLoc = getLoc();
8040 StringRef CntName = getTokenStr();
8041
8042 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
8043 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
8044 return false;
8045
8046 int64_t CntVal;
8047 SMLoc ValLoc = getLoc();
8048 if (!parseExpr(CntVal))
8049 return false;
8050
8051 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
8052
8053 bool Failed = true;
8054 bool Sat = CntName.ends_with("_sat");
8055
8056 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
8057 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
8058 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
8059 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
8060 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
8061 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
8062 } else {
8063 Error(CntLoc, "invalid counter name " + CntName);
8064 return false;
8065 }
8066
8067 if (Failed) {
8068 Error(ValLoc, "too large value for " + CntName);
8069 return false;
8070 }
8071
8072 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8073 return false;
8074
8075 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
8076 if (isToken(AsmToken::EndOfStatement)) {
8077 Error(getLoc(), "expected a counter name");
8078 return false;
8079 }
8080 }
8081
8082 return true;
8083}
8084
8085ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
8086 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
8087 int64_t Waitcnt = getWaitcntBitMask(ISA);
8088 SMLoc S = getLoc();
8089
8090 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8091 while (!isToken(AsmToken::EndOfStatement)) {
8092 if (!parseCnt(Waitcnt))
8093 return ParseStatus::Failure;
8094 }
8095 } else {
8096 if (!parseExpr(Waitcnt))
8097 return ParseStatus::Failure;
8098 }
8099
8100 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
8101 return ParseStatus::Success;
8102}
8103
8104bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
8105 SMLoc FieldLoc = getLoc();
8106 StringRef FieldName = getTokenStr();
8107 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
8108 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
8109 return false;
8110
8111 SMLoc ValueLoc = getLoc();
8112 StringRef ValueName = getTokenStr();
8113 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
8114 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
8115 return false;
8116
8117 unsigned Shift;
8118 if (FieldName == "instid0") {
8119 Shift = 0;
8120 } else if (FieldName == "instskip") {
8121 Shift = 4;
8122 } else if (FieldName == "instid1") {
8123 Shift = 7;
8124 } else {
8125 Error(FieldLoc, "invalid field name " + FieldName);
8126 return false;
8127 }
8128
8129 int Value;
8130 if (Shift == 4) {
8131 // Parse values for instskip.
8132 Value = StringSwitch<int>(ValueName)
8133 .Case("SAME", 0)
8134 .Case("NEXT", 1)
8135 .Case("SKIP_1", 2)
8136 .Case("SKIP_2", 3)
8137 .Case("SKIP_3", 4)
8138 .Case("SKIP_4", 5)
8139 .Default(-1);
8140 } else {
8141 // Parse values for instid0 and instid1.
8142 Value = StringSwitch<int>(ValueName)
8143 .Case("NO_DEP", 0)
8144 .Case("VALU_DEP_1", 1)
8145 .Case("VALU_DEP_2", 2)
8146 .Case("VALU_DEP_3", 3)
8147 .Case("VALU_DEP_4", 4)
8148 .Case("TRANS32_DEP_1", 5)
8149 .Case("TRANS32_DEP_2", 6)
8150 .Case("TRANS32_DEP_3", 7)
8151 .Case("FMA_ACCUM_CYCLE_1", 8)
8152 .Case("SALU_CYCLE_1", 9)
8153 .Case("SALU_CYCLE_2", 10)
8154 .Case("SALU_CYCLE_3", 11)
8155 .Default(-1);
8156 }
8157 if (Value < 0) {
8158 Error(ValueLoc, "invalid value name " + ValueName);
8159 return false;
8160 }
8161
8162 Delay |= Value << Shift;
8163 return true;
8164}
8165
8166ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
8167 int64_t Delay = 0;
8168 SMLoc S = getLoc();
8169
8170 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8171 do {
8172 if (!parseDelay(Delay))
8173 return ParseStatus::Failure;
8174 } while (trySkipToken(AsmToken::Pipe));
8175 } else {
8176 if (!parseExpr(Delay))
8177 return ParseStatus::Failure;
8178 }
8179
8180 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
8181 return ParseStatus::Success;
8182}
8183
8184bool
8185AMDGPUOperand::isSWaitCnt() const {
8186 return isImm();
8187}
8188
8189bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
8190
8191//===----------------------------------------------------------------------===//
8192// DepCtr
8193//===----------------------------------------------------------------------===//
8194
8195void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
8196 StringRef DepCtrName) {
8197 switch (ErrorId) {
8198 case OPR_ID_UNKNOWN:
8199 Error(Loc, Twine("invalid counter name ", DepCtrName));
8200 return;
8201 case OPR_ID_UNSUPPORTED:
8202 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
8203 return;
8204 case OPR_ID_DUPLICATE:
8205 Error(Loc, Twine("duplicate counter name ", DepCtrName));
8206 return;
8207 case OPR_VAL_INVALID:
8208 Error(Loc, Twine("invalid value for ", DepCtrName));
8209 return;
8210 default:
8211 assert(false);
8212 }
8213}
8214
8215bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
8216
8217 using namespace llvm::AMDGPU::DepCtr;
8218
8219 SMLoc DepCtrLoc = getLoc();
8220 StringRef DepCtrName = getTokenStr();
8221
8222 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
8223 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
8224 return false;
8225
8226 int64_t ExprVal;
8227 if (!parseExpr(ExprVal))
8228 return false;
8229
8230 unsigned PrevOprMask = UsedOprMask;
8231 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
8232
8233 if (CntVal < 0) {
8234 depCtrError(DepCtrLoc, CntVal, DepCtrName);
8235 return false;
8236 }
8237
8238 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8239 return false;
8240
8241 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
8242 if (isToken(AsmToken::EndOfStatement)) {
8243 Error(getLoc(), "expected a counter name");
8244 return false;
8245 }
8246 }
8247
8248 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
8249 DepCtr = (DepCtr & ~CntValMask) | CntVal;
8250 return true;
8251}
8252
8253ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
8254 using namespace llvm::AMDGPU::DepCtr;
8255
8256 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
8257 SMLoc Loc = getLoc();
8258
8259 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8260 unsigned UsedOprMask = 0;
8261 while (!isToken(AsmToken::EndOfStatement)) {
8262 if (!parseDepCtr(DepCtr, UsedOprMask))
8263 return ParseStatus::Failure;
8264 }
8265 } else {
8266 if (!parseExpr(DepCtr))
8267 return ParseStatus::Failure;
8268 }
8269
8270 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
8271 return ParseStatus::Success;
8272}
8273
8274bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
8275
8276//===----------------------------------------------------------------------===//
8277// hwreg
8278//===----------------------------------------------------------------------===//
8279
8280ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
8281 OperandInfoTy &Offset,
8282 OperandInfoTy &Width) {
8283 using namespace llvm::AMDGPU::Hwreg;
8284
8285 if (!trySkipId("hwreg", AsmToken::LParen))
8286 return ParseStatus::NoMatch;
8287
8288 // The register may be specified by name or using a numeric code
8289 HwReg.Loc = getLoc();
8290 if (isToken(AsmToken::Identifier) &&
8291 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8292 HwReg.IsSymbolic = true;
8293 lex(); // skip register name
8294 } else if (!parseExpr(HwReg.Val, "a register name")) {
8295 return ParseStatus::Failure;
8296 }
8297
8298 if (trySkipToken(AsmToken::RParen))
8299 return ParseStatus::Success;
8300
8301 // parse optional params
8302 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
8303 return ParseStatus::Failure;
8304
8305 Offset.Loc = getLoc();
8306 if (!parseExpr(Offset.Val))
8307 return ParseStatus::Failure;
8308
8309 if (!skipToken(AsmToken::Comma, "expected a comma"))
8310 return ParseStatus::Failure;
8311
8312 Width.Loc = getLoc();
8313 if (!parseExpr(Width.Val) ||
8314 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8315 return ParseStatus::Failure;
8316
8317 return ParseStatus::Success;
8318}
8319
8320ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
8321 using namespace llvm::AMDGPU::Hwreg;
8322
8323 int64_t ImmVal = 0;
8324 SMLoc Loc = getLoc();
8325
8326 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
8327 HwregId::Default);
8328 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
8329 HwregOffset::Default);
8330 struct : StructuredOpField {
8331 using StructuredOpField::StructuredOpField;
8332 bool validate(AMDGPUAsmParser &Parser) const override {
8333 if (!isUIntN(Width, Val - 1))
8334 return Error(Parser, "only values from 1 to 32 are legal");
8335 return true;
8336 }
8337 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
8338 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
8339
8340 if (Res.isNoMatch())
8341 Res = parseHwregFunc(HwReg, Offset, Width);
8342
8343 if (Res.isSuccess()) {
8344 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
8345 return ParseStatus::Failure;
8346 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
8347 }
8348
8349 if (Res.isNoMatch() &&
8350 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
8352
8353 if (!Res.isSuccess())
8354 return ParseStatus::Failure;
8355
8356 if (!isUInt<16>(ImmVal))
8357 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8358 Operands.push_back(
8359 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
8360 return ParseStatus::Success;
8361}
8362
8363bool AMDGPUOperand::isHwreg() const {
8364 return isImmTy(ImmTyHwreg);
8365}
8366
8367//===----------------------------------------------------------------------===//
8368// sendmsg
8369//===----------------------------------------------------------------------===//
8370
8371bool
8372AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8373 OperandInfoTy &Op,
8374 OperandInfoTy &Stream) {
8375 using namespace llvm::AMDGPU::SendMsg;
8376
8377 Msg.Loc = getLoc();
8378 if (isToken(AsmToken::Identifier) &&
8379 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8380 Msg.IsSymbolic = true;
8381 lex(); // skip message name
8382 } else if (!parseExpr(Msg.Val, "a message name")) {
8383 return false;
8384 }
8385
8386 if (trySkipToken(AsmToken::Comma)) {
8387 Op.IsDefined = true;
8388 Op.Loc = getLoc();
8389 if (isToken(AsmToken::Identifier) &&
8390 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8392 lex(); // skip operation name
8393 } else if (!parseExpr(Op.Val, "an operation name")) {
8394 return false;
8395 }
8396
8397 if (trySkipToken(AsmToken::Comma)) {
8398 Stream.IsDefined = true;
8399 Stream.Loc = getLoc();
8400 if (!parseExpr(Stream.Val))
8401 return false;
8402 }
8403 }
8404
8405 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
8406}
8407
8408bool
8409AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
8410 const OperandInfoTy &Op,
8411 const OperandInfoTy &Stream) {
8412 using namespace llvm::AMDGPU::SendMsg;
8413
8414 // Validation strictness depends on whether message is specified
8415 // in a symbolic or in a numeric form. In the latter case
8416 // only encoding possibility is checked.
8417 bool Strict = Msg.IsSymbolic;
8418
8419 if (Strict) {
8420 if (Msg.Val == OPR_ID_UNSUPPORTED) {
8421 Error(Msg.Loc, "specified message id is not supported on this GPU");
8422 return false;
8423 }
8424 } else {
8425 if (!isValidMsgId(Msg.Val, getSTI())) {
8426 Error(Msg.Loc, "invalid message id");
8427 return false;
8428 }
8429 }
8430 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
8431 if (Op.IsDefined) {
8432 Error(Op.Loc, "message does not support operations");
8433 } else {
8434 Error(Msg.Loc, "missing message operation");
8435 }
8436 return false;
8437 }
8438 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
8439 if (Op.Val == OPR_ID_UNSUPPORTED)
8440 Error(Op.Loc, "specified operation id is not supported on this GPU");
8441 else
8442 Error(Op.Loc, "invalid operation id");
8443 return false;
8444 }
8445 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
8446 Stream.IsDefined) {
8447 Error(Stream.Loc, "message operation does not support streams");
8448 return false;
8449 }
8450 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
8451 Error(Stream.Loc, "invalid message stream id");
8452 return false;
8453 }
8454 return true;
8455}
8456
8457ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
8458 using namespace llvm::AMDGPU::SendMsg;
8459
8460 int64_t ImmVal = 0;
8461 SMLoc Loc = getLoc();
8462
8463 if (trySkipId("sendmsg", AsmToken::LParen)) {
8464 OperandInfoTy Msg(OPR_ID_UNKNOWN);
8465 OperandInfoTy Op(OP_NONE_);
8466 OperandInfoTy Stream(STREAM_ID_NONE_);
8467 if (parseSendMsgBody(Msg, Op, Stream) &&
8468 validateSendMsg(Msg, Op, Stream)) {
8469 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
8470 } else {
8471 return ParseStatus::Failure;
8472 }
8473 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
8474 if (ImmVal < 0 || !isUInt<16>(ImmVal))
8475 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8476 } else {
8477 return ParseStatus::Failure;
8478 }
8479
8480 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
8481 return ParseStatus::Success;
8482}
8483
8484bool AMDGPUOperand::isSendMsg() const {
8485 return isImmTy(ImmTySendMsg);
8486}
8487
8488ParseStatus AMDGPUAsmParser::parseWaitEvent(OperandVector &Operands) {
8489 using namespace llvm::AMDGPU::WaitEvent;
8490
8491 SMLoc Loc = getLoc();
8492 int64_t ImmVal = 0;
8493
8494 StructuredOpField DontWaitExportReady("dont_wait_export_ready", "bit value",
8495 1, 0);
8496 StructuredOpField ExportReady("export_ready", "bit value", 1, 0);
8497
8498 StructuredOpField *TargetBitfield =
8499 isGFX11() ? &DontWaitExportReady : &ExportReady;
8500
8501 ParseStatus Res = parseStructuredOpFields({TargetBitfield});
8502 if (Res.isNoMatch() && parseExpr(ImmVal, "structured immediate"))
8504 else if (Res.isSuccess()) {
8505 if (!validateStructuredOpFields({TargetBitfield}))
8506 return ParseStatus::Failure;
8507 ImmVal = TargetBitfield->Val;
8508 }
8509
8510 if (!Res.isSuccess())
8511 return ParseStatus::Failure;
8512
8513 if (!isUInt<16>(ImmVal))
8514 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8515
8516 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc,
8517 AMDGPUOperand::ImmTyWaitEvent));
8518 return ParseStatus::Success;
8519}
8520
8521bool AMDGPUOperand::isWaitEvent() const { return isImmTy(ImmTyWaitEvent); }
8522
8523//===----------------------------------------------------------------------===//
8524// v_interp
8525//===----------------------------------------------------------------------===//
8526
8527ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
8528 StringRef Str;
8529 SMLoc S = getLoc();
8530
8531 if (!parseId(Str))
8532 return ParseStatus::NoMatch;
8533
8534 int Slot = StringSwitch<int>(Str)
8535 .Case("p10", 0)
8536 .Case("p20", 1)
8537 .Case("p0", 2)
8538 .Default(-1);
8539
8540 if (Slot == -1)
8541 return Error(S, "invalid interpolation slot");
8542
8543 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
8544 AMDGPUOperand::ImmTyInterpSlot));
8545 return ParseStatus::Success;
8546}
8547
8548ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
8549 StringRef Str;
8550 SMLoc S = getLoc();
8551
8552 if (!parseId(Str))
8553 return ParseStatus::NoMatch;
8554
8555 if (!Str.starts_with("attr"))
8556 return Error(S, "invalid interpolation attribute");
8557
8558 StringRef Chan = Str.take_back(2);
8559 int AttrChan = StringSwitch<int>(Chan)
8560 .Case(".x", 0)
8561 .Case(".y", 1)
8562 .Case(".z", 2)
8563 .Case(".w", 3)
8564 .Default(-1);
8565 if (AttrChan == -1)
8566 return Error(S, "invalid or missing interpolation attribute channel");
8567
8568 Str = Str.drop_back(2).drop_front(4);
8569
8570 uint8_t Attr;
8571 if (Str.getAsInteger(10, Attr))
8572 return Error(S, "invalid or missing interpolation attribute number");
8573
8574 if (Attr > 32)
8575 return Error(S, "out of bounds interpolation attribute number");
8576
8577 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
8578
8579 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
8580 AMDGPUOperand::ImmTyInterpAttr));
8581 Operands.push_back(AMDGPUOperand::CreateImm(
8582 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8583 return ParseStatus::Success;
8584}
8585
8586//===----------------------------------------------------------------------===//
8587// exp
8588//===----------------------------------------------------------------------===//
8589
8590ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
8591 using namespace llvm::AMDGPU::Exp;
8592
8593 StringRef Str;
8594 SMLoc S = getLoc();
8595
8596 if (!parseId(Str))
8597 return ParseStatus::NoMatch;
8598
8599 unsigned Id = getTgtId(Str);
8600 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
8601 return Error(S, (Id == ET_INVALID)
8602 ? "invalid exp target"
8603 : "exp target is not supported on this GPU");
8604
8605 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
8606 AMDGPUOperand::ImmTyExpTgt));
8607 return ParseStatus::Success;
8608}
8609
8610//===----------------------------------------------------------------------===//
8611// parser helpers
8612//===----------------------------------------------------------------------===//
8613
8614bool
8615AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
8616 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
8617}
8618
8619bool
8620AMDGPUAsmParser::isId(const StringRef Id) const {
8621 return isId(getToken(), Id);
8622}
8623
8624bool
8625AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
8626 return getTokenKind() == Kind;
8627}
8628
8629StringRef AMDGPUAsmParser::getId() const {
8630 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
8631}
8632
8633bool
8634AMDGPUAsmParser::trySkipId(const StringRef Id) {
8635 if (isId(Id)) {
8636 lex();
8637 return true;
8638 }
8639 return false;
8640}
8641
8642bool
8643AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
8644 if (isToken(AsmToken::Identifier)) {
8645 StringRef Tok = getTokenStr();
8646 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
8647 lex();
8648 return true;
8649 }
8650 }
8651 return false;
8652}
8653
8654bool
8655AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
8656 if (isId(Id) && peekToken().is(Kind)) {
8657 lex();
8658 lex();
8659 return true;
8660 }
8661 return false;
8662}
8663
8664bool
8665AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
8666 if (isToken(Kind)) {
8667 lex();
8668 return true;
8669 }
8670 return false;
8671}
8672
8673bool
8674AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
8675 const StringRef ErrMsg) {
8676 if (!trySkipToken(Kind)) {
8677 Error(getLoc(), ErrMsg);
8678 return false;
8679 }
8680 return true;
8681}
8682
8683bool
8684AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
8685 SMLoc S = getLoc();
8686
8687 const MCExpr *Expr;
8688 if (Parser.parseExpression(Expr))
8689 return false;
8690
8691 if (Expr->evaluateAsAbsolute(Imm))
8692 return true;
8693
8694 if (Expected.empty()) {
8695 Error(S, "expected absolute expression");
8696 } else {
8697 Error(S, Twine("expected ", Expected) +
8698 Twine(" or an absolute expression"));
8699 }
8700 return false;
8701}
8702
8703bool
8704AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
8705 SMLoc S = getLoc();
8706
8707 const MCExpr *Expr;
8708 if (Parser.parseExpression(Expr))
8709 return false;
8710
8711 int64_t IntVal;
8712 if (Expr->evaluateAsAbsolute(IntVal)) {
8713 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
8714 } else {
8715 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8716 }
8717 return true;
8718}
8719
8720bool
8721AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
8722 if (isToken(AsmToken::String)) {
8723 Val = getToken().getStringContents();
8724 lex();
8725 return true;
8726 }
8727 Error(getLoc(), ErrMsg);
8728 return false;
8729}
8730
8731bool
8732AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
8733 if (isToken(AsmToken::Identifier)) {
8734 Val = getTokenStr();
8735 lex();
8736 return true;
8737 }
8738 if (!ErrMsg.empty())
8739 Error(getLoc(), ErrMsg);
8740 return false;
8741}
8742
8743AsmToken
8744AMDGPUAsmParser::getToken() const {
8745 return Parser.getTok();
8746}
8747
8748AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
8749 return isToken(AsmToken::EndOfStatement)
8750 ? getToken()
8751 : getLexer().peekTok(ShouldSkipSpace);
8752}
8753
8754void
8755AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
8756 auto TokCount = getLexer().peekTokens(Tokens);
8757
8758 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
8759 Tokens[Idx] = AsmToken(AsmToken::Error, "");
8760}
8761
8763AMDGPUAsmParser::getTokenKind() const {
8764 return getLexer().getKind();
8765}
8766
8767SMLoc
8768AMDGPUAsmParser::getLoc() const {
8769 return getToken().getLoc();
8770}
8771
8772StringRef
8773AMDGPUAsmParser::getTokenStr() const {
8774 return getToken().getString();
8775}
8776
8777void
8778AMDGPUAsmParser::lex() {
8779 Parser.Lex();
8780}
8781
8782const AMDGPUOperand &
8783AMDGPUAsmParser::findMCOperand(const OperandVector &Operands,
8784 int MCOpIdx) const {
8785 for (const auto &Op : Operands) {
8786 const AMDGPUOperand &TargetOp = static_cast<AMDGPUOperand &>(*Op);
8787 if (TargetOp.getMCOpIdx() == MCOpIdx)
8788 return TargetOp;
8789 }
8790 llvm_unreachable("no such MC operand!");
8791}
8792
8793SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
8794 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8795}
8796
8797// Returns one of the given locations that comes later in the source.
8798SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
8799 return a.getPointer() < b.getPointer() ? b : a;
8800}
8801
8802SMLoc AMDGPUAsmParser::getOperandLoc(const OperandVector &Operands,
8803 int MCOpIdx) const {
8804 return findMCOperand(Operands, MCOpIdx).getStartLoc();
8805}
8806
8807SMLoc
8808AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
8809 const OperandVector &Operands) const {
8810 for (unsigned i = Operands.size() - 1; i > 0; --i) {
8811 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8812 if (Test(Op))
8813 return Op.getStartLoc();
8814 }
8815 return getInstLoc(Operands);
8816}
8817
8818SMLoc
8819AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8820 const OperandVector &Operands) const {
8821 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
8822 return getOperandLoc(Test, Operands);
8823}
8824
8825ParseStatus
8826AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
8827 if (!trySkipToken(AsmToken::LCurly))
8828 return ParseStatus::NoMatch;
8829
8830 bool First = true;
8831 while (!trySkipToken(AsmToken::RCurly)) {
8832 if (!First &&
8833 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
8834 return ParseStatus::Failure;
8835
8836 StringRef Id = getTokenStr();
8837 SMLoc IdLoc = getLoc();
8838 if (!skipToken(AsmToken::Identifier, "field name expected") ||
8839 !skipToken(AsmToken::Colon, "colon expected"))
8840 return ParseStatus::Failure;
8841
8842 const auto *I =
8843 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
8844 if (I == Fields.end())
8845 return Error(IdLoc, "unknown field");
8846 if ((*I)->IsDefined)
8847 return Error(IdLoc, "duplicate field");
8848
8849 // TODO: Support symbolic values.
8850 (*I)->Loc = getLoc();
8851 if (!parseExpr((*I)->Val))
8852 return ParseStatus::Failure;
8853 (*I)->IsDefined = true;
8854
8855 First = false;
8856 }
8857 return ParseStatus::Success;
8858}
8859
8860bool AMDGPUAsmParser::validateStructuredOpFields(
8862 return all_of(Fields, [this](const StructuredOpField *F) {
8863 return F->validate(*this);
8864 });
8865}
8866
8867//===----------------------------------------------------------------------===//
8868// swizzle
8869//===----------------------------------------------------------------------===//
8870
8872static unsigned
8873encodeBitmaskPerm(const unsigned AndMask,
8874 const unsigned OrMask,
8875 const unsigned XorMask) {
8876 using namespace llvm::AMDGPU::Swizzle;
8877
8878 return BITMASK_PERM_ENC |
8879 (AndMask << BITMASK_AND_SHIFT) |
8880 (OrMask << BITMASK_OR_SHIFT) |
8881 (XorMask << BITMASK_XOR_SHIFT);
8882}
8883
8884bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8885 const unsigned MaxVal,
8886 const Twine &ErrMsg, SMLoc &Loc) {
8887 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8888 return false;
8889 }
8890 Loc = getLoc();
8891 if (!parseExpr(Op)) {
8892 return false;
8893 }
8894 if (Op < MinVal || Op > MaxVal) {
8895 Error(Loc, ErrMsg);
8896 return false;
8897 }
8898
8899 return true;
8900}
8901
8902bool
8903AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8904 const unsigned MinVal,
8905 const unsigned MaxVal,
8906 const StringRef ErrMsg) {
8907 SMLoc Loc;
8908 for (unsigned i = 0; i < OpNum; ++i) {
8909 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
8910 return false;
8911 }
8912
8913 return true;
8914}
8915
8916bool
8917AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8918 using namespace llvm::AMDGPU::Swizzle;
8919
8920 int64_t Lane[LANE_NUM];
8921 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8922 "expected a 2-bit lane id")) {
8924 for (unsigned I = 0; I < LANE_NUM; ++I) {
8925 Imm |= Lane[I] << (LANE_SHIFT * I);
8926 }
8927 return true;
8928 }
8929 return false;
8930}
8931
8932bool
8933AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
8934 using namespace llvm::AMDGPU::Swizzle;
8935
8936 SMLoc Loc;
8937 int64_t GroupSize;
8938 int64_t LaneIdx;
8939
8940 if (!parseSwizzleOperand(GroupSize,
8941 2, 32,
8942 "group size must be in the interval [2,32]",
8943 Loc)) {
8944 return false;
8945 }
8946 if (!isPowerOf2_64(GroupSize)) {
8947 Error(Loc, "group size must be a power of two");
8948 return false;
8949 }
8950 if (parseSwizzleOperand(LaneIdx,
8951 0, GroupSize - 1,
8952 "lane id must be in the interval [0,group size - 1]",
8953 Loc)) {
8954 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8955 return true;
8956 }
8957 return false;
8958}
8959
8960bool
8961AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8962 using namespace llvm::AMDGPU::Swizzle;
8963
8964 SMLoc Loc;
8965 int64_t GroupSize;
8966
8967 if (!parseSwizzleOperand(GroupSize,
8968 2, 32,
8969 "group size must be in the interval [2,32]",
8970 Loc)) {
8971 return false;
8972 }
8973 if (!isPowerOf2_64(GroupSize)) {
8974 Error(Loc, "group size must be a power of two");
8975 return false;
8976 }
8977
8978 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8979 return true;
8980}
8981
8982bool
8983AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8984 using namespace llvm::AMDGPU::Swizzle;
8985
8986 SMLoc Loc;
8987 int64_t GroupSize;
8988
8989 if (!parseSwizzleOperand(GroupSize,
8990 1, 16,
8991 "group size must be in the interval [1,16]",
8992 Loc)) {
8993 return false;
8994 }
8995 if (!isPowerOf2_64(GroupSize)) {
8996 Error(Loc, "group size must be a power of two");
8997 return false;
8998 }
8999
9000 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
9001 return true;
9002}
9003
9004bool
9005AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
9006 using namespace llvm::AMDGPU::Swizzle;
9007
9008 if (!skipToken(AsmToken::Comma, "expected a comma")) {
9009 return false;
9010 }
9011
9012 StringRef Ctl;
9013 SMLoc StrLoc = getLoc();
9014 if (!parseString(Ctl)) {
9015 return false;
9016 }
9017 if (Ctl.size() != BITMASK_WIDTH) {
9018 Error(StrLoc, "expected a 5-character mask");
9019 return false;
9020 }
9021
9022 unsigned AndMask = 0;
9023 unsigned OrMask = 0;
9024 unsigned XorMask = 0;
9025
9026 for (size_t i = 0; i < Ctl.size(); ++i) {
9027 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
9028 switch(Ctl[i]) {
9029 default:
9030 Error(StrLoc, "invalid mask");
9031 return false;
9032 case '0':
9033 break;
9034 case '1':
9035 OrMask |= Mask;
9036 break;
9037 case 'p':
9038 AndMask |= Mask;
9039 break;
9040 case 'i':
9041 AndMask |= Mask;
9042 XorMask |= Mask;
9043 break;
9044 }
9045 }
9046
9047 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
9048 return true;
9049}
9050
9051bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
9052 using namespace llvm::AMDGPU::Swizzle;
9053
9054 if (!AMDGPU::isGFX9Plus(getSTI())) {
9055 Error(getLoc(), "FFT mode swizzle not supported on this GPU");
9056 return false;
9057 }
9058
9059 int64_t Swizzle;
9060 SMLoc Loc;
9061 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
9062 "FFT swizzle must be in the interval [0," +
9063 Twine(FFT_SWIZZLE_MAX) + Twine(']'),
9064 Loc))
9065 return false;
9066
9067 Imm = FFT_MODE_ENC | Swizzle;
9068 return true;
9069}
9070
9071bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
9072 using namespace llvm::AMDGPU::Swizzle;
9073
9074 if (!AMDGPU::isGFX9Plus(getSTI())) {
9075 Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
9076 return false;
9077 }
9078
9079 SMLoc Loc;
9080 int64_t Direction;
9081
9082 if (!parseSwizzleOperand(Direction, 0, 1,
9083 "direction must be 0 (left) or 1 (right)", Loc))
9084 return false;
9085
9086 int64_t RotateSize;
9087 if (!parseSwizzleOperand(
9088 RotateSize, 0, ROTATE_MAX_SIZE,
9089 "number of threads to rotate must be in the interval [0," +
9090 Twine(ROTATE_MAX_SIZE) + Twine(']'),
9091 Loc))
9092 return false;
9093
9095 (RotateSize << ROTATE_SIZE_SHIFT);
9096 return true;
9097}
9098
9099bool
9100AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
9101
9102 SMLoc OffsetLoc = getLoc();
9103
9104 if (!parseExpr(Imm, "a swizzle macro")) {
9105 return false;
9106 }
9107 if (!isUInt<16>(Imm)) {
9108 Error(OffsetLoc, "expected a 16-bit offset");
9109 return false;
9110 }
9111 return true;
9112}
9113
9114bool
9115AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
9116 using namespace llvm::AMDGPU::Swizzle;
9117
9118 if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
9119
9120 SMLoc ModeLoc = getLoc();
9121 bool Ok = false;
9122
9123 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
9124 Ok = parseSwizzleQuadPerm(Imm);
9125 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
9126 Ok = parseSwizzleBitmaskPerm(Imm);
9127 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
9128 Ok = parseSwizzleBroadcast(Imm);
9129 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
9130 Ok = parseSwizzleSwap(Imm);
9131 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
9132 Ok = parseSwizzleReverse(Imm);
9133 } else if (trySkipId(IdSymbolic[ID_FFT])) {
9134 Ok = parseSwizzleFFT(Imm);
9135 } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
9136 Ok = parseSwizzleRotate(Imm);
9137 } else {
9138 Error(ModeLoc, "expected a swizzle mode");
9139 }
9140
9141 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
9142 }
9143
9144 return false;
9145}
9146
9147ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
9148 SMLoc S = getLoc();
9149 int64_t Imm = 0;
9150
9151 if (trySkipId("offset")) {
9152
9153 bool Ok = false;
9154 if (skipToken(AsmToken::Colon, "expected a colon")) {
9155 if (trySkipId("swizzle")) {
9156 Ok = parseSwizzleMacro(Imm);
9157 } else {
9158 Ok = parseSwizzleOffset(Imm);
9159 }
9160 }
9161
9162 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
9163
9165 }
9166 return ParseStatus::NoMatch;
9167}
9168
9169bool
9170AMDGPUOperand::isSwizzle() const {
9171 return isImmTy(ImmTySwizzle);
9172}
9173
9174//===----------------------------------------------------------------------===//
9175// VGPR Index Mode
9176//===----------------------------------------------------------------------===//
9177
9178int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
9179
9180 using namespace llvm::AMDGPU::VGPRIndexMode;
9181
9182 if (trySkipToken(AsmToken::RParen)) {
9183 return OFF;
9184 }
9185
9186 int64_t Imm = 0;
9187
9188 while (true) {
9189 unsigned Mode = 0;
9190 SMLoc S = getLoc();
9191
9192 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
9193 if (trySkipId(IdSymbolic[ModeId])) {
9194 Mode = 1 << ModeId;
9195 break;
9196 }
9197 }
9198
9199 if (Mode == 0) {
9200 Error(S, (Imm == 0)?
9201 "expected a VGPR index mode or a closing parenthesis" :
9202 "expected a VGPR index mode");
9203 return UNDEF;
9204 }
9205
9206 if (Imm & Mode) {
9207 Error(S, "duplicate VGPR index mode");
9208 return UNDEF;
9209 }
9210 Imm |= Mode;
9211
9212 if (trySkipToken(AsmToken::RParen))
9213 break;
9214 if (!skipToken(AsmToken::Comma,
9215 "expected a comma or a closing parenthesis"))
9216 return UNDEF;
9217 }
9218
9219 return Imm;
9220}
9221
9222ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
9223
9224 using namespace llvm::AMDGPU::VGPRIndexMode;
9225
9226 int64_t Imm = 0;
9227 SMLoc S = getLoc();
9228
9229 if (trySkipId("gpr_idx", AsmToken::LParen)) {
9230 Imm = parseGPRIdxMacro();
9231 if (Imm == UNDEF)
9232 return ParseStatus::Failure;
9233 } else {
9234 if (getParser().parseAbsoluteExpression(Imm))
9235 return ParseStatus::Failure;
9236 if (Imm < 0 || !isUInt<4>(Imm))
9237 return Error(S, "invalid immediate: only 4-bit values are legal");
9238 }
9239
9240 Operands.push_back(
9241 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
9242 return ParseStatus::Success;
9243}
9244
9245bool AMDGPUOperand::isGPRIdxMode() const {
9246 return isImmTy(ImmTyGprIdxMode);
9247}
9248
9249//===----------------------------------------------------------------------===//
9250// sopp branch targets
9251//===----------------------------------------------------------------------===//
9252
9253ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
9254
9255 // Make sure we are not parsing something
9256 // that looks like a label or an expression but is not.
9257 // This will improve error messages.
9258 if (isRegister() || isModifier())
9259 return ParseStatus::NoMatch;
9260
9261 if (!parseExpr(Operands))
9262 return ParseStatus::Failure;
9263
9264 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
9265 assert(Opr.isImm() || Opr.isExpr());
9266 SMLoc Loc = Opr.getStartLoc();
9267
9268 // Currently we do not support arbitrary expressions as branch targets.
9269 // Only labels and absolute expressions are accepted.
9270 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
9271 Error(Loc, "expected an absolute expression or a label");
9272 } else if (Opr.isImm() && !Opr.isS16Imm()) {
9273 Error(Loc, "expected a 16-bit signed jump offset");
9274 }
9275
9276 return ParseStatus::Success;
9277}
9278
9279//===----------------------------------------------------------------------===//
9280// Boolean holding registers
9281//===----------------------------------------------------------------------===//
9282
9283ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
9284 return parseReg(Operands);
9285}
9286
9287//===----------------------------------------------------------------------===//
9288// mubuf
9289//===----------------------------------------------------------------------===//
9290
9291void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
9292 const OperandVector &Operands,
9293 bool IsAtomic) {
9294 OptionalImmIndexMap OptionalIdx;
9295 unsigned FirstOperandIdx = 1;
9296 bool IsAtomicReturn = false;
9297
9298 if (IsAtomic) {
9299 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
9301 }
9302
9303 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
9304 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9305
9306 // Add the register arguments
9307 if (Op.isReg()) {
9308 Op.addRegOperands(Inst, 1);
9309 // Insert a tied src for atomic return dst.
9310 // This cannot be postponed as subsequent calls to
9311 // addImmOperands rely on correct number of MC operands.
9312 if (IsAtomicReturn && i == FirstOperandIdx)
9313 Op.addRegOperands(Inst, 1);
9314 continue;
9315 }
9316
9317 // Handle the case where soffset is an immediate
9318 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9319 Op.addImmOperands(Inst, 1);
9320 continue;
9321 }
9322
9323 // Handle tokens like 'offen' which are sometimes hard-coded into the
9324 // asm string. There are no MCInst operands for these.
9325 if (Op.isToken()) {
9326 continue;
9327 }
9328 assert(Op.isImm());
9329
9330 // Handle optional arguments
9331 OptionalIdx[Op.getImmTy()] = i;
9332 }
9333
9334 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
9335 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
9336 // Parse a dummy operand as a placeholder for the SWZ operand. This enforces
9337 // agreement between MCInstrDesc.getNumOperands and MCInst.getNumOperands.
9339}
9340
9341//===----------------------------------------------------------------------===//
9342// smrd
9343//===----------------------------------------------------------------------===//
9344
9345bool AMDGPUOperand::isSMRDOffset8() const {
9346 return isImmLiteral() && isUInt<8>(getImm());
9347}
9348
9349bool AMDGPUOperand::isSMEMOffset() const {
9350 // Offset range is checked later by validator.
9351 return isImmLiteral();
9352}
9353
9354bool AMDGPUOperand::isSMRDLiteralOffset() const {
9355 // 32-bit literals are only supported on CI and we only want to use them
9356 // when the offset is > 8-bits.
9357 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
9358}
9359
9360//===----------------------------------------------------------------------===//
9361// vop3
9362//===----------------------------------------------------------------------===//
9363
/// Convert an output-modifier multiplier in place.
///
/// Accepts 1, 2 and 4, replacing them with 0, 1 and 2 respectively (the value
/// shifted right by one). Any other value is rejected and left unchanged.
///
/// \returns true if \p Mul was one of the accepted values.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
  case 2:
  case 4:
    Mul >>= 1;
    return true;
  default:
    return false;
  }
}
9371
/// Convert an output-modifier divisor in place.
///
/// Accepts 1 (replaced with 0) and 2 (replaced with 3). Any other value is
/// rejected and left unchanged.
///
/// \returns true if \p Div was one of the accepted values.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1:
    Div = 0;
    return true;
  case 2:
    Div = 3;
    return true;
  default:
    return false;
  }
}
9385
9386// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
9387// This is intentional and ensures compatibility with sp3.
9388// See bug 35397 for details.
9389bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9390 if (BoundCtrl == 0 || BoundCtrl == 1) {
9391 if (!isGFX11Plus())
9392 BoundCtrl = 1;
9393 return true;
9394 }
9395 return false;
9396}
9397
9398void AMDGPUAsmParser::onBeginOfFile() {
9399 if (!getParser().getStreamer().getTargetStreamer() ||
9400 getSTI().getTargetTriple().getArch() == Triple::r600)
9401 return;
9402
9403 if (!getTargetStreamer().getTargetID())
9404 getTargetStreamer().initializeTargetID(getSTI(),
9405 getSTI().getFeatureString());
9406
9407 if (isHsaAbi(getSTI()))
9408 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9409}
9410
9411/// Parse AMDGPU specific expressions.
9412///
9413/// expr ::= or(expr, ...) |
9414/// max(expr, ...) |
9415/// min(expr, ...)
9416///
9417bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
9418 using AGVK = AMDGPUMCExpr::VariantKind;
9419
9420 if (isToken(AsmToken::Identifier)) {
9421 StringRef TokenId = getTokenStr();
9422 AGVK VK = StringSwitch<AGVK>(TokenId)
9423 .Case("max", AGVK::AGVK_Max)
9424 .Case("min", AGVK::AGVK_Min)
9425 .Case("or", AGVK::AGVK_Or)
9426 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
9427 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9428 .Case("alignto", AGVK::AGVK_AlignTo)
9429 .Case("occupancy", AGVK::AGVK_Occupancy)
9430 .Case("instprefsize", AGVK::AGVK_InstPrefSize)
9431 .Default(AGVK::AGVK_None);
9432
9433 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
9435 uint64_t CommaCount = 0;
9436 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
9437 lex(); // Eat '('
9438 while (true) {
9439 if (trySkipToken(AsmToken::RParen)) {
9440 if (Exprs.empty()) {
9441 Error(getToken().getLoc(),
9442 "empty " + Twine(TokenId) + " expression");
9443 return true;
9444 }
9445 if (CommaCount + 1 != Exprs.size()) {
9446 Error(getToken().getLoc(),
9447 "mismatch of commas in " + Twine(TokenId) + " expression");
9448 return true;
9449 }
9450 Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
9451 return false;
9452 }
9453 const MCExpr *Expr;
9454 if (getParser().parseExpression(Expr, EndLoc))
9455 return true;
9456 Exprs.push_back(Expr);
9457 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
9458 if (LastTokenWasComma)
9459 CommaCount++;
9460 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
9461 Error(getToken().getLoc(),
9462 "unexpected token in " + Twine(TokenId) + " expression");
9463 return true;
9464 }
9465 }
9466 }
9467 }
9468 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
9469}
9470
9471ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
9472 StringRef Name = getTokenStr();
9473 if (Name == "mul") {
9474 return parseIntWithPrefix("mul", Operands,
9475 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
9476 }
9477
9478 if (Name == "div") {
9479 return parseIntWithPrefix("div", Operands,
9480 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
9481 }
9482
9483 return ParseStatus::NoMatch;
9484}
9485
9486// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
9487// the number of src operands present, then copies that bit into src0_modifiers.
9488static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
9489 int Opc = Inst.getOpcode();
9490 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9491 if (OpSelIdx == -1)
9492 return;
9493
9494 int SrcNum;
9495 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9496 AMDGPU::OpName::src2};
9497 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
9498 ++SrcNum)
9499 ;
9500 assert(SrcNum > 0);
9501
9502 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9503
9504 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
9505 if (DstIdx == -1)
9506 return;
9507
9508 const MCOperand &DstOp = Inst.getOperand(DstIdx);
9509 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
9510 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9511 if (DstOp.isReg() &&
9512 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
9513 if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
9514 ModVal |= SISrcMods::DST_OP_SEL;
9515 } else {
9516 if ((OpSel & (1 << SrcNum)) != 0)
9517 ModVal |= SISrcMods::DST_OP_SEL;
9518 }
9519 Inst.getOperand(ModIdx).setImm(ModVal);
9520}
9521
9522void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
9523 const OperandVector &Operands) {
9524 cvtVOP3P(Inst, Operands);
9525 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9526}
9527
9528void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
9529 OptionalImmIndexMap &OptionalIdx) {
9530 cvtVOP3P(Inst, Operands, OptionalIdx);
9531 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9532}
9533
9534static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
9535 return
9536 // 1. This operand is input modifiers
9537 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
9538 // 2. This is not last operand
9539 && Desc.NumOperands > (OpNum + 1)
9540 // 3. Next operand is register class
9541 && Desc.operands()[OpNum + 1].RegClass != -1
9542 // 4. Next register is not tied to any other operand
9543 && Desc.getOperandConstraint(OpNum + 1,
9545}
9546
9547void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst, unsigned OpSel) {
9548 unsigned Opc = Inst.getOpcode();
9549 constexpr AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9550 AMDGPU::OpName::src2};
9551 constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9552 AMDGPU::OpName::src1_modifiers,
9553 AMDGPU::OpName::src2_modifiers};
9554 for (int J = 0; J < 3; ++J) {
9555 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9556 if (OpIdx == -1)
9557 // Some instructions, e.g. v_interp_p2_f16 in GFX9, have src0, src2, but
9558 // no src1. So continue instead of break.
9559 continue;
9560
9561 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9562 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9563
9564 if ((OpSel & (1 << J)) != 0)
9565 ModVal |= SISrcMods::OP_SEL_0;
9566 // op_sel[3] is encoded in src0_modifiers.
9567 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)
9568 ModVal |= SISrcMods::DST_OP_SEL;
9569
9570 Inst.getOperand(ModIdx).setImm(ModVal);
9571 }
9572}
9573
9574void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
9575{
9576 OptionalImmIndexMap OptionalIdx;
9577 unsigned Opc = Inst.getOpcode();
9578
9579 unsigned I = 1;
9580 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9581 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9582 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9583 }
9584
9585 for (unsigned E = Operands.size(); I != E; ++I) {
9586 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9588 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9589 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
9590 Op.isInterpAttrChan()) {
9591 Inst.addOperand(MCOperand::createImm(Op.getImm()));
9592 } else if (Op.isImmModifier()) {
9593 OptionalIdx[Op.getImmTy()] = I;
9594 } else {
9595 llvm_unreachable("unhandled operand type");
9596 }
9597 }
9598
9599 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
9600 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9601 AMDGPUOperand::ImmTyHigh);
9602
9603 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9604 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9605 AMDGPUOperand::ImmTyClamp);
9606
9607 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9608 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9609 AMDGPUOperand::ImmTyOModSI);
9610
9611 // Some v_interp instructions use op_sel[3] for dst.
9612 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9613 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9614 AMDGPUOperand::ImmTyOpSel);
9615 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9616 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9617
9618 cvtOpSelHelper(Inst, OpSel);
9619 }
9620}
9621
9622void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
9623{
9624 OptionalImmIndexMap OptionalIdx;
9625 unsigned Opc = Inst.getOpcode();
9626
9627 unsigned I = 1;
9628 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9629 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9630 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9631 }
9632
9633 for (unsigned E = Operands.size(); I != E; ++I) {
9634 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9636 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9637 } else if (Op.isImmModifier()) {
9638 OptionalIdx[Op.getImmTy()] = I;
9639 } else {
9640 llvm_unreachable("unhandled operand type");
9641 }
9642 }
9643
9644 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
9645
9646 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9647 if (OpSelIdx != -1)
9648 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9649
9650 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
9651
9652 if (OpSelIdx == -1)
9653 return;
9654
9655 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9656 cvtOpSelHelper(Inst, OpSel);
9657}
9658
9659void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9660 const OperandVector &Operands) {
9661 OptionalImmIndexMap OptionalIdx;
9662 unsigned Opc = Inst.getOpcode();
9663 unsigned I = 1;
9664 int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
9665
9666 const MCInstrDesc &Desc = MII.get(Opc);
9667
9668 for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
9669 static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);
9670
9671 for (unsigned E = Operands.size(); I != E; ++I) {
9672 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
9673 int NumOperands = Inst.getNumOperands();
9674 // The order of operands in MCInst and parsed operands are different.
9675 // Adding dummy cbsz and blgp operands at corresponding MCInst operand
9676 // indices for parsing scale values correctly.
9677 if (NumOperands == CbszOpIdx) {
9680 }
9681 if (isRegOrImmWithInputMods(Desc, NumOperands)) {
9682 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9683 } else if (Op.isImmModifier()) {
9684 OptionalIdx[Op.getImmTy()] = I;
9685 } else {
9686 Op.addRegOrImmOperands(Inst, 1);
9687 }
9688 }
9689
9690 // Insert CBSZ and BLGP operands for F8F6F4 variants
9691 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9692 if (CbszIdx != OptionalIdx.end()) {
9693 int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
9694 Inst.getOperand(CbszOpIdx).setImm(CbszVal);
9695 }
9696
9697 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
9698 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9699 if (BlgpIdx != OptionalIdx.end()) {
9700 int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
9701 Inst.getOperand(BlgpOpIdx).setImm(BlgpVal);
9702 }
9703
9704 // Add dummy src_modifiers
9707
9708 // Handle op_sel fields
9709
9710 unsigned OpSel = 0;
9711 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9712 if (OpselIdx != OptionalIdx.end()) {
9713 OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
9714 .getImm();
9715 }
9716
9717 unsigned OpSelHi = 0;
9718 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9719 if (OpselHiIdx != OptionalIdx.end()) {
9720 OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
9721 .getImm();
9722 }
9723 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9724 AMDGPU::OpName::src1_modifiers};
9725
9726 for (unsigned J = 0; J < 2; ++J) {
9727 unsigned ModVal = 0;
9728 if (OpSel & (1 << J))
9729 ModVal |= SISrcMods::OP_SEL_0;
9730 if (OpSelHi & (1 << J))
9731 ModVal |= SISrcMods::OP_SEL_1;
9732
9733 const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9734 Inst.getOperand(ModIdx).setImm(ModVal);
9735 }
9736}
9737
9738void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
9739 OptionalImmIndexMap &OptionalIdx) {
9740 unsigned Opc = Inst.getOpcode();
9741
9742 unsigned I = 1;
9743 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9744 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9745 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9746 }
9747
9748 for (unsigned E = Operands.size(); I != E; ++I) {
9749 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9751 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9752 } else if (Op.isImmModifier()) {
9753 OptionalIdx[Op.getImmTy()] = I;
9754 } else {
9755 Op.addRegOrImmOperands(Inst, 1);
9756 }
9757 }
9758
9759 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::scale_sel))
9760 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9761 AMDGPUOperand::ImmTyScaleSel);
9762
9763 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9764 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9765 AMDGPUOperand::ImmTyClamp);
9766
9767 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
9768 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
9769 Inst.addOperand(Inst.getOperand(0));
9770 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9771 AMDGPUOperand::ImmTyByteSel);
9772 }
9773
9774 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9775 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9776 AMDGPUOperand::ImmTyOModSI);
9777
9778 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
9779 // it has src2 register operand that is tied to dst operand
9780 // we don't allow modifiers for this operand in assembler so src2_modifiers
9781 // should be 0.
9782 if (isMAC(Opc)) {
9783 auto *it = Inst.begin();
9784 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
9785 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
9786 ++it;
9787 // Copy the operand to ensure it's not invalidated when Inst grows.
9788 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
9789 }
9790}
9791
9792void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
9793 OptionalImmIndexMap OptionalIdx;
9794 cvtVOP3(Inst, Operands, OptionalIdx);
9795}
9796
9797void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
9798 OptionalImmIndexMap &OptIdx) {
9799 const int Opc = Inst.getOpcode();
9800 const MCInstrDesc &Desc = MII.get(Opc);
9801
9802 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
9803
9804 if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
9805 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
9806 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9807 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9808 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx11 ||
9809 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx11 ||
9810 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9811 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12 ||
9812 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx13 ||
9813 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx13) {
9814 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
9815 Inst.addOperand(Inst.getOperand(0));
9816 }
9817
9818 // Append vdst_in only if a previous converter (cvtVOP3DPP for DPP variants,
9819 // cvtVOP3 for byte_sel variants) hasn't already placed it. Use the position
9820 // of the named operand to detect that, the same way cvtVOP3DPP does
9821 // internally.
9822 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
9823 if (VdstInIdx != -1 && VdstInIdx == static_cast<int>(Inst.getNumOperands()))
9824 Inst.addOperand(Inst.getOperand(0));
9825
9826 int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
9827 if (BitOp3Idx != -1) {
9828 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9829 }
9830
9831 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
9832 // instruction, and then figure out where to actually put the modifiers
9833
9834 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9835 if (OpSelIdx != -1) {
9836 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
9837 }
9838
9839 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
9840 if (OpSelHiIdx != -1) {
9841 int DefaultVal = IsPacked ? -1 : 0;
9842 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
9843 DefaultVal);
9844 }
9845
9846 int MatrixAFMTIdx =
9847 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
9848 if (MatrixAFMTIdx != -1) {
9849 addOptionalImmOperand(Inst, Operands, OptIdx,
9850 AMDGPUOperand::ImmTyMatrixAFMT, 0);
9851 }
9852
9853 int MatrixBFMTIdx =
9854 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
9855 if (MatrixBFMTIdx != -1) {
9856 addOptionalImmOperand(Inst, Operands, OptIdx,
9857 AMDGPUOperand::ImmTyMatrixBFMT, 0);
9858 }
9859
9860 int MatrixAScaleIdx =
9861 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale);
9862 if (MatrixAScaleIdx != -1) {
9863 addOptionalImmOperand(Inst, Operands, OptIdx,
9864 AMDGPUOperand::ImmTyMatrixAScale, 0);
9865 }
9866
9867 int MatrixBScaleIdx =
9868 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale);
9869 if (MatrixBScaleIdx != -1) {
9870 addOptionalImmOperand(Inst, Operands, OptIdx,
9871 AMDGPUOperand::ImmTyMatrixBScale, 0);
9872 }
9873
9874 int MatrixAScaleFmtIdx =
9875 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
9876 if (MatrixAScaleFmtIdx != -1) {
9877 addOptionalImmOperand(Inst, Operands, OptIdx,
9878 AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
9879 }
9880
9881 int MatrixBScaleFmtIdx =
9882 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
9883 if (MatrixBScaleFmtIdx != -1) {
9884 addOptionalImmOperand(Inst, Operands, OptIdx,
9885 AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
9886 }
9887
9888 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
9889 addOptionalImmOperand(Inst, Operands, OptIdx,
9890 AMDGPUOperand::ImmTyMatrixAReuse, 0);
9891
9892 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
9893 addOptionalImmOperand(Inst, Operands, OptIdx,
9894 AMDGPUOperand::ImmTyMatrixBReuse, 0);
9895
9896 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
9897 if (NegLoIdx != -1)
9898 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
9899
9900 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
9901 if (NegHiIdx != -1)
9902 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
9903
9904 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9905 AMDGPU::OpName::src2};
9906 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9907 AMDGPU::OpName::src1_modifiers,
9908 AMDGPU::OpName::src2_modifiers};
9909
9910 unsigned OpSel = 0;
9911 unsigned OpSelHi = 0;
9912 unsigned NegLo = 0;
9913 unsigned NegHi = 0;
9914
9915 if (OpSelIdx != -1)
9916 OpSel = Inst.getOperand(OpSelIdx).getImm();
9917
9918 if (OpSelHiIdx != -1)
9919 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
9920
9921 if (NegLoIdx != -1)
9922 NegLo = Inst.getOperand(NegLoIdx).getImm();
9923
9924 if (NegHiIdx != -1)
9925 NegHi = Inst.getOperand(NegHiIdx).getImm();
9926
9927 for (int J = 0; J < 3; ++J) {
9928 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9929 if (OpIdx == -1)
9930 break;
9931
9932 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9933
9934 if (ModIdx == -1)
9935 continue;
9936
9937 uint32_t ModVal = 0;
9938
9939 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
9940 if (SrcOp.isReg() && getMRI()
9941 ->getRegClass(AMDGPU::VGPR_16RegClassID)
9942 .contains(SrcOp.getReg())) {
9943 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
9944 if (VGPRSuffixIsHi)
9945 ModVal |= SISrcMods::OP_SEL_0;
9946 } else {
9947 if ((OpSel & (1 << J)) != 0)
9948 ModVal |= SISrcMods::OP_SEL_0;
9949 }
9950
9951 if ((OpSelHi & (1 << J)) != 0)
9952 ModVal |= SISrcMods::OP_SEL_1;
9953
9954 if ((NegLo & (1 << J)) != 0)
9955 ModVal |= SISrcMods::NEG;
9956
9957 if ((NegHi & (1 << J)) != 0)
9958 ModVal |= SISrcMods::NEG_HI;
9959
9960 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
9961 }
9962}
9963
9964void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
9965 OptionalImmIndexMap OptIdx;
9966 cvtVOP3(Inst, Operands, OptIdx);
9967 cvtVOP3P(Inst, Operands, OptIdx);
9968}
9969
9971 unsigned i, unsigned Opc,
9972 AMDGPU::OpName OpName) {
9973 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
9974 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9975 else
9976 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
9977}
9978
9979void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
9980 unsigned Opc = Inst.getOpcode();
9981
9982 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9983 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
9984 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
9985 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
9986 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
9987
9988 OptionalImmIndexMap OptIdx;
9989 for (unsigned i = 5; i < Operands.size(); ++i) {
9990 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9991 OptIdx[Op.getImmTy()] = i;
9992 }
9993
9994 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
9995 addOptionalImmOperand(Inst, Operands, OptIdx,
9996 AMDGPUOperand::ImmTyIndexKey8bit);
9997
9998 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
9999 addOptionalImmOperand(Inst, Operands, OptIdx,
10000 AMDGPUOperand::ImmTyIndexKey16bit);
10001
10002 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
10003 addOptionalImmOperand(Inst, Operands, OptIdx,
10004 AMDGPUOperand::ImmTyIndexKey32bit);
10005
10006 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
10007 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
10008
10009 cvtVOP3P(Inst, Operands, OptIdx);
10010}
10011
10012//===----------------------------------------------------------------------===//
10013// VOPD
10014//===----------------------------------------------------------------------===//
10015
10016ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
10017 if (!hasVOPD(getSTI()))
10018 return ParseStatus::NoMatch;
10019
10020 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
10021 SMLoc S = getLoc();
10022 lex();
10023 lex();
10024 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
10025 SMLoc OpYLoc = getLoc();
10026 StringRef OpYName;
10027 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
10028 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
10029 return ParseStatus::Success;
10030 }
10031 return Error(OpYLoc, "expected a VOPDY instruction after ::");
10032 }
10033 return ParseStatus::NoMatch;
10034}
10035
10036// Create VOPD MCInst operands using parsed assembler operands.
10037void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
10038 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10039
10040 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
10041 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
10043 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10044 return;
10045 }
10046 if (Op.isReg()) {
10047 Op.addRegOperands(Inst, 1);
10048 return;
10049 }
10050 if (Op.isImm()) {
10051 Op.addImmOperands(Inst, 1);
10052 return;
10053 }
10054 llvm_unreachable("Unhandled operand type in cvtVOPD");
10055 };
10056
10057 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
10058
10059 // MCInst operands are ordered as follows:
10060 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
10061
10062 for (auto CompIdx : VOPD::COMPONENTS) {
10063 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
10064 }
10065
10066 for (auto CompIdx : VOPD::COMPONENTS) {
10067 const auto &CInfo = InstInfo[CompIdx];
10068 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
10069 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
10070 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
10071 if (CInfo.hasSrc2Acc())
10072 addOp(CInfo.getIndexOfDstInParsedOperands());
10073 }
10074
10075 int BitOp3Idx =
10076 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
10077 if (BitOp3Idx != -1) {
10078 OptionalImmIndexMap OptIdx;
10079 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
10080 if (Op.isImm())
10081 OptIdx[Op.getImmTy()] = Operands.size() - 1;
10082
10083 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
10084 }
10085}
10086
10087//===----------------------------------------------------------------------===//
10088// dpp
10089//===----------------------------------------------------------------------===//
10090
10091bool AMDGPUOperand::isDPP8() const {
10092 return isImmTy(ImmTyDPP8);
10093}
10094
10095bool AMDGPUOperand::isDPPCtrl() const {
10096 using namespace AMDGPU::DPP;
10097
10098 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
10099 if (result) {
10100 int64_t Imm = getImm();
10101 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
10102 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
10103 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
10104 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
10105 (Imm == DppCtrl::WAVE_SHL1) ||
10106 (Imm == DppCtrl::WAVE_ROL1) ||
10107 (Imm == DppCtrl::WAVE_SHR1) ||
10108 (Imm == DppCtrl::WAVE_ROR1) ||
10109 (Imm == DppCtrl::ROW_MIRROR) ||
10110 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
10111 (Imm == DppCtrl::BCAST15) ||
10112 (Imm == DppCtrl::BCAST31) ||
10113 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
10114 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
10115 }
10116 return false;
10117}
10118
10119//===----------------------------------------------------------------------===//
10120// mAI
10121//===----------------------------------------------------------------------===//
10122
10123bool AMDGPUOperand::isBLGP() const {
10124 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
10125}
10126
10127bool AMDGPUOperand::isS16Imm() const {
10128 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
10129}
10130
10131bool AMDGPUOperand::isU16Imm() const {
10132 return isImmLiteral() && isUInt<16>(getImm());
10133}
10134
10135//===----------------------------------------------------------------------===//
10136// dim
10137//===----------------------------------------------------------------------===//
10138
10139bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
10140 // We want to allow "dim:1D" etc.,
10141 // but the initial 1 is tokenized as an integer.
10142 std::string Token;
10143 if (isToken(AsmToken::Integer)) {
10144 SMLoc Loc = getToken().getEndLoc();
10145 Token = std::string(getTokenStr());
10146 lex();
10147 if (getLoc() != Loc)
10148 return false;
10149 }
10150
10151 StringRef Suffix;
10152 if (!parseId(Suffix))
10153 return false;
10154 Token += Suffix;
10155
10156 StringRef DimId = Token;
10157 DimId.consume_front("SQ_RSRC_IMG_");
10158
10159 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
10160 if (!DimInfo)
10161 return false;
10162
10163 Encoding = DimInfo->Encoding;
10164 return true;
10165}
10166
10167ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
10168 if (!isGFX10Plus())
10169 return ParseStatus::NoMatch;
10170
10171 SMLoc S = getLoc();
10172
10173 if (!trySkipId("dim", AsmToken::Colon))
10174 return ParseStatus::NoMatch;
10175
10176 unsigned Encoding;
10177 SMLoc Loc = getLoc();
10178 if (!parseDimId(Encoding))
10179 return Error(Loc, "invalid dim value");
10180
10181 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
10182 AMDGPUOperand::ImmTyDim));
10183 return ParseStatus::Success;
10184}
10185
10186//===----------------------------------------------------------------------===//
10187// dpp
10188//===----------------------------------------------------------------------===//
10189
10190ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
10191 SMLoc S = getLoc();
10192
10193 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
10194 return ParseStatus::NoMatch;
10195
10196 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
10197
10198 int64_t Sels[8];
10199
10200 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
10201 return ParseStatus::Failure;
10202
10203 for (size_t i = 0; i < 8; ++i) {
10204 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
10205 return ParseStatus::Failure;
10206
10207 SMLoc Loc = getLoc();
10208 if (getParser().parseAbsoluteExpression(Sels[i]))
10209 return ParseStatus::Failure;
10210 if (0 > Sels[i] || 7 < Sels[i])
10211 return Error(Loc, "expected a 3-bit value");
10212 }
10213
10214 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
10215 return ParseStatus::Failure;
10216
10217 unsigned DPP8 = 0;
10218 for (size_t i = 0; i < 8; ++i)
10219 DPP8 |= (Sels[i] << (i * 3));
10220
10221 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
10222 return ParseStatus::Success;
10223}
10224
10225bool
10226AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
10227 const OperandVector &Operands) {
10228 if (Ctrl == "row_newbcast")
10229 return isGFX90A();
10230
10231 if (Ctrl == "row_share" ||
10232 Ctrl == "row_xmask")
10233 return isGFX10Plus();
10234
10235 if (Ctrl == "wave_shl" ||
10236 Ctrl == "wave_shr" ||
10237 Ctrl == "wave_rol" ||
10238 Ctrl == "wave_ror" ||
10239 Ctrl == "row_bcast")
10240 return isVI() || isGFX9();
10241
10242 return Ctrl == "row_mirror" ||
10243 Ctrl == "row_half_mirror" ||
10244 Ctrl == "quad_perm" ||
10245 Ctrl == "row_shl" ||
10246 Ctrl == "row_shr" ||
10247 Ctrl == "row_ror";
10248}
10249
10250int64_t
10251AMDGPUAsmParser::parseDPPCtrlPerm() {
10252 // quad_perm:[%d,%d,%d,%d]
10253
10254 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
10255 return -1;
10256
10257 int64_t Val = 0;
10258 for (int i = 0; i < 4; ++i) {
10259 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
10260 return -1;
10261
10262 int64_t Temp;
10263 SMLoc Loc = getLoc();
10264 if (getParser().parseAbsoluteExpression(Temp))
10265 return -1;
10266 if (Temp < 0 || Temp > 3) {
10267 Error(Loc, "expected a 2-bit value");
10268 return -1;
10269 }
10270
10271 Val += (Temp << i * 2);
10272 }
10273
10274 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
10275 return -1;
10276
10277 return Val;
10278}
10279
10280int64_t
10281AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
10282 using namespace AMDGPU::DPP;
10283
10284 // sel:%d
10285
10286 int64_t Val;
10287 SMLoc Loc = getLoc();
10288
10289 if (getParser().parseAbsoluteExpression(Val))
10290 return -1;
10291
10292 struct DppCtrlCheck {
10293 int64_t Ctrl;
10294 int Lo;
10295 int Hi;
10296 };
10297
10298 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
10299 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
10300 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
10301 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
10302 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
10303 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
10304 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
10305 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
10306 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
10307 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
10308 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
10309 .Default({-1, 0, 0});
10310
10311 bool Valid;
10312 if (Check.Ctrl == -1) {
10313 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
10314 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
10315 } else {
10316 Valid = Check.Lo <= Val && Val <= Check.Hi;
10317 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
10318 }
10319
10320 if (!Valid) {
10321 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
10322 return -1;
10323 }
10324
10325 return Val;
10326}
10327
10328ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
10329 using namespace AMDGPU::DPP;
10330
10331 if (!isToken(AsmToken::Identifier) ||
10332 !isSupportedDPPCtrl(getTokenStr(), Operands))
10333 return ParseStatus::NoMatch;
10334
10335 SMLoc S = getLoc();
10336 int64_t Val = -1;
10337 StringRef Ctrl;
10338
10339 parseId(Ctrl);
10340
10341 if (Ctrl == "row_mirror") {
10342 Val = DppCtrl::ROW_MIRROR;
10343 } else if (Ctrl == "row_half_mirror") {
10344 Val = DppCtrl::ROW_HALF_MIRROR;
10345 } else {
10346 if (skipToken(AsmToken::Colon, "expected a colon")) {
10347 if (Ctrl == "quad_perm") {
10348 Val = parseDPPCtrlPerm();
10349 } else {
10350 Val = parseDPPCtrlSel(Ctrl);
10351 }
10352 }
10353 }
10354
10355 if (Val == -1)
10356 return ParseStatus::Failure;
10357
10358 Operands.push_back(
10359 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
10360 return ParseStatus::Success;
10361}
10362
10363void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
10364 bool IsDPP8) {
10365 OptionalImmIndexMap OptionalIdx;
10366 unsigned Opc = Inst.getOpcode();
10367 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10368
10369 // MAC instructions are special because they have 'old'
10370 // operand which is not tied to dst (but assumed to be).
10371 // They also have dummy unused src2_modifiers.
10372 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
10373 int Src2ModIdx =
10374 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
10375 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
10376 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
10377
10378 unsigned I = 1;
10379 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10380 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10381 }
10382
10383 int Fi = 0;
10384 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
10385 bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
10386 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx13 ||
10387 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
10388 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx13 ||
10389 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
10390 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx13 ||
10391 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
10392 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx13;
10393
10394 for (unsigned E = Operands.size(); I != E; ++I) {
10395
10396 if (IsMAC) {
10397 int NumOperands = Inst.getNumOperands();
10398 if (OldIdx == NumOperands) {
10399 // Handle old operand
10400 constexpr int DST_IDX = 0;
10401 Inst.addOperand(Inst.getOperand(DST_IDX));
10402 } else if (Src2ModIdx == NumOperands) {
10403 // Add unused dummy src2_modifiers
10405 }
10406 }
10407
10408 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
10409 Inst.addOperand(Inst.getOperand(0));
10410 }
10411
10412 if (IsVOP3CvtSrDpp) {
10413 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
10415 Inst.addOperand(MCOperand::createReg(MCRegister()));
10416 }
10417 }
10418
10419 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10421 if (TiedTo != -1) {
10422 assert((unsigned)TiedTo < Inst.getNumOperands());
10423 // handle tied old or src2 for MAC instructions
10424 Inst.addOperand(Inst.getOperand(TiedTo));
10425 }
10426 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10427 // Add the register arguments
10428 if (IsDPP8 && Op.isDppFI()) {
10429 Fi = Op.getImm();
10430 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10431 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10432 } else if (Op.isReg()) {
10433 Op.addRegOperands(Inst, 1);
10434 } else if (Op.isImm() &&
10435 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
10436 Op.addImmOperands(Inst, 1);
10437 } else if (Op.isImm()) {
10438 OptionalIdx[Op.getImmTy()] = I;
10439 } else {
10440 llvm_unreachable("unhandled operand type");
10441 }
10442 }
10443
10444 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
10445 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10446 AMDGPUOperand::ImmTyClamp);
10447
10448 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
10449 if (VdstInIdx == static_cast<int>(Inst.getNumOperands()))
10450 Inst.addOperand(Inst.getOperand(0));
10451 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10452 AMDGPUOperand::ImmTyByteSel);
10453 }
10454
10455 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10456 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
10457
10458 if (Desc.TSFlags & SIInstrFlags::VOP3P)
10459 cvtVOP3P(Inst, Operands, OptionalIdx);
10460 else if (Desc.TSFlags & SIInstrFlags::VOP3)
10461 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
10462 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
10463 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
10464 }
10465
10466 if (IsDPP8) {
10467 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
10468 using namespace llvm::AMDGPU::DPP;
10469 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10470 } else {
10471 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
10472 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10473 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10474 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10475
10476 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
10477 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10478 AMDGPUOperand::ImmTyDppFI);
10479 }
10480}
10481
10482void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
10483 OptionalImmIndexMap OptionalIdx;
10484
10485 unsigned I = 1;
10486 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10487 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10488 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10489 }
10490
10491 int Fi = 0;
10492 for (unsigned E = Operands.size(); I != E; ++I) {
10493 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10495 if (TiedTo != -1) {
10496 assert((unsigned)TiedTo < Inst.getNumOperands());
10497 // handle tied old or src2 for MAC instructions
10498 Inst.addOperand(Inst.getOperand(TiedTo));
10499 }
10500 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10501 // Add the register arguments
10502 if (Op.isReg() && validateVccOperand(Op.getReg())) {
10503 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
10504 // Skip it.
10505 continue;
10506 }
10507
10508 if (IsDPP8) {
10509 if (Op.isDPP8()) {
10510 Op.addImmOperands(Inst, 1);
10511 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10512 Op.addRegWithFPInputModsOperands(Inst, 2);
10513 } else if (Op.isDppFI()) {
10514 Fi = Op.getImm();
10515 } else if (Op.isReg()) {
10516 Op.addRegOperands(Inst, 1);
10517 } else {
10518 llvm_unreachable("Invalid operand type");
10519 }
10520 } else {
10522 Op.addRegWithFPInputModsOperands(Inst, 2);
10523 } else if (Op.isReg()) {
10524 Op.addRegOperands(Inst, 1);
10525 } else if (Op.isDPPCtrl()) {
10526 Op.addImmOperands(Inst, 1);
10527 } else if (Op.isImm()) {
10528 // Handle optional arguments
10529 OptionalIdx[Op.getImmTy()] = I;
10530 } else {
10531 llvm_unreachable("Invalid operand type");
10532 }
10533 }
10534 }
10535
10536 if (IsDPP8) {
10537 using namespace llvm::AMDGPU::DPP;
10538 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10539 } else {
10540 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10541 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10542 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10543 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
10544 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10545 AMDGPUOperand::ImmTyDppFI);
10546 }
10547 }
10548}
10549
10550//===----------------------------------------------------------------------===//
10551// sdwa
10552//===----------------------------------------------------------------------===//
10553
10554ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
10555 StringRef Prefix,
10556 AMDGPUOperand::ImmTy Type) {
10557 return parseStringOrIntWithPrefix(
10558 Operands, Prefix,
10559 {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
10560 Type);
10561}
10562
10563ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
10564 return parseStringOrIntWithPrefix(
10565 Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
10566 AMDGPUOperand::ImmTySDWADstUnused);
10567}
10568
10569void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
10570 cvtSDWA(Inst, Operands, SDWAInstType::VOP1);
10571}
10572
10573void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
10574 cvtSDWA(Inst, Operands, SDWAInstType::VOP2);
10575}
10576
10577void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
10578 cvtSDWA(Inst, Operands, SDWAInstType::VOP2, true, true);
10579}
10580
10581void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
10582 cvtSDWA(Inst, Operands, SDWAInstType::VOP2, false, true);
10583}
10584
10585void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
10586 cvtSDWA(Inst, Operands, SDWAInstType::VOPC, isVI());
10587}
10588
10589void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
10590 SDWAInstType BasicInstType, bool SkipDstVcc,
10591 bool SkipSrcVcc) {
10592 using namespace llvm::AMDGPU::SDWA;
10593
10594 OptionalImmIndexMap OptionalIdx;
10595 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
10596 bool SkippedVcc = false;
10597
10598 unsigned I = 1;
10599 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10600 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10601 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10602 }
10603
10604 for (unsigned E = Operands.size(); I != E; ++I) {
10605 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10606 if (SkipVcc && !SkippedVcc && Op.isReg() &&
10607 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
10608 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
10609 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
10610 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
10611 // Skip VCC only if we didn't skip it on previous iteration.
10612 // Note that src0 and src1 occupy 2 slots each because of modifiers.
10613 if (BasicInstType == SDWAInstType::VOP2 &&
10614 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
10615 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
10616 SkippedVcc = true;
10617 continue;
10618 }
10619 if (BasicInstType == SDWAInstType::VOPC && Inst.getNumOperands() == 0) {
10620 SkippedVcc = true;
10621 continue;
10622 }
10623 }
10625 Op.addRegOrImmWithInputModsOperands(Inst, 2);
10626 } else if (Op.isImm()) {
10627 // Handle optional arguments
10628 OptionalIdx[Op.getImmTy()] = I;
10629 } else {
10630 llvm_unreachable("Invalid operand type");
10631 }
10632 SkippedVcc = false;
10633 }
10634
10635 const unsigned Opc = Inst.getOpcode();
10636 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10637 Opc != AMDGPU::V_NOP_sdwa_vi) {
10638 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
10639 switch (BasicInstType) {
10640 case SDWAInstType::VOP1:
10641 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
10642 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10643 AMDGPUOperand::ImmTyClamp, 0);
10644
10645 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10646 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10647 AMDGPUOperand::ImmTyOModSI, 0);
10648
10649 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
10650 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10651 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10652
10653 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
10654 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10655 AMDGPUOperand::ImmTySDWADstUnused,
10656 DstUnused::UNUSED_PRESERVE);
10657
10658 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10659 break;
10660
10661 case SDWAInstType::VOP2:
10662 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10663 AMDGPUOperand::ImmTyClamp, 0);
10664
10665 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
10666 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
10667
10668 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10669 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
10670 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10671 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10672 break;
10673
10674 case SDWAInstType::VOPC:
10675 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
10676 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10677 AMDGPUOperand::ImmTyClamp, 0);
10678 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10679 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10680 break;
10681 }
10682 }
10683
10684 // special case v_mac_{f16, f32}:
10685 // it has src2 register operand that is tied to dst operand
10686 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10687 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10688 auto *it = Inst.begin();
10689 std::advance(
10690 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
10691 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
10692 }
10693}
10694
10695/// Force static initialization.
10696extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
10701
10702#define GET_MATCHER_IMPLEMENTATION
10703#define GET_MNEMONIC_SPELL_CHECKER
10704#define GET_MNEMONIC_CHECKER
10705#include "AMDGPUGenAsmMatcher.inc"
10706
10707ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
10708 unsigned MCK) {
10709 switch (MCK) {
10710 case MCK_addr64:
10711 return parseTokenOp("addr64", Operands);
10712 case MCK_done:
10713 return parseNamedBit("done", Operands, AMDGPUOperand::ImmTyDone, true);
10714 case MCK_idxen:
10715 return parseTokenOp("idxen", Operands);
10716 case MCK_lds:
10717 return parseNamedBit("lds", Operands, AMDGPUOperand::ImmTyLDS,
10718 /*IgnoreNegative=*/true);
10719 case MCK_offen:
10720 return parseTokenOp("offen", Operands);
10721 case MCK_off:
10722 return parseTokenOp("off", Operands);
10723 case MCK_row_95_en:
10724 return parseNamedBit("row_en", Operands, AMDGPUOperand::ImmTyRowEn, true);
10725 case MCK_gds:
10726 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
10727 case MCK_tfe:
10728 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
10729 }
10730 return tryCustomParseOperand(Operands, MCK);
10731}
10732
10733// This function should be defined after auto-generated include so that we have
10734// MatchClassKind enum defined
10735unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
10736 unsigned Kind) {
10737 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
10738 // But MatchInstructionImpl() expects to meet token and fails to validate
10739 // operand. This method checks if we are given immediate operand but expect to
10740 // get corresponding token.
10741 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
10742 switch (Kind) {
10743 case MCK_addr64:
10744 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10745 case MCK_gds:
10746 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10747 case MCK_lds:
10748 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10749 case MCK_idxen:
10750 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10751 case MCK_offen:
10752 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10753 case MCK_tfe:
10754 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10755 case MCK_done:
10756 return Operand.isDone() ? Match_Success : Match_InvalidOperand;
10757 case MCK_row_95_en:
10758 return Operand.isRowEn() ? Match_Success : Match_InvalidOperand;
10759 case MCK_SSrc_b32:
10760 // When operands have expression values, they will return true for isToken,
10761 // because it is not possible to distinguish between a token and an
10762 // expression at parse time. MatchInstructionImpl() will always try to
10763 // match an operand as a token, when isToken returns true, and when the
10764 // name of the expression is not a valid token, the match will fail,
10765 // so we need to handle it here.
10766 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10767 case MCK_SSrc_f32:
10768 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10769 case MCK_SOPPBrTarget:
10770 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10771 case MCK_VReg32OrOff:
10772 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10773 case MCK_InterpSlot:
10774 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10775 case MCK_InterpAttr:
10776 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10777 case MCK_InterpAttrChan:
10778 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10779 case MCK_SReg_64:
10780 case MCK_SReg_64_XEXEC:
10781 // Null is defined as a 32-bit register but
10782 // it should also be enabled with 64-bit operands or larger.
10783 // The following code enables it for SReg_64 and larger operands
10784 // used as source and destination. Remaining source
10785 // operands are handled in isInlinableImm.
10786 case MCK_SReg_96:
10787 case MCK_SReg_128:
10788 case MCK_SReg_256:
10789 case MCK_SReg_512:
10790 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10791 default:
10792 return Match_InvalidOperand;
10793 }
10794}
10795
10796//===----------------------------------------------------------------------===//
10797// endpgm
10798//===----------------------------------------------------------------------===//
10799
10800ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
10801 SMLoc S = getLoc();
10802 int64_t Imm = 0;
10803
10804 if (!parseExpr(Imm)) {
10805 // The operand is optional, if not present default to 0
10806 Imm = 0;
10807 }
10808
10809 if (!isUInt<16>(Imm))
10810 return Error(S, "expected a 16-bit value");
10811
10812 Operands.push_back(
10813 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
10814 return ParseStatus::Success;
10815}
10816
10817bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
10818
10819//===----------------------------------------------------------------------===//
10820// Split Barrier
10821//===----------------------------------------------------------------------===//
10822
10823bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
#define Success
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
SmallVector< int16_t, MAX_SRC_OPERANDS_NUM > OperandIndices
static bool checkWriteLane(const MCInst &Inst)
static bool getRegNum(StringRef Str, unsigned &Num)
static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, unsigned i, unsigned Opc, AMDGPU::OpName OpName)
static constexpr RegInfo RegularRegisters[]
static const RegInfo * getRegularRegInfo(StringRef Str)
static ArrayRef< unsigned > getAllVariants()
static OperandIndices getSrcOperandIndices(unsigned Opcode, bool AddMandatoryLiterals=false)
static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name, const MCRegisterInfo *MRI)
static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
static const fltSemantics * getFltSemantics(unsigned Size)
static bool isRegularReg(RegisterKind Kind)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser()
Force static initialization.
static bool ConvertOmodMul(int64_t &Mul)
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi)
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT)
constexpr uint64_t MIMGFlags
static bool AMDGPUCheckMnemonic(StringRef Mnemonic, const FeatureBitset &AvailableFeatures, unsigned VariantID)
static void applyMnemonicAliases(StringRef &Mnemonic, const FeatureBitset &Features, unsigned VariantID)
constexpr unsigned MAX_SRC_OPERANDS_NUM
#define EXPR_RESOLVE_OR_ERROR(RESOLVED)
static bool ConvertOmodDiv(int64_t &Div)
static bool IsRevOpcode(const unsigned Opcode)
static bool encodeCnt(const AMDGPU::IsaVersion ISA, int64_t &IntVal, int64_t CntVal, bool Saturate, unsigned(*encode)(const IsaVersion &Version, unsigned, unsigned), unsigned(*decode)(const IsaVersion &Version, unsigned))
static MCRegister getSpecialRegForName(StringRef RegName)
static void addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands, AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx, AMDGPUOperand::ImmTy ImmT, int64_t Default=0, std::optional< unsigned > InsertAt=std::nullopt)
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI)
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum)
static const fltSemantics * getOpFltSemantics(uint8_t OperandType)
static bool isInvalidVOPDY(const OperandVector &Operands, uint64_t InvalidOprIdx)
static std::string AMDGPUMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID=0)
static LLVM_READNONE unsigned encodeBitmaskPerm(const unsigned AndMask, const unsigned OrMask, const unsigned XorMask)
static bool isSafeTruncation(int64_t Val, unsigned Size)
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Provides AMDGPU specific target descriptions.
AMDGPU metadata definitions and in-memory representations.
Enums shared between the AMDGPU backend (LLVM) and the ELF linker (LLD) for the .amdgpu....
AMDHSA kernel descriptor definitions.
static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value, raw_ostream &Err)
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file declares a class to represent arbitrary precision floating point values and provide a varie...
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_READNONE
Definition Compiler.h:315
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
@ Default
#define Check(C,...)
static llvm::Expected< InlineInfo > decode(GsymDataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr)
Decode an InlineInfo in Data at the specified offset.
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
Loop::LoopBounds::Direction Direction
Definition LoopInfo.cpp:253
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(PassOpts->AAPipeline)
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:484
This file implements the SmallBitVector class.
StringSet - A set-like wrapper for the StringMap.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
BinaryOperator * Mul
static const char * getRegisterName(MCRegister Reg)
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & BFloat()
Definition APFloat.h:295
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5912
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:130
size_t size() const
Get the array size.
Definition ArrayRef.h:141
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition MCAsmMacro.h:103
bool is(TokenKind K) const
Definition MCAsmMacro.h:75
Register getReg() const
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
void printExpr(raw_ostream &, const MCExpr &) const
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:353
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:428
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Look up the symbol with the specified Name.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getNumOperands() const
Definition MCInst.h:212
SMLoc getLoc() const
Definition MCInst.h:208
void setLoc(SMLoc loc)
Definition MCInst.h:207
unsigned getOpcode() const
Definition MCInst.h:202
iterator insert(iterator I, const MCOperand &Op)
Definition MCInst.h:232
void addOperand(const MCOperand Op)
Definition MCInst.h:215
iterator begin()
Definition MCInst.h:227
size_t size() const
Definition MCInst.h:226
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
int16_t getOpRegClassID(const MCOperandInfo &OpInfo, unsigned HwModeId) const
Return the ID of the register class to use for OpInfo, for the active HwMode HwModeId.
Definition MCInstrInfo.h:80
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
void setImm(int64_t Val)
Definition MCInst.h:89
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
bool isImm() const
Definition MCInst.h:66
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
const MCExpr * getExpr() const
Definition MCInst.h:118
bool isExpr() const
Definition MCInst.h:69
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
bool regsOverlap(MCRegister RegA, MCRegister RegB) const
Returns true if the two registers are equal or alias each other.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
constexpr bool isValid() const
Definition MCRegister.h:84
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
Generic base class for all target subtargets.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
void setRedefinable(bool Value)
Mark this symbol as redefinable.
Definition MCSymbol.h:210
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
MCTargetAsmParser - Generic interface to target specific assembly parsers.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
constexpr unsigned id() const
Definition Register.h:100
Represents a location in source code.
Definition SMLoc.h:22
static SMLoc getFromPointer(const char *Ptr)
Definition SMLoc.h:35
constexpr const char * getPointer() const
Definition SMLoc.h:33
constexpr bool isValid() const
Definition SMLoc.h:28
SMLoc Start
Definition SMLoc.h:49
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
Definition StringRef.h:685
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:629
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:138
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition StringRef.h:270
bool consume_front(char Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:655
bool contains(StringRef key) const
Check if the set contains the given key.
Definition StringSet.h:60
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition StringSet.h:39
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:212
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
static constexpr CustomOperand Operands[]
unsigned getAddressableNumSGPRs(const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo &STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo &STI)
unsigned getLocalMemorySize(const MCSubtargetInfo &STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
constexpr const char *const ModMatrixFmt[]
constexpr const char *const ModMatrixScaleFmt[]
constexpr const char *const ModMatrixScale[]
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isInlineValue(MCRegister Reg)
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg a scalar register?
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
FuncInfoFlags
Per-function flags packed into INFO_FLAGS entries.
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY bool isLitExpr(const MCExpr *Expr)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX13(const MCSubtargetInfo &STI)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool hasMAIInsts(const MCSubtargetInfo &STI)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool isGFX13Plus(const MCSubtargetInfo &STI)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
LLVM_READONLY int64_t getLitValue(const MCExpr *Expr)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
bool isValidWMMAScaleFmtCombination(unsigned AFmt, unsigned AScale, unsigned BFmt, unsigned BScale)
@ OPERAND_REG_IMM_V2FP64
Definition SIDefines.h:220
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:238
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:206
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:213
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:229
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:226
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:231
@ OPERAND_REG_IMM_V2INT64
Definition SIDefines.h:216
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:215
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:210
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:205
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:212
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:211
@ OPERAND_REG_IMM_V2FP16_SPLAT
Definition SIDefines.h:214
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:225
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:223
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:217
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:209
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:232
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:243
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:244
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:218
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:208
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:228
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:224
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:230
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:219
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:245
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:227
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:207
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:235
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
bool isGFX1250Plus(const MCSubtargetInfo &STI)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1433
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ OPERAND_IMMEDIATE
Definition MCInstrDesc.h:61
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
@ Valid
The data is already valid.
Context & getContext() const
Definition BasicBlock.h:99
bool isNull(StringRef S)
Definition YAMLTraits.h:571
This is an optimization pass for GlobalISel generic memory operations.
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
Definition Error.h:1129
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
StringMapEntry< Value * > ValueName
Definition Value.h:56
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
Definition Alignment.h:206
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
static bool isMem(const MachineInstr &MI, unsigned Op)
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
LLVM_ABI void PrintError(const Twine &Msg)
Definition Error.cpp:104
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
FunctionAddr VTableAddr uintptr_t uintptr_t DataSize
Definition InstrProf.h:299
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:362
Op::Description Desc
Target & getTheR600Target()
The target for R600 GPUs.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:156
SmallVectorImpl< std::unique_ptr< MCParsedAsmOperand > > OperandVector
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:334
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
Definition DWP.h:31
@ Default
The result value is uniform if and only if all operands are uniform.
Definition Uniformity.h:20
#define N
RegisterKind Kind
StringLiteral Name
void initDefault(const MCSubtargetInfo &STI, MCContext &Ctx, bool InitMCExpr=true)
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
SmallVector< std::pair< MCSymbol *, std::string >, 4 > IndirectCalls
SmallVector< std::pair< MCSymbol *, MCSymbol * >, 8 > Calls
SmallVector< FuncInfo, 8 > Funcs
SmallVector< std::pair< MCSymbol *, std::string >, 4 > TypeIds
SmallVector< std::pair< MCSymbol *, MCSymbol * >, 4 > Uses
Instruction set architecture version.
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...