1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
16#include "SIDefines.h"
17#include "SIInstrInfo.h"
22#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/StringSet.h"
25#include "llvm/ADT/Twine.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/MC/MCInstrDesc.h"
38#include "llvm/MC/MCSymbol.h"
46#include <optional>
47
48using namespace llvm;
49using namespace llvm::AMDGPU;
50using namespace llvm::amdhsa;
51
52namespace {
53
54class AMDGPUAsmParser;
55
56enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
57
58//===----------------------------------------------------------------------===//
59// Operand
60//===----------------------------------------------------------------------===//
61
62class AMDGPUOperand : public MCParsedAsmOperand {
63 enum KindTy {
64 Token,
65 Immediate,
66 Register,
67 Expression
68 } Kind;
69
70 SMLoc StartLoc, EndLoc;
71 const AMDGPUAsmParser *AsmParser;
72
73public:
74 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
75 : Kind(Kind_), AsmParser(AsmParser_) {}
76
77 using Ptr = std::unique_ptr<AMDGPUOperand>;
78
79 struct Modifiers {
80 bool Abs = false;
81 bool Neg = false;
82 bool Sext = false;
83 LitModifier Lit = LitModifier::None;
84
85 bool hasFPModifiers() const { return Abs || Neg; }
86 bool hasIntModifiers() const { return Sext; }
87 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
88
89 int64_t getFPModifiersOperand() const {
90 int64_t Operand = 0;
91 Operand |= Abs ? SISrcMods::ABS : 0u;
92 Operand |= Neg ? SISrcMods::NEG : 0u;
93 return Operand;
94 }
95
96 int64_t getIntModifiersOperand() const {
97 int64_t Operand = 0;
98 Operand |= Sext ? SISrcMods::SEXT : 0u;
99 return Operand;
100 }
101
102 int64_t getModifiersOperand() const {
103 assert(!(hasFPModifiers() && hasIntModifiers())
104 && "fp and int modifiers should not be used simultaneously");
105 if (hasFPModifiers())
106 return getFPModifiersOperand();
107 if (hasIntModifiers())
108 return getIntModifiersOperand();
109 return 0;
110 }
111
112 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
113 };
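// Illustrative sketch (not part of the upstream source): the Modifiers struct
// above maps source-level modifiers onto the SISrcMods bits that
// addRegOrImmWithInputModsOperands() later emits ahead of the operand. For
// example, an operand written as "-|v0|" parses with Abs and Neg both set:
//   AMDGPUOperand::Modifiers M;
//   M.Abs = true;
//   M.Neg = true;
//   int64_t Enc = M.getModifiersOperand(); // SISrcMods::ABS | SISrcMods::NEG
// FP (abs/neg) and integer (sext) modifiers are mutually exclusive, as the
// assert in getModifiersOperand() enforces.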
114
115 enum ImmTy {
116 ImmTyNone,
117 ImmTyGDS,
118 ImmTyLDS,
119 ImmTyOffen,
120 ImmTyIdxen,
121 ImmTyAddr64,
122 ImmTyOffset,
123 ImmTyInstOffset,
124 ImmTyOffset0,
125 ImmTyOffset1,
126 ImmTySMEMOffsetMod,
127 ImmTyCPol,
128 ImmTyTFE,
129 ImmTyD16,
130 ImmTyClamp,
131 ImmTyOModSI,
132 ImmTySDWADstSel,
133 ImmTySDWASrc0Sel,
134 ImmTySDWASrc1Sel,
135 ImmTySDWADstUnused,
136 ImmTyDMask,
137 ImmTyDim,
138 ImmTyUNorm,
139 ImmTyDA,
140 ImmTyR128A16,
141 ImmTyA16,
142 ImmTyLWE,
143 ImmTyExpTgt,
144 ImmTyExpCompr,
145 ImmTyExpVM,
146 ImmTyFORMAT,
147 ImmTyHwreg,
148 ImmTyOff,
149 ImmTySendMsg,
150 ImmTyInterpSlot,
151 ImmTyInterpAttr,
152 ImmTyInterpAttrChan,
153 ImmTyOpSel,
154 ImmTyOpSelHi,
155 ImmTyNegLo,
156 ImmTyNegHi,
157 ImmTyIndexKey8bit,
158 ImmTyIndexKey16bit,
159 ImmTyIndexKey32bit,
160 ImmTyDPP8,
161 ImmTyDppCtrl,
162 ImmTyDppRowMask,
163 ImmTyDppBankMask,
164 ImmTyDppBoundCtrl,
165 ImmTyDppFI,
166 ImmTySwizzle,
167 ImmTyGprIdxMode,
168 ImmTyHigh,
169 ImmTyBLGP,
170 ImmTyCBSZ,
171 ImmTyABID,
172 ImmTyEndpgm,
173 ImmTyWaitVDST,
174 ImmTyWaitEXP,
175 ImmTyWaitVAVDst,
176 ImmTyWaitVMVSrc,
177 ImmTyBitOp3,
178 ImmTyMatrixAFMT,
179 ImmTyMatrixBFMT,
180 ImmTyMatrixAScale,
181 ImmTyMatrixBScale,
182 ImmTyMatrixAScaleFmt,
183 ImmTyMatrixBScaleFmt,
184 ImmTyMatrixAReuse,
185 ImmTyMatrixBReuse,
186 ImmTyScaleSel,
187 ImmTyByteSel,
188 };
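// Note: ImmTyNone marks a plain literal immediate, while every other tag in
// the enum above identifies a named modifier operand; see isImmLiteral() and
// isImmModifier() below for the corresponding predicates.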
189
190private:
191 struct TokOp {
192 const char *Data;
193 unsigned Length;
194 };
195
196 struct ImmOp {
197 int64_t Val;
198 ImmTy Type;
199 bool IsFPImm;
200 Modifiers Mods;
201 };
202
203 struct RegOp {
204 MCRegister RegNo;
205 Modifiers Mods;
206 };
207
208 union {
209 TokOp Tok;
210 ImmOp Imm;
211 RegOp Reg;
212 const MCExpr *Expr;
213 };
214
215 // The index of the associated MCInst operand.
216 mutable int MCOpIdx = -1;
217
218public:
219 bool isToken() const override { return Kind == Token; }
220
221 bool isSymbolRefExpr() const {
222 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
223 }
224
225 bool isImm() const override {
226 return Kind == Immediate;
227 }
228
229 bool isInlinableImm(MVT type) const;
230 bool isLiteralImm(MVT type) const;
231
232 bool isRegKind() const {
233 return Kind == Register;
234 }
235
236 bool isReg() const override {
237 return isRegKind() && !hasModifiers();
238 }
239
240 bool isRegOrInline(unsigned RCID, MVT type) const {
241 return isRegClass(RCID) || isInlinableImm(type);
242 }
243
244 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
245 return isRegOrInline(RCID, type) || isLiteralImm(type);
246 }
247
248 bool isRegOrImmWithInt16InputMods() const {
249 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
250 }
251
252 template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
253 return isRegOrImmWithInputMods(
254 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
255 }
256
257 bool isRegOrImmWithInt32InputMods() const {
258 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
259 }
260
261 bool isRegOrInlineImmWithInt16InputMods() const {
262 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
263 }
264
265 template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
266 return isRegOrInline(
267 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
268 }
269
270 bool isRegOrInlineImmWithInt32InputMods() const {
271 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
272 }
273
274 bool isRegOrImmWithInt64InputMods() const {
275 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
276 }
277
278 bool isRegOrImmWithFP16InputMods() const {
279 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
280 }
281
282 template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
283 return isRegOrImmWithInputMods(
284 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
285 }
286
287 bool isRegOrImmWithFP32InputMods() const {
288 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
289 }
290
291 bool isRegOrImmWithFP64InputMods() const {
292 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
293 }
294
295 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
296 return isRegOrInline(
297 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
298 }
299
300 bool isRegOrInlineImmWithFP32InputMods() const {
301 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
302 }
303
304 bool isRegOrInlineImmWithFP64InputMods() const {
305 return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
306 }
307
308 bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }
309
310 bool isVRegWithFP32InputMods() const {
311 return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
312 }
313
314 bool isVRegWithFP64InputMods() const {
315 return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
316 }
317
318 bool isPackedFP16InputMods() const {
319 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
320 }
321
322 bool isPackedVGPRFP32InputMods() const {
323 return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
324 }
325
326 bool isVReg() const {
327 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
328 isRegClass(AMDGPU::VReg_64RegClassID) ||
329 isRegClass(AMDGPU::VReg_96RegClassID) ||
330 isRegClass(AMDGPU::VReg_128RegClassID) ||
331 isRegClass(AMDGPU::VReg_160RegClassID) ||
332 isRegClass(AMDGPU::VReg_192RegClassID) ||
333 isRegClass(AMDGPU::VReg_256RegClassID) ||
334 isRegClass(AMDGPU::VReg_512RegClassID) ||
335 isRegClass(AMDGPU::VReg_1024RegClassID);
336 }
337
338 bool isVReg32() const {
339 return isRegClass(AMDGPU::VGPR_32RegClassID);
340 }
341
342 bool isVReg32OrOff() const {
343 return isOff() || isVReg32();
344 }
345
346 bool isNull() const {
347 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
348 }
349
350 bool isVRegWithInputMods() const;
351 template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
352 template <bool IsFake16> bool isT16VRegWithInputMods() const;
353
354 bool isSDWAOperand(MVT type) const;
355 bool isSDWAFP16Operand() const;
356 bool isSDWAFP32Operand() const;
357 bool isSDWAInt16Operand() const;
358 bool isSDWAInt32Operand() const;
359
360 bool isImmTy(ImmTy ImmT) const {
361 return isImm() && Imm.Type == ImmT;
362 }
363
364 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
365
366 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
367
368 bool isImmModifier() const {
369 return isImm() && Imm.Type != ImmTyNone;
370 }
371
372 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
373 bool isDim() const { return isImmTy(ImmTyDim); }
374 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
375 bool isOff() const { return isImmTy(ImmTyOff); }
376 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
377 bool isOffen() const { return isImmTy(ImmTyOffen); }
378 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
379 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
380 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
381 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
382 bool isGDS() const { return isImmTy(ImmTyGDS); }
383 bool isLDS() const { return isImmTy(ImmTyLDS); }
384 bool isCPol() const { return isImmTy(ImmTyCPol); }
385 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
386 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
387 bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
388 bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
389 bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
390 bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
391 bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
392 bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
393 bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
394 bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
395 bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
396 bool isTFE() const { return isImmTy(ImmTyTFE); }
397 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
398 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
399 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
400 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
401 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
402 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
403 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
404 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
405 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
406 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
407 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
408 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
409 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
410 bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
411
412 bool isRegOrImm() const {
413 return isReg() || isImm();
414 }
415
416 bool isRegClass(unsigned RCID) const;
417
418 bool isInlineValue() const;
419
420 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
421 return isRegOrInline(RCID, type) && !hasModifiers();
422 }
423
424 bool isSCSrcB16() const {
425 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
426 }
427
428 bool isSCSrcV2B16() const {
429 return isSCSrcB16();
430 }
431
432 bool isSCSrc_b32() const {
433 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
434 }
435
436 bool isSCSrc_b64() const {
437 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
438 }
439
440 bool isBoolReg() const;
441
442 bool isSCSrcF16() const {
443 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
444 }
445
446 bool isSCSrcV2F16() const {
447 return isSCSrcF16();
448 }
449
450 bool isSCSrcF32() const {
451 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
452 }
453
454 bool isSCSrcF64() const {
455 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
456 }
457
458 bool isSSrc_b32() const {
459 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
460 }
461
462 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
463
464 bool isSSrcV2B16() const {
465 llvm_unreachable("cannot happen");
466 return isSSrc_b16();
467 }
468
469 bool isSSrc_b64() const {
470 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
471 // See isVSrc64().
472 return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
473 (((const MCTargetAsmParser *)AsmParser)
474 ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
475 isExpr());
476 }
477
478 bool isSSrc_f32() const {
479 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
480 }
481
482 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
483
484 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
485
486 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
487
488 bool isSSrcV2F16() const {
489 llvm_unreachable("cannot happen");
490 return isSSrc_f16();
491 }
492
493 bool isSSrcV2FP32() const {
494 llvm_unreachable("cannot happen");
495 return isSSrc_f32();
496 }
497
498 bool isSCSrcV2FP32() const {
499 llvm_unreachable("cannot happen");
500 return isSCSrcF32();
501 }
502
503 bool isSSrcV2INT32() const {
504 llvm_unreachable("cannot happen");
505 return isSSrc_b32();
506 }
507
508 bool isSCSrcV2INT32() const {
509 llvm_unreachable("cannot happen");
510 return isSCSrc_b32();
511 }
512
513 bool isSSrcOrLds_b32() const {
514 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
515 isLiteralImm(MVT::i32) || isExpr();
516 }
517
518 bool isVCSrc_b32() const {
519 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
520 }
521
522 bool isVCSrc_b32_Lo256() const {
523 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
524 }
525
526 bool isVCSrc_b64_Lo256() const {
527 return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
528 }
529
530 bool isVCSrc_b64() const {
531 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
532 }
533
534 bool isVCSrcT_b16() const {
535 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
536 }
537
538 bool isVCSrcTB16_Lo128() const {
539 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
540 }
541
542 bool isVCSrcFake16B16_Lo128() const {
543 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
544 }
545
546 bool isVCSrc_b16() const {
547 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
548 }
549
550 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
551
552 bool isVCSrc_f32() const {
553 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
554 }
555
556 bool isVCSrc_f64() const {
557 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
558 }
559
560 bool isVCSrcTBF16() const {
561 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
562 }
563
564 bool isVCSrcT_f16() const {
565 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
566 }
567
568 bool isVCSrcT_bf16() const {
569 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
570 }
571
572 bool isVCSrcTBF16_Lo128() const {
573 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
574 }
575
576 bool isVCSrcTF16_Lo128() const {
577 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
578 }
579
580 bool isVCSrcFake16BF16_Lo128() const {
581 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
582 }
583
584 bool isVCSrcFake16F16_Lo128() const {
585 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
586 }
587
588 bool isVCSrc_bf16() const {
589 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
590 }
591
592 bool isVCSrc_f16() const {
593 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
594 }
595
596 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
597
598 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
599
600 bool isVSrc_b32() const {
601 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
602 }
603
604 bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }
605
606 bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
607
608 bool isVSrcT_b16_Lo128() const {
609 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
610 }
611
612 bool isVSrcFake16_b16_Lo128() const {
613 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
614 }
615
616 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
617
618 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
619
620 bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
621
622 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
623
624 bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
625
626 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
627
628 bool isVSrc_f32() const {
629 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
630 }
631
632 bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }
633
634 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
635
636 bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
637
638 bool isVSrcT_bf16_Lo128() const {
639 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
640 }
641
642 bool isVSrcT_f16_Lo128() const {
643 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
644 }
645
646 bool isVSrcFake16_bf16_Lo128() const {
647 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
648 }
649
650 bool isVSrcFake16_f16_Lo128() const {
651 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
652 }
653
654 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
655
656 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
657
658 bool isVSrc_v2bf16() const {
659 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
660 }
661
662 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
663
664 bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
665
666 bool isVISrcB32() const {
667 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
668 }
669
670 bool isVISrcB16() const {
671 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
672 }
673
674 bool isVISrcV2B16() const {
675 return isVISrcB16();
676 }
677
678 bool isVISrcF32() const {
679 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
680 }
681
682 bool isVISrcF16() const {
683 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
684 }
685
686 bool isVISrcV2F16() const {
687 return isVISrcF16() || isVISrcB32();
688 }
689
690 bool isVISrc_64_bf16() const {
691 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
692 }
693
694 bool isVISrc_64_f16() const {
695 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
696 }
697
698 bool isVISrc_64_b32() const {
699 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
700 }
701
702 bool isVISrc_64B64() const {
703 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
704 }
705
706 bool isVISrc_64_f64() const {
707 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
708 }
709
710 bool isVISrc_64V2FP32() const {
711 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
712 }
713
714 bool isVISrc_64V2INT32() const {
715 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
716 }
717
718 bool isVISrc_256_b32() const {
719 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
720 }
721
722 bool isVISrc_256_f32() const {
723 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
724 }
725
726 bool isVISrc_256B64() const {
727 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
728 }
729
730 bool isVISrc_256_f64() const {
731 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
732 }
733
734 bool isVISrc_512_f64() const {
735 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
736 }
737
738 bool isVISrc_128B16() const {
739 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
740 }
741
742 bool isVISrc_128V2B16() const {
743 return isVISrc_128B16();
744 }
745
746 bool isVISrc_128_b32() const {
747 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
748 }
749
750 bool isVISrc_128_f32() const {
751 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
752 }
753
754 bool isVISrc_256V2FP32() const {
755 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
756 }
757
758 bool isVISrc_256V2INT32() const {
759 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
760 }
761
762 bool isVISrc_512_b32() const {
763 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
764 }
765
766 bool isVISrc_512B16() const {
767 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
768 }
769
770 bool isVISrc_512V2B16() const {
771 return isVISrc_512B16();
772 }
773
774 bool isVISrc_512_f32() const {
775 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
776 }
777
778 bool isVISrc_512F16() const {
779 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
780 }
781
782 bool isVISrc_512V2F16() const {
783 return isVISrc_512F16() || isVISrc_512_b32();
784 }
785
786 bool isVISrc_1024_b32() const {
787 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
788 }
789
790 bool isVISrc_1024B16() const {
791 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
792 }
793
794 bool isVISrc_1024V2B16() const {
795 return isVISrc_1024B16();
796 }
797
798 bool isVISrc_1024_f32() const {
799 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
800 }
801
802 bool isVISrc_1024F16() const {
803 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
804 }
805
806 bool isVISrc_1024V2F16() const {
807 return isVISrc_1024F16() || isVISrc_1024_b32();
808 }
809
810 bool isAISrcB32() const {
811 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
812 }
813
814 bool isAISrcB16() const {
815 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
816 }
817
818 bool isAISrcV2B16() const {
819 return isAISrcB16();
820 }
821
822 bool isAISrcF32() const {
823 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
824 }
825
826 bool isAISrcF16() const {
827 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
828 }
829
830 bool isAISrcV2F16() const {
831 return isAISrcF16() || isAISrcB32();
832 }
833
834 bool isAISrc_64B64() const {
835 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
836 }
837
838 bool isAISrc_64_f64() const {
839 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
840 }
841
842 bool isAISrc_128_b32() const {
843 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
844 }
845
846 bool isAISrc_128B16() const {
847 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
848 }
849
850 bool isAISrc_128V2B16() const {
851 return isAISrc_128B16();
852 }
853
854 bool isAISrc_128_f32() const {
855 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
856 }
857
858 bool isAISrc_128F16() const {
859 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
860 }
861
862 bool isAISrc_128V2F16() const {
863 return isAISrc_128F16() || isAISrc_128_b32();
864 }
865
866 bool isVISrc_128_bf16() const {
867 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
868 }
869
870 bool isVISrc_128_f16() const {
871 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
872 }
873
874 bool isVISrc_128V2F16() const {
875 return isVISrc_128_f16() || isVISrc_128_b32();
876 }
877
878 bool isAISrc_256B64() const {
879 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
880 }
881
882 bool isAISrc_256_f64() const {
883 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
884 }
885
886 bool isAISrc_512_b32() const {
887 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
888 }
889
890 bool isAISrc_512B16() const {
891 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
892 }
893
894 bool isAISrc_512V2B16() const {
895 return isAISrc_512B16();
896 }
897
898 bool isAISrc_512_f32() const {
899 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
900 }
901
902 bool isAISrc_512F16() const {
903 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
904 }
905
906 bool isAISrc_512V2F16() const {
907 return isAISrc_512F16() || isAISrc_512_b32();
908 }
909
910 bool isAISrc_1024_b32() const {
911 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
912 }
913
914 bool isAISrc_1024B16() const {
915 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
916 }
917
918 bool isAISrc_1024V2B16() const {
919 return isAISrc_1024B16();
920 }
921
922 bool isAISrc_1024_f32() const {
923 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
924 }
925
926 bool isAISrc_1024F16() const {
927 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
928 }
929
930 bool isAISrc_1024V2F16() const {
931 return isAISrc_1024F16() || isAISrc_1024_b32();
932 }
933
934 bool isKImmFP32() const {
935 return isLiteralImm(MVT::f32);
936 }
937
938 bool isKImmFP16() const {
939 return isLiteralImm(MVT::f16);
940 }
941
942 bool isKImmFP64() const { return isLiteralImm(MVT::f64); }
943
944 bool isMem() const override {
945 return false;
946 }
947
948 bool isExpr() const {
949 return Kind == Expression;
950 }
951
952 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
953
954 bool isSWaitCnt() const;
955 bool isDepCtr() const;
956 bool isSDelayALU() const;
957 bool isHwreg() const;
958 bool isSendMsg() const;
959 bool isSplitBarrier() const;
960 bool isSwizzle() const;
961 bool isSMRDOffset8() const;
962 bool isSMEMOffset() const;
963 bool isSMRDLiteralOffset() const;
964 bool isDPP8() const;
965 bool isDPPCtrl() const;
966 bool isBLGP() const;
967 bool isGPRIdxMode() const;
968 bool isS16Imm() const;
969 bool isU16Imm() const;
970 bool isEndpgm() const;
971
972 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
973 return [this, P]() { return P(*this); };
974 }
975
976 StringRef getToken() const {
977 assert(isToken());
978 return StringRef(Tok.Data, Tok.Length);
979 }
980
981 int64_t getImm() const {
982 assert(isImm());
983 return Imm.Val;
984 }
985
986 void setImm(int64_t Val) {
987 assert(isImm());
988 Imm.Val = Val;
989 }
990
991 ImmTy getImmTy() const {
992 assert(isImm());
993 return Imm.Type;
994 }
995
996 MCRegister getReg() const override {
997 assert(isRegKind());
998 return Reg.RegNo;
999 }
1000
1001 SMLoc getStartLoc() const override {
1002 return StartLoc;
1003 }
1004
1005 SMLoc getEndLoc() const override {
1006 return EndLoc;
1007 }
1008
1009 SMRange getLocRange() const {
1010 return SMRange(StartLoc, EndLoc);
1011 }
1012
1013 int getMCOpIdx() const { return MCOpIdx; }
1014
1015 Modifiers getModifiers() const {
1016 assert(isRegKind() || isImmTy(ImmTyNone));
1017 return isRegKind() ? Reg.Mods : Imm.Mods;
1018 }
1019
1020 void setModifiers(Modifiers Mods) {
1021 assert(isRegKind() || isImmTy(ImmTyNone));
1022 if (isRegKind())
1023 Reg.Mods = Mods;
1024 else
1025 Imm.Mods = Mods;
1026 }
1027
1028 bool hasModifiers() const {
1029 return getModifiers().hasModifiers();
1030 }
1031
1032 bool hasFPModifiers() const {
1033 return getModifiers().hasFPModifiers();
1034 }
1035
1036 bool hasIntModifiers() const {
1037 return getModifiers().hasIntModifiers();
1038 }
1039
1040 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1041
1042 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1043
1044 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1045
1046 void addRegOperands(MCInst &Inst, unsigned N) const;
1047
1048 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1049 if (isRegKind())
1050 addRegOperands(Inst, N);
1051 else
1052 addImmOperands(Inst, N);
1053 }
1054
1055 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1056 Modifiers Mods = getModifiers();
1057 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1058 if (isRegKind()) {
1059 addRegOperands(Inst, N);
1060 } else {
1061 addImmOperands(Inst, N, false);
1062 }
1063 }
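// Note: the modifier immediate is added before the register or immediate
// operand itself, matching the (src*_modifiers, src*) operand ordering used
// by VOP3-style, SDWA and DPP instruction definitions.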
1064
1065 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1066 assert(!hasIntModifiers());
1067 addRegOrImmWithInputModsOperands(Inst, N);
1068 }
1069
1070 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1071 assert(!hasFPModifiers());
1072 addRegOrImmWithInputModsOperands(Inst, N);
1073 }
1074
1075 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1076 Modifiers Mods = getModifiers();
1077 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1078 assert(isRegKind());
1079 addRegOperands(Inst, N);
1080 }
1081
1082 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1083 assert(!hasIntModifiers());
1084 addRegWithInputModsOperands(Inst, N);
1085 }
1086
1087 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1088 assert(!hasFPModifiers());
1089 addRegWithInputModsOperands(Inst, N);
1090 }
1091
1092 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1093 // clang-format off
1094 switch (Type) {
1095 case ImmTyNone: OS << "None"; break;
1096 case ImmTyGDS: OS << "GDS"; break;
1097 case ImmTyLDS: OS << "LDS"; break;
1098 case ImmTyOffen: OS << "Offen"; break;
1099 case ImmTyIdxen: OS << "Idxen"; break;
1100 case ImmTyAddr64: OS << "Addr64"; break;
1101 case ImmTyOffset: OS << "Offset"; break;
1102 case ImmTyInstOffset: OS << "InstOffset"; break;
1103 case ImmTyOffset0: OS << "Offset0"; break;
1104 case ImmTyOffset1: OS << "Offset1"; break;
1105 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1106 case ImmTyCPol: OS << "CPol"; break;
1107 case ImmTyIndexKey8bit: OS << "index_key"; break;
1108 case ImmTyIndexKey16bit: OS << "index_key"; break;
1109 case ImmTyIndexKey32bit: OS << "index_key"; break;
1110 case ImmTyTFE: OS << "TFE"; break;
1111 case ImmTyD16: OS << "D16"; break;
1112 case ImmTyFORMAT: OS << "FORMAT"; break;
1113 case ImmTyClamp: OS << "Clamp"; break;
1114 case ImmTyOModSI: OS << "OModSI"; break;
1115 case ImmTyDPP8: OS << "DPP8"; break;
1116 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1117 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1118 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1119 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1120 case ImmTyDppFI: OS << "DppFI"; break;
1121 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1122 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1123 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1124 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1125 case ImmTyDMask: OS << "DMask"; break;
1126 case ImmTyDim: OS << "Dim"; break;
1127 case ImmTyUNorm: OS << "UNorm"; break;
1128 case ImmTyDA: OS << "DA"; break;
1129 case ImmTyR128A16: OS << "R128A16"; break;
1130 case ImmTyA16: OS << "A16"; break;
1131 case ImmTyLWE: OS << "LWE"; break;
1132 case ImmTyOff: OS << "Off"; break;
1133 case ImmTyExpTgt: OS << "ExpTgt"; break;
1134 case ImmTyExpCompr: OS << "ExpCompr"; break;
1135 case ImmTyExpVM: OS << "ExpVM"; break;
1136 case ImmTyHwreg: OS << "Hwreg"; break;
1137 case ImmTySendMsg: OS << "SendMsg"; break;
1138 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1139 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1140 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1141 case ImmTyOpSel: OS << "OpSel"; break;
1142 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1143 case ImmTyNegLo: OS << "NegLo"; break;
1144 case ImmTyNegHi: OS << "NegHi"; break;
1145 case ImmTySwizzle: OS << "Swizzle"; break;
1146 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1147 case ImmTyHigh: OS << "High"; break;
1148 case ImmTyBLGP: OS << "BLGP"; break;
1149 case ImmTyCBSZ: OS << "CBSZ"; break;
1150 case ImmTyABID: OS << "ABID"; break;
1151 case ImmTyEndpgm: OS << "Endpgm"; break;
1152 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1153 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1154 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1155 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1156 case ImmTyBitOp3: OS << "BitOp3"; break;
1157 case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
1158 case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
1159 case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
1160 case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
1161 case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
1162 case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
1163 case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
1164 case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
1165 case ImmTyScaleSel: OS << "ScaleSel" ; break;
1166 case ImmTyByteSel: OS << "ByteSel" ; break;
1167 }
1168 // clang-format on
1169 }
1170
1171 void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
1172 switch (Kind) {
1173 case Register:
1174 OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
1175 << " mods: " << Reg.Mods << '>';
1176 break;
1177 case Immediate:
1178 OS << '<' << getImm();
1179 if (getImmTy() != ImmTyNone) {
1180 OS << " type: "; printImmTy(OS, getImmTy());
1181 }
1182 OS << " mods: " << Imm.Mods << '>';
1183 break;
1184 case Token:
1185 OS << '\'' << getToken() << '\'';
1186 break;
1187 case Expression:
1188 OS << "<expr ";
1189 MAI.printExpr(OS, *Expr);
1190 OS << '>';
1191 break;
1192 }
1193 }
1194
1195 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1196 int64_t Val, SMLoc Loc,
1197 ImmTy Type = ImmTyNone,
1198 bool IsFPImm = false) {
1199 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1200 Op->Imm.Val = Val;
1201 Op->Imm.IsFPImm = IsFPImm;
1202 Op->Imm.Type = Type;
1203 Op->Imm.Mods = Modifiers();
1204 Op->StartLoc = Loc;
1205 Op->EndLoc = Loc;
1206 return Op;
1207 }
1208
1209 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1210 StringRef Str, SMLoc Loc,
1211 bool HasExplicitEncodingSize = true) {
1212 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1213 Res->Tok.Data = Str.data();
1214 Res->Tok.Length = Str.size();
1215 Res->StartLoc = Loc;
1216 Res->EndLoc = Loc;
1217 return Res;
1218 }
1219
1220 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1221 MCRegister Reg, SMLoc S, SMLoc E) {
1222 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1223 Op->Reg.RegNo = Reg;
1224 Op->Reg.Mods = Modifiers();
1225 Op->StartLoc = S;
1226 Op->EndLoc = E;
1227 return Op;
1228 }
1229
1230 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1231 const class MCExpr *Expr, SMLoc S) {
1232 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1233 Op->Expr = Expr;
1234 Op->StartLoc = S;
1235 Op->EndLoc = S;
1236 return Op;
1237 }
1238};
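// Illustrative usage (sketch, not from the upstream source): parser callbacks
// typically build operands through the factory helpers above, e.g.
//   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, Loc,
//                                               AMDGPUOperand::ImmTyOffset));
// where Value and Loc stand for a parsed immediate and its source location.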
1239
1240raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1241 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1242 return OS;
1243}
1244
1245//===----------------------------------------------------------------------===//
1246// AsmParser
1247//===----------------------------------------------------------------------===//
1248
1249// TODO: define GET_SUBTARGET_FEATURE_NAME
1250#define GET_REGISTER_MATCHER
1251#include "AMDGPUGenAsmMatcher.inc"
1252#undef GET_REGISTER_MATCHER
1253#undef GET_SUBTARGET_FEATURE_NAME
1254
1255// Holds info related to the current kernel, e.g. count of SGPRs used.
1256// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1257// .amdgpu_hsa_kernel or at EOF.
1258class KernelScopeInfo {
1259 int SgprIndexUnusedMin = -1;
1260 int VgprIndexUnusedMin = -1;
1261 int AgprIndexUnusedMin = -1;
1262 MCContext *Ctx = nullptr;
1263 MCSubtargetInfo const *MSTI = nullptr;
1264
1265 void usesSgprAt(int i) {
1266 if (i >= SgprIndexUnusedMin) {
1267 SgprIndexUnusedMin = ++i;
1268 if (Ctx) {
1269 MCSymbol* const Sym =
1270 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1271 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1272 }
1273 }
1274 }
1275
1276 void usesVgprAt(int i) {
1277 if (i >= VgprIndexUnusedMin) {
1278 VgprIndexUnusedMin = ++i;
1279 if (Ctx) {
1280 MCSymbol* const Sym =
1281 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1282 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1283 VgprIndexUnusedMin);
1284 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1285 }
1286 }
1287 }
1288
1289 void usesAgprAt(int i) {
1290 // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
1291 if (!hasMAIInsts(*MSTI))
1292 return;
1293
1294 if (i >= AgprIndexUnusedMin) {
1295 AgprIndexUnusedMin = ++i;
1296 if (Ctx) {
1297 MCSymbol* const Sym =
1298 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1299 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1300
1301 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1302 MCSymbol* const vSym =
1303 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1304 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1305 VgprIndexUnusedMin);
1306 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1307 }
1308 }
1309 }
1310
1311public:
1312 KernelScopeInfo() = default;
1313
1314 void initialize(MCContext &Context) {
1315 Ctx = &Context;
1316 MSTI = Ctx->getSubtargetInfo();
1317
1318 usesSgprAt(SgprIndexUnusedMin = -1);
1319 usesVgprAt(VgprIndexUnusedMin = -1);
1320 if (hasMAIInsts(*MSTI)) {
1321 usesAgprAt(AgprIndexUnusedMin = -1);
1322 }
1323 }
1324
1325 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1326 unsigned RegWidth) {
1327 switch (RegKind) {
1328 case IS_SGPR:
1329 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1330 break;
1331 case IS_AGPR:
1332 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1333 break;
1334 case IS_VGPR:
1335 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1336 break;
1337 default:
1338 break;
1339 }
1340 }
1341};
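// Worked example (illustrative, not from the upstream source): inside a
// .amdgpu_hsa_kernel scope, parsing a reference to s[4:7] calls
// usesRegister(IS_SGPR, /*DwordRegIndex=*/4, /*RegWidth=*/128), which marks
// SGPR index 4 + ceil(128/32) - 1 = 7 as used and raises the
// ".kernel.sgpr_count" symbol to 8 (highest index used plus one). The VGPR and
// AGPR paths work the same way, with the combined total published through
// ".kernel.vgpr_count" via getTotalNumVGPRs().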
1342
1343class AMDGPUAsmParser : public MCTargetAsmParser {
1344 MCAsmParser &Parser;
1345
1346 unsigned ForcedEncodingSize = 0;
1347 bool ForcedDPP = false;
1348 bool ForcedSDWA = false;
1349 KernelScopeInfo KernelScope;
1350
1351 /// @name Auto-generated Match Functions
1352 /// {
1353
1354#define GET_ASSEMBLER_HEADER
1355#include "AMDGPUGenAsmMatcher.inc"
1356
1357 /// }
1358
1359private:
1360 void createConstantSymbol(StringRef Id, int64_t Val);
1361
1362 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1363 bool OutOfRangeError(SMRange Range);
1364 /// Calculate VGPR/SGPR blocks required for given target, reserved
1365 /// registers, and user-specified NextFreeXGPR values.
1366 ///
1367 /// \param Features [in] Target features, used for bug corrections.
1368 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1369 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1370 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1371 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1372 /// descriptor field, if valid.
1373 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1374 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1375 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1376 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1377 /// \param VGPRBlocks [out] Result VGPR block count.
1378 /// \param SGPRBlocks [out] Result SGPR block count.
1379 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1380 const MCExpr *FlatScrUsed, bool XNACKUsed,
1381 std::optional<bool> EnableWavefrontSize32,
1382 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1383 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1384 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
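 /// Rough sketch of the block arithmetic (simplified, an assumption rather
 /// than a quote of the implementation): the register counts are rounded up
 /// to the target's allocation granule and encoded as "blocks minus one" in
 /// the kernel descriptor. For instance, with a 4-VGPR granule, a kernel whose
 /// highest referenced VGPR is v9 (NextFreeVGPR = 10) would report
 /// ceil(10 / 4) - 1 = 2 VGPR blocks. The exact granule depends on target
 /// features such as the wavefront size.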
1385 bool ParseDirectiveAMDGCNTarget();
1386 bool ParseDirectiveAMDHSACodeObjectVersion();
1387 bool ParseDirectiveAMDHSAKernel();
1388 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1389 bool ParseDirectiveAMDKernelCodeT();
1390 // TODO: Possibly make subtargetHasRegister const.
1391 bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
1392 bool ParseDirectiveAMDGPUHsaKernel();
1393
1394 bool ParseDirectiveISAVersion();
1395 bool ParseDirectiveHSAMetadata();
1396 bool ParseDirectivePALMetadataBegin();
1397 bool ParseDirectivePALMetadata();
1398 bool ParseDirectiveAMDGPULDS();
1399
1400 /// Common code to parse out a block of text (typically YAML) between start and
1401 /// end directives.
1402 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1403 const char *AssemblerDirectiveEnd,
1404 std::string &CollectString);
1405
1406 bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
1407 RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
1408 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1409 unsigned &RegNum, unsigned &RegWidth,
1410 bool RestoreOnFailure = false);
1411 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1412 unsigned &RegNum, unsigned &RegWidth,
1413 SmallVectorImpl<AsmToken> &Tokens);
1414 MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1415 unsigned &RegWidth,
1416 SmallVectorImpl<AsmToken> &Tokens);
1417 MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1418 unsigned &RegWidth,
1419 SmallVectorImpl<AsmToken> &Tokens);
1420 MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1421 unsigned &RegWidth,
1422 SmallVectorImpl<AsmToken> &Tokens);
1423 bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
1424 MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
1425 unsigned SubReg, unsigned RegWidth, SMLoc Loc);
1426
1427 bool isRegister();
1428 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1429 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1430 void initializeGprCountSymbol(RegisterKind RegKind);
1431 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1432 unsigned RegWidth);
1433 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1434 bool IsAtomic);
1435
1436public:
1437 enum OperandMode {
1438 OperandMode_Default,
1439 OperandMode_NSA,
1440 };
1441
1442 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1443
1444 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1445 const MCInstrInfo &MII,
1446 const MCTargetOptions &Options)
1447 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1449
1450 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1451
1452 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1453 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1454 createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1455 createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1456 createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1457 } else {
1458 createConstantSymbol(".option.machine_version_major", ISA.Major);
1459 createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1460 createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1461 }
1462 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1463 initializeGprCountSymbol(IS_VGPR);
1464 initializeGprCountSymbol(IS_SGPR);
1465 } else
1466 KernelScope.initialize(getContext());
1467
1468 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1469 createConstantSymbol(Symbol, Code);
1470
1471 createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1472 createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1473 createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1474 }
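// Hedged usage sketch (assumed, not from the upstream source): the constant
// symbols created above let hand-written assembly query the target without
// hard-coding it, along the lines of
//   .if .amdgcn.gfx_generation_number >= 10
//   .endif
// The ".amdgcn.*" names apply under the HSA ABI; the ".option.*" names are
// used otherwise, as selected in the constructor.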
1475
1476 bool hasMIMG_R128() const {
1477 return AMDGPU::hasMIMG_R128(getSTI());
1478 }
1479
1480 bool hasPackedD16() const {
1481 return AMDGPU::hasPackedD16(getSTI());
1482 }
1483
1484 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1485
1486 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1487
1488 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1489
1490 bool isSI() const {
1491 return AMDGPU::isSI(getSTI());
1492 }
1493
1494 bool isCI() const {
1495 return AMDGPU::isCI(getSTI());
1496 }
1497
1498 bool isVI() const {
1499 return AMDGPU::isVI(getSTI());
1500 }
1501
1502 bool isGFX9() const {
1503 return AMDGPU::isGFX9(getSTI());
1504 }
1505
1506 // TODO: isGFX90A is also true for GFX940. This needs to be cleaned up.
1507 bool isGFX90A() const {
1508 return AMDGPU::isGFX90A(getSTI());
1509 }
1510
1511 bool isGFX940() const {
1512 return AMDGPU::isGFX940(getSTI());
1513 }
1514
1515 bool isGFX9Plus() const {
1516 return AMDGPU::isGFX9Plus(getSTI());
1517 }
1518
1519 bool isGFX10() const {
1520 return AMDGPU::isGFX10(getSTI());
1521 }
1522
1523 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1524
1525 bool isGFX11() const {
1526 return AMDGPU::isGFX11(getSTI());
1527 }
1528
1529 bool isGFX11Plus() const {
1530 return AMDGPU::isGFX11Plus(getSTI());
1531 }
1532
1533 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1534
1535 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1536
1537 bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }
1538
1539 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1540
1541 bool isGFX10_BEncoding() const {
1542 return AMDGPU::isGFX10_BEncoding(getSTI());
1543 }
1544
1545 bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }
1546
1547 bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }
1548
1549 bool hasInv2PiInlineImm() const {
1550 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1551 }
1552
1553 bool has64BitLiterals() const {
1554 return getFeatureBits()[AMDGPU::Feature64BitLiterals];
1555 }
1556
1557 bool hasFlatOffsets() const {
1558 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1559 }
1560
1561 bool hasTrue16Insts() const {
1562 return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
1563 }
1564
1565 bool hasArchitectedFlatScratch() const {
1566 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1567 }
1568
1569 bool hasSGPR102_SGPR103() const {
1570 return !isVI() && !isGFX9();
1571 }
1572
1573 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1574
1575 bool hasIntClamp() const {
1576 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1577 }
1578
1579 bool hasPartialNSAEncoding() const {
1580 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1581 }
1582
1583 bool hasGloballyAddressableScratch() const {
1584 return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
1585 }
1586
1587 unsigned getNSAMaxSize(bool HasSampler = false) const {
1588 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1589 }
1590
1591 unsigned getMaxNumUserSGPRs() const {
1592 return AMDGPU::getMaxNumUserSGPRs(getSTI());
1593 }
1594
1595 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1596
1597 AMDGPUTargetStreamer &getTargetStreamer() {
1598 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1599 return static_cast<AMDGPUTargetStreamer &>(TS);
1600 }
1601
1602 MCContext &getContext() const {
1603 // We need this const_cast because for some reason getContext() is not const
1604 // in MCAsmParser.
1605 return const_cast<AMDGPUAsmParser *>(this)->MCTargetAsmParser::getContext();
1606 }
1607
1608 const MCRegisterInfo *getMRI() const {
1609 return getContext().getRegisterInfo();
1610 }
1611
1612 const MCInstrInfo *getMII() const {
1613 return &MII;
1614 }
1615
1616 // FIXME: This should not be used. Instead, should use queries derived from
1617 // getAvailableFeatures().
1618 const FeatureBitset &getFeatureBits() const {
1619 return getSTI().getFeatureBits();
1620 }
1621
1622 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1623 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1624 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1625
1626 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1627 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1628 bool isForcedDPP() const { return ForcedDPP; }
1629 bool isForcedSDWA() const { return ForcedSDWA; }
1630 ArrayRef<unsigned> getMatchedVariants() const;
1631 StringRef getMatchedVariantName() const;
1632
1633 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1634 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1635 bool RestoreOnFailure);
1636 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1637 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1638 SMLoc &EndLoc) override;
1639 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1640 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1641 unsigned Kind) override;
1642 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1643 OperandVector &Operands, MCStreamer &Out,
1644 uint64_t &ErrorInfo,
1645 bool MatchingInlineAsm) override;
1646 bool ParseDirective(AsmToken DirectiveID) override;
1647 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1648 OperandMode Mode = OperandMode_Default);
1649 StringRef parseMnemonicSuffix(StringRef Name);
1650 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1651 SMLoc NameLoc, OperandVector &Operands) override;
1652 //bool ProcessInstruction(MCInst &Inst);
1653
1654 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1655
1656 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1657
1658 ParseStatus
1659 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1660 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1661 std::function<bool(int64_t &)> ConvertResult = nullptr);
1662
1663 ParseStatus parseOperandArrayWithPrefix(
1664 const char *Prefix, OperandVector &Operands,
1665 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1666 bool (*ConvertResult)(int64_t &) = nullptr);
1667
1668 ParseStatus
1669 parseNamedBit(StringRef Name, OperandVector &Operands,
1670 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1671 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1672 ParseStatus parseCPol(OperandVector &Operands);
1673 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1674 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1675 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1676 SMLoc &StringLoc);
1677 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1678 StringRef Name,
1679 ArrayRef<const char *> Ids,
1680 int64_t &IntVal);
1681 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1682 StringRef Name,
1683 ArrayRef<const char *> Ids,
1684 AMDGPUOperand::ImmTy Type);
1685
1686 bool isModifier();
1687 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1688 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1689 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1690 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1691 bool parseSP3NegModifier();
1692 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1693 LitModifier Lit = LitModifier::None);
1694 ParseStatus parseReg(OperandVector &Operands);
1695 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1696 LitModifier Lit = LitModifier::None);
1697 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1698 bool AllowImm = true);
1699 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1700 bool AllowImm = true);
1701 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1702 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1703 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1704 ParseStatus tryParseIndexKey(OperandVector &Operands,
1705 AMDGPUOperand::ImmTy ImmTy);
1706 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1707 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1708 ParseStatus parseIndexKey32bit(OperandVector &Operands);
1709 ParseStatus tryParseMatrixFMT(OperandVector &Operands, StringRef Name,
1710 AMDGPUOperand::ImmTy Type);
1711 ParseStatus parseMatrixAFMT(OperandVector &Operands);
1712 ParseStatus parseMatrixBFMT(OperandVector &Operands);
1713 ParseStatus tryParseMatrixScale(OperandVector &Operands, StringRef Name,
1714 AMDGPUOperand::ImmTy Type);
1715 ParseStatus parseMatrixAScale(OperandVector &Operands);
1716 ParseStatus parseMatrixBScale(OperandVector &Operands);
1717 ParseStatus tryParseMatrixScaleFmt(OperandVector &Operands, StringRef Name,
1718 AMDGPUOperand::ImmTy Type);
1719 ParseStatus parseMatrixAScaleFmt(OperandVector &Operands);
1720 ParseStatus parseMatrixBScaleFmt(OperandVector &Operands);
1721
1722 ParseStatus parseDfmtNfmt(int64_t &Format);
1723 ParseStatus parseUfmt(int64_t &Format);
1724 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1725 int64_t &Format);
1726 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1727 int64_t &Format);
1728 ParseStatus parseFORMAT(OperandVector &Operands);
1729 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1730 ParseStatus parseNumericFormat(int64_t &Format);
1731 ParseStatus parseFlatOffset(OperandVector &Operands);
1732 ParseStatus parseR128A16(OperandVector &Operands);
1733 ParseStatus parseBLGP(OperandVector &Operands);
1734 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1735 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1736
1737 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1738
1739 bool parseCnt(int64_t &IntVal);
1740 ParseStatus parseSWaitCnt(OperandVector &Operands);
1741
1742 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1743 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1744 ParseStatus parseDepCtr(OperandVector &Operands);
1745
1746 bool parseDelay(int64_t &Delay);
1747 ParseStatus parseSDelayALU(OperandVector &Operands);
1748
1749 ParseStatus parseHwreg(OperandVector &Operands);
1750
1751private:
1752 struct OperandInfoTy {
1753 SMLoc Loc;
1754 int64_t Val;
1755 bool IsSymbolic = false;
1756 bool IsDefined = false;
1757
1758 OperandInfoTy(int64_t Val) : Val(Val) {}
1759 };
1760
1761 struct StructuredOpField : OperandInfoTy {
1762 StringLiteral Id;
1763 StringLiteral Desc;
1764 unsigned Width;
1765 bool IsDefined = false;
1766
1767 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1768 int64_t Default)
1769 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1770 virtual ~StructuredOpField() = default;
1771
1772 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1773 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1774 return false;
1775 }
1776
1777 virtual bool validate(AMDGPUAsmParser &Parser) const {
1778 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1779 return Error(Parser, "not supported on this GPU");
1780 if (!isUIntN(Width, Val))
1781 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1782 return true;
1783 }
1784 };
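// Illustrative sketch (hypothetical field, not from the upstream source): a
// field declared as
//   StructuredOpField Fld(/*Id=*/"id", /*Desc=*/"hardware register id",
//                         /*Width=*/6, /*Default=*/0);
// accepts values 0..63; validate() rejects anything wider with the
// "only 6-bit values are legal" diagnostic, and a symbolic name that resolves
// to OPR_ID_UNSUPPORTED reports "not supported on this GPU".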
1785
1786 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1787 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1788
1789 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1790 bool validateSendMsg(const OperandInfoTy &Msg,
1791 const OperandInfoTy &Op,
1792 const OperandInfoTy &Stream);
1793
1794 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1795 OperandInfoTy &Width);
1796
1797 static SMLoc getLaterLoc(SMLoc a, SMLoc b);
1798
1799 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1800 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1801 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1802
1803 SMLoc getOperandLoc(const OperandVector &Operands, int MCOpIdx) const;
1804 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1805 const OperandVector &Operands) const;
1806 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type,
1807 const OperandVector &Operands) const;
1808 SMLoc getInstLoc(const OperandVector &Operands) const;
1809
1810 bool validateInstruction(const MCInst &Inst, SMLoc IDLoc,
1811 const OperandVector &Operands);
1812 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1813 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1814 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1815 bool validateSOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1816 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1817 std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
1818 bool AsVOPD3);
1819 bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
1820 bool tryVOPD(const MCInst &Inst);
1821 bool tryVOPD3(const MCInst &Inst);
1822 bool tryAnotherVOPDEncoding(const MCInst &Inst);
1823
1824 bool validateIntClampSupported(const MCInst &Inst);
1825 bool validateMIMGAtomicDMask(const MCInst &Inst);
1826 bool validateMIMGGatherDMask(const MCInst &Inst);
1827 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1828 bool validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc);
1829 bool validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc);
1830 bool validateMIMGD16(const MCInst &Inst);
1831 bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1832 bool validateTensorR128(const MCInst &Inst);
1833 bool validateMIMGMSAA(const MCInst &Inst);
1834 bool validateOpSel(const MCInst &Inst);
1835 bool validateTrue16OpSel(const MCInst &Inst);
1836 bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
1837 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1838 bool validateVccOperand(MCRegister Reg) const;
1839 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1840 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1841 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1842 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1843 bool validateAGPRLdSt(const MCInst &Inst) const;
1844 bool validateVGPRAlign(const MCInst &Inst) const;
1845 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1846 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1847 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1848 bool validateDivScale(const MCInst &Inst);
1849 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1850 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1851 SMLoc IDLoc);
1852 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1853 const unsigned CPol);
1854 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1855 bool validateSetVgprMSB(const MCInst &Inst, const OperandVector &Operands);
1856 bool validateLdsDirect(const MCInst &Inst, const OperandVector &Operands);
1857 bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
1858 unsigned getConstantBusLimit(unsigned Opcode) const;
1859 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1860 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1861 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1862
1863 bool isSupportedMnemo(StringRef Mnemo,
1864 const FeatureBitset &FBS);
1865 bool isSupportedMnemo(StringRef Mnemo,
1866 const FeatureBitset &FBS,
1867 ArrayRef<unsigned> Variants);
1868 bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);
1869
1870 bool isId(const StringRef Id) const;
1871 bool isId(const AsmToken &Token, const StringRef Id) const;
1872 bool isToken(const AsmToken::TokenKind Kind) const;
1873 StringRef getId() const;
1874 bool trySkipId(const StringRef Id);
1875 bool trySkipId(const StringRef Pref, const StringRef Id);
1876 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1877 bool trySkipToken(const AsmToken::TokenKind Kind);
1878 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1879 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1880 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1881
1882 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1883 AsmToken::TokenKind getTokenKind() const;
1884 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1886 StringRef getTokenStr() const;
1887 AsmToken peekToken(bool ShouldSkipSpace = true);
1888 AsmToken getToken() const;
1889 SMLoc getLoc() const;
1890 void lex();
1891
1892public:
1893 void onBeginOfFile() override;
1894 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1895
1896 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1897
1898 ParseStatus parseExpTgt(OperandVector &Operands);
1899 ParseStatus parseSendMsg(OperandVector &Operands);
1900 ParseStatus parseInterpSlot(OperandVector &Operands);
1901 ParseStatus parseInterpAttr(OperandVector &Operands);
1902 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1903 ParseStatus parseBoolReg(OperandVector &Operands);
1904
1905 bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1906 const unsigned MaxVal, const Twine &ErrMsg,
1907 SMLoc &Loc);
1908 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1909 const unsigned MinVal,
1910 const unsigned MaxVal,
1911 const StringRef ErrMsg);
1912 ParseStatus parseSwizzle(OperandVector &Operands);
1913 bool parseSwizzleOffset(int64_t &Imm);
1914 bool parseSwizzleMacro(int64_t &Imm);
1915 bool parseSwizzleQuadPerm(int64_t &Imm);
1916 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1917 bool parseSwizzleBroadcast(int64_t &Imm);
1918 bool parseSwizzleSwap(int64_t &Imm);
1919 bool parseSwizzleReverse(int64_t &Imm);
1920 bool parseSwizzleFFT(int64_t &Imm);
1921 bool parseSwizzleRotate(int64_t &Imm);
1922
1923 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1924 int64_t parseGPRIdxMacro();
1925
1926 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1927 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1928
1929 ParseStatus parseOModSI(OperandVector &Operands);
1930
1931 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1932 OptionalImmIndexMap &OptionalIdx);
1933 void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
1934 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1935 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1936 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1937 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1938
1939 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1940 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1941 OptionalImmIndexMap &OptionalIdx);
1942 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1943 OptionalImmIndexMap &OptionalIdx);
1944
1945 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1946 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1947
1948 bool parseDimId(unsigned &Encoding);
1949 ParseStatus parseDim(OperandVector &Operands);
1950 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1951 ParseStatus parseDPP8(OperandVector &Operands);
1952 ParseStatus parseDPPCtrl(OperandVector &Operands);
1953 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1954 int64_t parseDPPCtrlSel(StringRef Ctrl);
1955 int64_t parseDPPCtrlPerm();
1956 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1957 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1958 cvtDPP(Inst, Operands, true);
1959 }
1960 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1961 bool IsDPP8 = false);
1962 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1963 cvtVOP3DPP(Inst, Operands, true);
1964 }
1965
1966 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1967 AMDGPUOperand::ImmTy Type);
1968 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1969 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1970 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1971 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1972 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1973 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1974 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1975 uint64_t BasicInstType,
1976 bool SkipDstVcc = false,
1977 bool SkipSrcVcc = false);
1978
1979 ParseStatus parseEndpgm(OperandVector &Operands);
1980
1981 ParseStatus parseVOPD(OperandVector &Operands);
1982};
1983
1984} // end anonymous namespace
1985
1986 // May be called with an integer type of equivalent bit width.
1987static const fltSemantics *getFltSemantics(unsigned Size) {
1988 switch (Size) {
1989 case 4:
1990 return &APFloat::IEEEsingle();
1991 case 8:
1992 return &APFloat::IEEEdouble();
1993 case 2:
1994 return &APFloat::IEEEhalf();
1995 default:
1996 llvm_unreachable("unsupported fp type");
1997 }
1998}
1999
2000 static const fltSemantics *getFltSemantics(MVT VT) {
2001  return getFltSemantics(VT.getSizeInBits() / 8);
2002}
2003
2004 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
2005  switch (OperandType) {
2006  // When a floating-point immediate is used as an operand of type i16, the
2007  // 32-bit representation of the constant truncated to the 16 LSBs should be used.
2022 return &APFloat::IEEEsingle();
2029 return &APFloat::IEEEdouble();
2036 return &APFloat::IEEEhalf();
2041 return &APFloat::BFloat();
2042 default:
2043 llvm_unreachable("unsupported fp type");
2044 }
2045}
2046
2047//===----------------------------------------------------------------------===//
2048// Operand
2049//===----------------------------------------------------------------------===//
2050
2051static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
2052 bool Lost;
2053
2054 // Convert literal to single precision
2055  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
2056                                               APFloat::rmNearestTiesToEven,
2057                                               &Lost);
2058  // We allow precision loss but not overflow or underflow
2059 if (Status != APFloat::opOK &&
2060 Lost &&
2061 ((Status & APFloat::opOverflow) != 0 ||
2062 (Status & APFloat::opUnderflow) != 0)) {
2063 return false;
2064 }
2065
2066 return true;
2067}
2068
2069static bool isSafeTruncation(int64_t Val, unsigned Size) {
2070 return isUIntN(Size, Val) || isIntN(Size, Val);
2071}
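// Illustrative note (not part of the original source): for Size == 16 this
// accepts both 0xFFFF (valid as unsigned) and -1 (valid as signed), while
// 0x1FFFF is rejected because it fits neither the 16-bit unsigned nor the
// 16-bit signed range.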
2072
2073static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2074 if (VT.getScalarType() == MVT::i16)
2075 return isInlinableLiteral32(Val, HasInv2Pi);
2076
2077 if (VT.getScalarType() == MVT::f16)
2078 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2079
2080 assert(VT.getScalarType() == MVT::bf16);
2081
2082 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2083}
2084
2085bool AMDGPUOperand::isInlinableImm(MVT type) const {
2086
2087 // This is a hack to enable named inline values like
2088 // shared_base with both 32-bit and 64-bit operands.
2089 // Note that these values are defined as
2090 // 32-bit operands only.
2091 if (isInlineValue()) {
2092 return true;
2093 }
2094
2095 if (!isImmTy(ImmTyNone)) {
2096 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2097 return false;
2098 }
2099  // TODO: We should avoid using host float here. It would be better to
2100  // check the float bit values, which is what a few other places do.
2101  // We've had bot failures before due to weird NaN support on MIPS hosts.
2102
2103 APInt Literal(64, Imm.Val);
2104
2105 if (Imm.IsFPImm) { // We got fp literal token
2106 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2107      return AMDGPU::isInlinableLiteral64(Imm.Val,
2108                                          AsmParser->hasInv2PiInlineImm());
2109 }
2110
2111 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2112 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2113 return false;
2114
2115 if (type.getScalarSizeInBits() == 16) {
2116 bool Lost = false;
2117 switch (type.getScalarType().SimpleTy) {
2118 default:
2119 llvm_unreachable("unknown 16-bit type");
2120 case MVT::bf16:
2121 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2122 &Lost);
2123 break;
2124 case MVT::f16:
2125 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2126 &Lost);
2127 break;
2128 case MVT::i16:
2129 FPLiteral.convert(APFloatBase::IEEEsingle(),
2130 APFloat::rmNearestTiesToEven, &Lost);
2131 break;
2132 }
2133      // We need to use the 32-bit representation here because when a
2134      // floating-point inline constant is used as an i16 operand, its 32-bit
2135      // representation will be used. We will need the 32-bit value to check
2136      // whether it is an FP inline constant.
2137 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2138 return isInlineableLiteralOp16(ImmVal, type,
2139 AsmParser->hasInv2PiInlineImm());
2140 }
2141
2142 // Check if single precision literal is inlinable
2143    return AMDGPU::isInlinableLiteral32(
2144        static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2145 AsmParser->hasInv2PiInlineImm());
2146 }
2147
2148 // We got int literal token.
2149 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2150    return AMDGPU::isInlinableLiteral64(Imm.Val,
2151                                        AsmParser->hasInv2PiInlineImm());
2152 }
2153
2154 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2155 return false;
2156 }
2157
2158 if (type.getScalarSizeInBits() == 16) {
2159    return isInlineableLiteralOp16(
2160        static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2161 type, AsmParser->hasInv2PiInlineImm());
2162 }
2163
2164  return AMDGPU::isInlinableLiteral32(
2165      static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2166 AsmParser->hasInv2PiInlineImm());
2167}
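// Illustrative note (not part of the original source): AMDGPU inline constants
// are the values encodable directly in the operand field, e.g. integers in the
// range -16..64 and selected FP values such as +/-0.5, +/-1.0, +/-2.0, +/-4.0,
// plus 1/(2*pi) on subtargets that report hasInv2PiInlineImm().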
2168
2169bool AMDGPUOperand::isLiteralImm(MVT type) const {
2170 // Check that this immediate can be added as literal
2171 if (!isImmTy(ImmTyNone)) {
2172 return false;
2173 }
2174
2175 bool Allow64Bit =
2176 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2177
2178 if (!Imm.IsFPImm) {
2179 // We got int literal token.
2180
2181 if (type == MVT::f64 && hasFPModifiers()) {
2182 // Cannot apply fp modifiers to int literals preserving the same semantics
2183 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
2184 // disable these cases.
2185 return false;
2186 }
2187
2188 unsigned Size = type.getSizeInBits();
2189 if (Size == 64) {
2190 if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Imm.Val, false))
2191 return true;
2192 Size = 32;
2193 }
2194
2195 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2196 // types.
2197 return isSafeTruncation(Imm.Val, Size);
2198 }
2199
2200 // We got fp literal token
2201 if (type == MVT::f64) { // Expected 64-bit fp operand
2202    // We would set the low 64 bits of the literal to zeroes, but we accept these literals
2203 return true;
2204 }
2205
2206 if (type == MVT::i64) { // Expected 64-bit int operand
2207 // We don't allow fp literals in 64-bit integer instructions. It is
2208 // unclear how we should encode them.
2209 return false;
2210 }
2211
2212 // We allow fp literals with f16x2 operands assuming that the specified
2213 // literal goes into the lower half and the upper half is zero. We also
2214 // require that the literal may be losslessly converted to f16.
2215 //
2216 // For i16x2 operands, we assume that the specified literal is encoded as a
2217 // single-precision float. This is pretty odd, but it matches SP3 and what
2218 // happens in hardware.
2219 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2220 : (type == MVT::v2i16) ? MVT::f32
2221 : (type == MVT::v2f32) ? MVT::f32
2222 : type;
2223
2224 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2225 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2226}
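// Illustrative note (not part of the original source): given a v2f16 operand, a
// literal such as 1.5 must convert to f16 losslessly and is assumed to occupy
// the low half with the upper half zero; given a v2i16 operand the same literal
// is instead checked against f32, matching the SP3 behaviour described above.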
2227
2228bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2229 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2230}
2231
2232bool AMDGPUOperand::isVRegWithInputMods() const {
2233 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2234 // GFX90A allows DPP on 64-bit operands.
2235 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2236 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2237}
2238
2239template <bool IsFake16>
2240bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2241 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2242 : AMDGPU::VGPR_16_Lo128RegClassID);
2243}
2244
2245template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2246 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2247 : AMDGPU::VGPR_16RegClassID);
2248}
2249
2250bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2251 if (AsmParser->isVI())
2252 return isVReg32();
2253 if (AsmParser->isGFX9Plus())
2254 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2255 return false;
2256}
2257
2258bool AMDGPUOperand::isSDWAFP16Operand() const {
2259 return isSDWAOperand(MVT::f16);
2260}
2261
2262bool AMDGPUOperand::isSDWAFP32Operand() const {
2263 return isSDWAOperand(MVT::f32);
2264}
2265
2266bool AMDGPUOperand::isSDWAInt16Operand() const {
2267 return isSDWAOperand(MVT::i16);
2268}
2269
2270bool AMDGPUOperand::isSDWAInt32Operand() const {
2271 return isSDWAOperand(MVT::i32);
2272}
2273
2274bool AMDGPUOperand::isBoolReg() const {
2275 return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
2276 (AsmParser->isWave32() && isSCSrc_b32()));
2277}
2278
2279uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2280{
2281 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2282 assert(Size == 2 || Size == 4 || Size == 8);
2283
2284 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2285
2286 if (Imm.Mods.Abs) {
2287 Val &= ~FpSignMask;
2288 }
2289 if (Imm.Mods.Neg) {
2290 Val ^= FpSignMask;
2291 }
2292
2293 return Val;
2294}
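// Worked example (not part of the original source): for a 32-bit operand the
// sign mask is 1 << 31 = 0x80000000, so abs clears it (0xBF800000, i.e. -1.0,
// becomes 0x3F800000, i.e. 1.0) and neg toggles it (0x3F800000 becomes
// 0xBF800000).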
2295
2296void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2297 MCOpIdx = Inst.getNumOperands();
2298
2299 if (isExpr()) {
2300    Inst.addOperand(MCOperand::createExpr(Expr));
2301    return;
2302 }
2303
2304 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2305 Inst.getNumOperands())) {
2306 addLiteralImmOperand(Inst, Imm.Val,
2307 ApplyModifiers &
2308 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2309 } else {
2310 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2311    Inst.addOperand(MCOperand::createImm(Imm.Val));
2312  }
2313}
2314
2315void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2316 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2317 auto OpNum = Inst.getNumOperands();
2318 // Check that this operand accepts literals
2319 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2320
2321 if (ApplyModifiers) {
2322 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2323 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2324 Val = applyInputFPModifiers(Val, Size);
2325 }
2326
2327 APInt Literal(64, Val);
2328 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2329
2330 bool CanUse64BitLiterals =
2331 AsmParser->has64BitLiterals() &&
2332 !(InstDesc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));
2333 MCContext &Ctx = AsmParser->getContext();
2334
2335 if (Imm.IsFPImm) { // We got fp literal token
2336 switch (OpTy) {
2342 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2343 AsmParser->hasInv2PiInlineImm())) {
2344 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2345 return;
2346 }
2347
2348 // Non-inlineable
2349 if (AMDGPU::isSISrcFPOperand(InstDesc,
2350 OpNum)) { // Expected 64-bit fp operand
2351 bool HasMandatoryLiteral =
2352 AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm);
2353 // For fp operands we check if low 32 bits are zeros
2354 if (Literal.getLoBits(32) != 0 &&
2355 (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
2356 !HasMandatoryLiteral) {
2357 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
2358 Inst.getLoc(),
2359 "Can't encode literal as exact 64-bit floating-point operand. "
2360 "Low 32-bits will be set to zero");
2361 Val &= 0xffffffff00000000u;
2362 }
2363
2364 if ((OpTy == AMDGPU::OPERAND_REG_IMM_FP64 ||
2367 CanUse64BitLiterals && Lo_32(Val) != 0) {
2369 AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx)));
2370 } else {
2372 }
2373 return;
2374 }
2375
2376 // We don't allow fp literals in 64-bit integer instructions. It is
2377 // unclear how we should encode them. This case should be checked earlier
2378 // in predicate methods (isLiteralImm())
2379 llvm_unreachable("fp literal in 64-bit integer instruction.");
2380
2382 if (CanUse64BitLiterals && Lo_32(Val) != 0) {
2384 AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx)));
2385 } else {
2387 }
2388 return;
2389
2394 if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
2395        // This is 1/(2*pi), which is going to be truncated to bf16 with a
2396        // loss of precision. The constant represents the idiomatic fp32 value
2397        // of 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16
2398        // bits cleared. Prevent the rounding below.
2399 Inst.addOperand(MCOperand::createImm(0x3e22));
2400 return;
2401 }
2402 [[fallthrough]];
2403
2424 bool lost;
2425 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2426 // Convert literal to single precision
2427 FPLiteral.convert(*getOpFltSemantics(OpTy),
2428 APFloat::rmNearestTiesToEven, &lost);
2429    // We allow precision loss but not overflow or underflow. This should be
2430    // checked earlier in isLiteralImm().
2431
2432 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2433 Inst.addOperand(MCOperand::createImm(ImmVal));
2434 return;
2435 }
2436 default:
2437 llvm_unreachable("invalid operand size");
2438 }
2439
2440 return;
2441 }
2442
2443 // We got int literal token.
2444 // Only sign extend inline immediates.
2445 switch (OpTy) {
2460 return;
2461
2464 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2466 return;
2467 }
2468
2469  // When the 32 MSBs are not zero (which effectively means the value can't be
2470  // safely truncated to uint32_t), if the target doesn't support 64-bit
2471  // literals, or the lit modifier is explicitly used, we need to truncate it
2472  // to the 32 LSBs.
2473 if (!AsmParser->has64BitLiterals() ||
2474 getModifiers().Lit == LitModifier::Lit)
2475 Val = Lo_32(Val);
2476
2477 if (CanUse64BitLiterals && (!isInt<32>(Val) || !isUInt<32>(Val))) {
2479 AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx)));
2480 } else {
2482 }
2483 return;
2484
2488 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2490 return;
2491 }
2492
2493    // If the target doesn't support 64-bit literals, we need to use the
2494    // constant as the 32 MSBs of a double-precision floating-point value.
2495 if (!AsmParser->has64BitLiterals()) {
2496 Val = static_cast<uint64_t>(Val) << 32;
2497 } else {
2498      // Now that the target supports 64-bit literals, there are still two
2499      // cases where we want to use the src_literal encoding:
2500      // 1) it is explicitly forced by the lit modifier;
2501      // 2) the value has a valid 32-bit representation (signed or unsigned)
2502      //    and is not forced to 64 bits by the lit64 modifier.
2503 if (getModifiers().Lit == LitModifier::Lit ||
2504 (getModifiers().Lit != LitModifier::Lit64 &&
2505 (isInt<32>(Val) || isUInt<32>(Val))))
2506 Val = static_cast<uint64_t>(Val) << 32;
2507 }
2508
2509 if (CanUse64BitLiterals && Lo_32(Val) != 0) {
2511 AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx)));
2512 } else {
2514 }
2515 return;
2516
2529 return;
2530
2532 if ((isInt<32>(Val) || isUInt<32>(Val)) &&
2533 getModifiers().Lit != LitModifier::Lit64)
2534 Val <<= 32;
2535
2536 if (CanUse64BitLiterals && Lo_32(Val) != 0) {
2538 AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx)));
2539 } else {
2541 }
2542 return;
2543
2544 default:
2545 llvm_unreachable("invalid operand type");
2546 }
2547}
2548
2549void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2550 MCOpIdx = Inst.getNumOperands();
2551 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2552}
2553
2554bool AMDGPUOperand::isInlineValue() const {
2555 return isRegKind() && ::isInlineValue(getReg());
2556}
2557
2558//===----------------------------------------------------------------------===//
2559// AsmParser
2560//===----------------------------------------------------------------------===//
2561
2562void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2563  // TODO: make these pre-defined variables read-only.
2564  // Currently there is no suitable machinery in core llvm-mc for this.
2565  // MCSymbol::isRedefinable is intended for another purpose, and
2566  // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
2567 MCContext &Ctx = getContext();
2568 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2569  Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2570 }
2571
2572static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2573 if (Is == IS_VGPR) {
2574 switch (RegWidth) {
2575 default: return -1;
2576 case 32:
2577 return AMDGPU::VGPR_32RegClassID;
2578 case 64:
2579 return AMDGPU::VReg_64RegClassID;
2580 case 96:
2581 return AMDGPU::VReg_96RegClassID;
2582 case 128:
2583 return AMDGPU::VReg_128RegClassID;
2584 case 160:
2585 return AMDGPU::VReg_160RegClassID;
2586 case 192:
2587 return AMDGPU::VReg_192RegClassID;
2588 case 224:
2589 return AMDGPU::VReg_224RegClassID;
2590 case 256:
2591 return AMDGPU::VReg_256RegClassID;
2592 case 288:
2593 return AMDGPU::VReg_288RegClassID;
2594 case 320:
2595 return AMDGPU::VReg_320RegClassID;
2596 case 352:
2597 return AMDGPU::VReg_352RegClassID;
2598 case 384:
2599 return AMDGPU::VReg_384RegClassID;
2600 case 512:
2601 return AMDGPU::VReg_512RegClassID;
2602 case 1024:
2603 return AMDGPU::VReg_1024RegClassID;
2604 }
2605 } else if (Is == IS_TTMP) {
2606 switch (RegWidth) {
2607 default: return -1;
2608 case 32:
2609 return AMDGPU::TTMP_32RegClassID;
2610 case 64:
2611 return AMDGPU::TTMP_64RegClassID;
2612 case 128:
2613 return AMDGPU::TTMP_128RegClassID;
2614 case 256:
2615 return AMDGPU::TTMP_256RegClassID;
2616 case 512:
2617 return AMDGPU::TTMP_512RegClassID;
2618 }
2619 } else if (Is == IS_SGPR) {
2620 switch (RegWidth) {
2621 default: return -1;
2622 case 32:
2623 return AMDGPU::SGPR_32RegClassID;
2624 case 64:
2625 return AMDGPU::SGPR_64RegClassID;
2626 case 96:
2627 return AMDGPU::SGPR_96RegClassID;
2628 case 128:
2629 return AMDGPU::SGPR_128RegClassID;
2630 case 160:
2631 return AMDGPU::SGPR_160RegClassID;
2632 case 192:
2633 return AMDGPU::SGPR_192RegClassID;
2634 case 224:
2635 return AMDGPU::SGPR_224RegClassID;
2636 case 256:
2637 return AMDGPU::SGPR_256RegClassID;
2638 case 288:
2639 return AMDGPU::SGPR_288RegClassID;
2640 case 320:
2641 return AMDGPU::SGPR_320RegClassID;
2642 case 352:
2643 return AMDGPU::SGPR_352RegClassID;
2644 case 384:
2645 return AMDGPU::SGPR_384RegClassID;
2646 case 512:
2647 return AMDGPU::SGPR_512RegClassID;
2648 }
2649 } else if (Is == IS_AGPR) {
2650 switch (RegWidth) {
2651 default: return -1;
2652 case 32:
2653 return AMDGPU::AGPR_32RegClassID;
2654 case 64:
2655 return AMDGPU::AReg_64RegClassID;
2656 case 96:
2657 return AMDGPU::AReg_96RegClassID;
2658 case 128:
2659 return AMDGPU::AReg_128RegClassID;
2660 case 160:
2661 return AMDGPU::AReg_160RegClassID;
2662 case 192:
2663 return AMDGPU::AReg_192RegClassID;
2664 case 224:
2665 return AMDGPU::AReg_224RegClassID;
2666 case 256:
2667 return AMDGPU::AReg_256RegClassID;
2668 case 288:
2669 return AMDGPU::AReg_288RegClassID;
2670 case 320:
2671 return AMDGPU::AReg_320RegClassID;
2672 case 352:
2673 return AMDGPU::AReg_352RegClassID;
2674 case 384:
2675 return AMDGPU::AReg_384RegClassID;
2676 case 512:
2677 return AMDGPU::AReg_512RegClassID;
2678 case 1024:
2679 return AMDGPU::AReg_1024RegClassID;
2680 }
2681 }
2682 return -1;
2683}
2684
2685 static MCRegister getSpecialRegForName(StringRef RegName) {
2686  return StringSwitch<MCRegister>(RegName)
2687  .Case("exec", AMDGPU::EXEC)
2688 .Case("vcc", AMDGPU::VCC)
2689 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2690 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2691 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2692 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2693 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2694 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2695 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2696 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2697 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2698 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2699 .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
2700 .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
2701 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2702 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2703 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2704 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2705 .Case("m0", AMDGPU::M0)
2706 .Case("vccz", AMDGPU::SRC_VCCZ)
2707 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2708 .Case("execz", AMDGPU::SRC_EXECZ)
2709 .Case("src_execz", AMDGPU::SRC_EXECZ)
2710 .Case("scc", AMDGPU::SRC_SCC)
2711 .Case("src_scc", AMDGPU::SRC_SCC)
2712 .Case("tba", AMDGPU::TBA)
2713 .Case("tma", AMDGPU::TMA)
2714 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2715 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2716 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2717 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2718 .Case("vcc_lo", AMDGPU::VCC_LO)
2719 .Case("vcc_hi", AMDGPU::VCC_HI)
2720 .Case("exec_lo", AMDGPU::EXEC_LO)
2721 .Case("exec_hi", AMDGPU::EXEC_HI)
2722 .Case("tma_lo", AMDGPU::TMA_LO)
2723 .Case("tma_hi", AMDGPU::TMA_HI)
2724 .Case("tba_lo", AMDGPU::TBA_LO)
2725 .Case("tba_hi", AMDGPU::TBA_HI)
2726 .Case("pc", AMDGPU::PC_REG)
2727 .Case("null", AMDGPU::SGPR_NULL)
2728 .Default(AMDGPU::NoRegister);
2729}
2730
2731bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2732 SMLoc &EndLoc, bool RestoreOnFailure) {
2733 auto R = parseRegister();
2734 if (!R) return true;
2735 assert(R->isReg());
2736 RegNo = R->getReg();
2737 StartLoc = R->getStartLoc();
2738 EndLoc = R->getEndLoc();
2739 return false;
2740}
2741
2742bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2743 SMLoc &EndLoc) {
2744 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2745}
2746
2747ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2748 SMLoc &EndLoc) {
2749 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2750 bool PendingErrors = getParser().hasPendingError();
2751 getParser().clearPendingErrors();
2752 if (PendingErrors)
2753 return ParseStatus::Failure;
2754 if (Result)
2755 return ParseStatus::NoMatch;
2756 return ParseStatus::Success;
2757}
2758
2759bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2760 RegisterKind RegKind,
2761 MCRegister Reg1, SMLoc Loc) {
2762 switch (RegKind) {
2763 case IS_SPECIAL:
2764 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2765 Reg = AMDGPU::EXEC;
2766 RegWidth = 64;
2767 return true;
2768 }
2769 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2770 Reg = AMDGPU::FLAT_SCR;
2771 RegWidth = 64;
2772 return true;
2773 }
2774 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2775 Reg = AMDGPU::XNACK_MASK;
2776 RegWidth = 64;
2777 return true;
2778 }
2779 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2780 Reg = AMDGPU::VCC;
2781 RegWidth = 64;
2782 return true;
2783 }
2784 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2785 Reg = AMDGPU::TBA;
2786 RegWidth = 64;
2787 return true;
2788 }
2789 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2790 Reg = AMDGPU::TMA;
2791 RegWidth = 64;
2792 return true;
2793 }
2794 Error(Loc, "register does not fit in the list");
2795 return false;
2796 case IS_VGPR:
2797 case IS_SGPR:
2798 case IS_AGPR:
2799 case IS_TTMP:
2800 if (Reg1 != Reg + RegWidth / 32) {
2801 Error(Loc, "registers in a list must have consecutive indices");
2802 return false;
2803 }
2804 RegWidth += 32;
2805 return true;
2806 default:
2807 llvm_unreachable("unexpected register kind");
2808 }
2809}
2810
2811struct RegInfo {
2812  StringLiteral Name;
2813  RegisterKind Kind;
2814};
2815
2816static constexpr RegInfo RegularRegisters[] = {
2817 {{"v"}, IS_VGPR},
2818 {{"s"}, IS_SGPR},
2819 {{"ttmp"}, IS_TTMP},
2820 {{"acc"}, IS_AGPR},
2821 {{"a"}, IS_AGPR},
2822};
2823
2824static bool isRegularReg(RegisterKind Kind) {
2825 return Kind == IS_VGPR ||
2826 Kind == IS_SGPR ||
2827 Kind == IS_TTMP ||
2828 Kind == IS_AGPR;
2829}
2830
2831 static const RegInfo *getRegularRegInfo(StringRef Str) {
2832  for (const RegInfo &Reg : RegularRegisters)
2833 if (Str.starts_with(Reg.Name))
2834 return &Reg;
2835 return nullptr;
2836}
2837
2838static bool getRegNum(StringRef Str, unsigned& Num) {
2839 return !Str.getAsInteger(10, Num);
2840}
2841
2842bool
2843AMDGPUAsmParser::isRegister(const AsmToken &Token,
2844 const AsmToken &NextToken) const {
2845
2846 // A list of consecutive registers: [s0,s1,s2,s3]
2847 if (Token.is(AsmToken::LBrac))
2848 return true;
2849
2850 if (!Token.is(AsmToken::Identifier))
2851 return false;
2852
2853 // A single register like s0 or a range of registers like s[0:1]
2854
2855 StringRef Str = Token.getString();
2856 const RegInfo *Reg = getRegularRegInfo(Str);
2857 if (Reg) {
2858 StringRef RegName = Reg->Name;
2859 StringRef RegSuffix = Str.substr(RegName.size());
2860 if (!RegSuffix.empty()) {
2861 RegSuffix.consume_back(".l");
2862 RegSuffix.consume_back(".h");
2863 unsigned Num;
2864 // A single register with an index: rXX
2865 if (getRegNum(RegSuffix, Num))
2866 return true;
2867 } else {
2868 // A range of registers: r[XX:YY].
2869 if (NextToken.is(AsmToken::LBrac))
2870 return true;
2871 }
2872 }
2873
2874 return getSpecialRegForName(Str).isValid();
2875}
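// Illustrative note (not part of the original source): operands recognised as
// registers here include single names ("s0", "v255", "ttmp3", "a1"), ranges
// ("s[0:1]", "v[4:7]"), bracketed lists ("[v0,v1,v2,v3]"), 16-bit halves
// ("v5.l", "v5.h"), and special names such as "vcc", "exec" or "m0".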
2876
2877bool
2878AMDGPUAsmParser::isRegister()
2879{
2880 return isRegister(getToken(), peekToken());
2881}
2882
2883MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2884 unsigned SubReg, unsigned RegWidth,
2885 SMLoc Loc) {
2886 assert(isRegularReg(RegKind));
2887
2888 unsigned AlignSize = 1;
2889 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2890 // SGPR and TTMP registers must be aligned.
2891 // Max required alignment is 4 dwords.
2892 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2893 }
2894
2895 if (RegNum % AlignSize != 0) {
2896 Error(Loc, "invalid register alignment");
2897 return MCRegister();
2898 }
2899
2900 unsigned RegIdx = RegNum / AlignSize;
2901 int RCID = getRegClass(RegKind, RegWidth);
2902 if (RCID == -1) {
2903 Error(Loc, "invalid or unsupported register size");
2904 return MCRegister();
2905 }
2906
2907 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2908 const MCRegisterClass RC = TRI->getRegClass(RCID);
2909 if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
2910 Error(Loc, "register index is out of range");
2911 return AMDGPU::NoRegister;
2912 }
2913
2914 if (RegKind == IS_VGPR && !isGFX1250() && RegIdx + RegWidth / 32 > 256) {
2915 Error(Loc, "register index is out of range");
2916 return MCRegister();
2917 }
2918
2919 MCRegister Reg = RC.getRegister(RegIdx);
2920
2921 if (SubReg) {
2922 Reg = TRI->getSubReg(Reg, SubReg);
2923
2924 // Currently all regular registers have their .l and .h subregisters, so
2925 // we should never need to generate an error here.
2926 assert(Reg && "Invalid subregister!");
2927 }
2928
2929 return Reg;
2930}
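// Illustrative note (not part of the original source): SGPR/TTMP groups must
// start on a multiple of min(bit_ceil(RegWidth/32), 4), so "s[2:3]" and
// "s[4:7]" are accepted while "s[1:2]" fails with "invalid register alignment";
// VGPR groups keep AlignSize == 1 and have no such restriction here.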
2931
2932bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
2933 unsigned &SubReg) {
2934 int64_t RegLo, RegHi;
2935 if (!skipToken(AsmToken::LBrac, "missing register index"))
2936 return false;
2937
2938 SMLoc FirstIdxLoc = getLoc();
2939 SMLoc SecondIdxLoc;
2940
2941 if (!parseExpr(RegLo))
2942 return false;
2943
2944 if (trySkipToken(AsmToken::Colon)) {
2945 SecondIdxLoc = getLoc();
2946 if (!parseExpr(RegHi))
2947 return false;
2948 } else {
2949 RegHi = RegLo;
2950 }
2951
2952 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2953 return false;
2954
2955 if (!isUInt<32>(RegLo)) {
2956 Error(FirstIdxLoc, "invalid register index");
2957 return false;
2958 }
2959
2960 if (!isUInt<32>(RegHi)) {
2961 Error(SecondIdxLoc, "invalid register index");
2962 return false;
2963 }
2964
2965 if (RegLo > RegHi) {
2966 Error(FirstIdxLoc, "first register index should not exceed second index");
2967 return false;
2968 }
2969
2970 if (RegHi == RegLo) {
2971 StringRef RegSuffix = getTokenStr();
2972 if (RegSuffix == ".l") {
2973 SubReg = AMDGPU::lo16;
2974 lex();
2975 } else if (RegSuffix == ".h") {
2976 SubReg = AMDGPU::hi16;
2977 lex();
2978 }
2979 }
2980
2981 Num = static_cast<unsigned>(RegLo);
2982 RegWidth = 32 * ((RegHi - RegLo) + 1);
2983
2984 return true;
2985}
2986
2987MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2988 unsigned &RegNum,
2989 unsigned &RegWidth,
2990 SmallVectorImpl<AsmToken> &Tokens) {
2991 assert(isToken(AsmToken::Identifier));
2992 MCRegister Reg = getSpecialRegForName(getTokenStr());
2993 if (Reg) {
2994 RegNum = 0;
2995 RegWidth = 32;
2996 RegKind = IS_SPECIAL;
2997 Tokens.push_back(getToken());
2998 lex(); // skip register name
2999 }
3000 return Reg;
3001}
3002
3003MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3004 unsigned &RegNum,
3005 unsigned &RegWidth,
3006 SmallVectorImpl<AsmToken> &Tokens) {
3007 assert(isToken(AsmToken::Identifier));
3008 StringRef RegName = getTokenStr();
3009 auto Loc = getLoc();
3010
3011 const RegInfo *RI = getRegularRegInfo(RegName);
3012 if (!RI) {
3013 Error(Loc, "invalid register name");
3014 return MCRegister();
3015 }
3016
3017 Tokens.push_back(getToken());
3018 lex(); // skip register name
3019
3020 RegKind = RI->Kind;
3021 StringRef RegSuffix = RegName.substr(RI->Name.size());
3022 unsigned SubReg = NoSubRegister;
3023 if (!RegSuffix.empty()) {
3024 if (RegSuffix.consume_back(".l"))
3025 SubReg = AMDGPU::lo16;
3026 else if (RegSuffix.consume_back(".h"))
3027 SubReg = AMDGPU::hi16;
3028
3029 // Single 32-bit register: vXX.
3030 if (!getRegNum(RegSuffix, RegNum)) {
3031 Error(Loc, "invalid register index");
3032 return MCRegister();
3033 }
3034 RegWidth = 32;
3035 } else {
3036 // Range of registers: v[XX:YY]. ":YY" is optional.
3037 if (!ParseRegRange(RegNum, RegWidth, SubReg))
3038 return MCRegister();
3039 }
3040
3041 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
3042}
3043
3044MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3045 unsigned &RegNum, unsigned &RegWidth,
3046 SmallVectorImpl<AsmToken> &Tokens) {
3047 MCRegister Reg;
3048 auto ListLoc = getLoc();
3049
3050 if (!skipToken(AsmToken::LBrac,
3051 "expected a register or a list of registers")) {
3052 return MCRegister();
3053 }
3054
3055 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
3056
3057 auto Loc = getLoc();
3058 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
3059 return MCRegister();
3060 if (RegWidth != 32) {
3061 Error(Loc, "expected a single 32-bit register");
3062 return MCRegister();
3063 }
3064
3065 for (; trySkipToken(AsmToken::Comma); ) {
3066 RegisterKind NextRegKind;
3067 MCRegister NextReg;
3068 unsigned NextRegNum, NextRegWidth;
3069 Loc = getLoc();
3070
3071 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3072 NextRegNum, NextRegWidth,
3073 Tokens)) {
3074 return MCRegister();
3075 }
3076 if (NextRegWidth != 32) {
3077 Error(Loc, "expected a single 32-bit register");
3078 return MCRegister();
3079 }
3080 if (NextRegKind != RegKind) {
3081 Error(Loc, "registers in a list must be of the same kind");
3082 return MCRegister();
3083 }
3084 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
3085 return MCRegister();
3086 }
3087
3088 if (!skipToken(AsmToken::RBrac,
3089 "expected a comma or a closing square bracket")) {
3090 return MCRegister();
3091 }
3092
3093 if (isRegularReg(RegKind))
3094 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3095
3096 return Reg;
3097}
3098
3099bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3100 MCRegister &Reg, unsigned &RegNum,
3101 unsigned &RegWidth,
3102 SmallVectorImpl<AsmToken> &Tokens) {
3103 auto Loc = getLoc();
3104 Reg = MCRegister();
3105
3106 if (isToken(AsmToken::Identifier)) {
3107 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3108 if (!Reg)
3109 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3110 } else {
3111 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3112 }
3113
3114 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3115 if (!Reg) {
3116 assert(Parser.hasPendingError());
3117 return false;
3118 }
3119
3120 if (!subtargetHasRegister(*TRI, Reg)) {
3121 if (Reg == AMDGPU::SGPR_NULL) {
3122 Error(Loc, "'null' operand is not supported on this GPU");
3123 } else {
3125 " register not available on this GPU");
3126 }
3127 return false;
3128 }
3129
3130 return true;
3131}
3132
3133bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3134 MCRegister &Reg, unsigned &RegNum,
3135 unsigned &RegWidth,
3136 bool RestoreOnFailure /*=false*/) {
3137 Reg = MCRegister();
3138
3140 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3141 if (RestoreOnFailure) {
3142 while (!Tokens.empty()) {
3143 getLexer().UnLex(Tokens.pop_back_val());
3144 }
3145 }
3146 return true;
3147 }
3148 return false;
3149}
3150
3151std::optional<StringRef>
3152AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3153 switch (RegKind) {
3154 case IS_VGPR:
3155 return StringRef(".amdgcn.next_free_vgpr");
3156 case IS_SGPR:
3157 return StringRef(".amdgcn.next_free_sgpr");
3158 default:
3159 return std::nullopt;
3160 }
3161}
3162
3163void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3164 auto SymbolName = getGprCountSymbolName(RegKind);
3165 assert(SymbolName && "initializing invalid register kind");
3166 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3168 Sym->setRedefinable(true);
3169}
3170
3171bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3172 unsigned DwordRegIndex,
3173 unsigned RegWidth) {
3174 // Symbols are only defined for GCN targets
3175 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3176 return true;
3177
3178 auto SymbolName = getGprCountSymbolName(RegKind);
3179 if (!SymbolName)
3180 return true;
3181 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3182
3183 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3184 int64_t OldCount;
3185
3186 if (!Sym->isVariable())
3187 return !Error(getLoc(),
3188 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3189 if (!Sym->getVariableValue()->evaluateAsAbsolute(OldCount))
3190 return !Error(
3191 getLoc(),
3192 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3193
3194 if (OldCount <= NewMax)
3196
3197 return true;
3198}
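// Illustrative note (not part of the original source): under the HSA ABI,
// parsing a use of v[10:11] gives DwordRegIndex = 10 and RegWidth = 64, so
// NewMax = 11 and .amdgcn.next_free_vgpr is raised if its current value does
// not already cover v11.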
3199
3200std::unique_ptr<AMDGPUOperand>
3201AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3202 const auto &Tok = getToken();
3203 SMLoc StartLoc = Tok.getLoc();
3204 SMLoc EndLoc = Tok.getEndLoc();
3205 RegisterKind RegKind;
3206 MCRegister Reg;
3207 unsigned RegNum, RegWidth;
3208
3209 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3210 return nullptr;
3211 }
3212 if (isHsaAbi(getSTI())) {
3213 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3214 return nullptr;
3215 } else
3216 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3217 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3218}
3219
3220ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3221 bool HasSP3AbsModifier, LitModifier Lit) {
3222 // TODO: add syntactic sugar for 1/(2*PI)
3223
3224 if (isRegister() || isModifier())
3225 return ParseStatus::NoMatch;
3226
3227 if (Lit == LitModifier::None) {
3228 if (trySkipId("lit"))
3229 Lit = LitModifier::Lit;
3230 else if (trySkipId("lit64"))
3231 Lit = LitModifier::Lit64;
3232
3233 if (Lit != LitModifier::None) {
3234 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3235 return ParseStatus::Failure;
3236 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, Lit);
3237 if (S.isSuccess() &&
3238 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3239 return ParseStatus::Failure;
3240 return S;
3241 }
3242 }
3243
3244 const auto& Tok = getToken();
3245 const auto& NextTok = peekToken();
3246 bool IsReal = Tok.is(AsmToken::Real);
3247 SMLoc S = getLoc();
3248 bool Negate = false;
3249
3250 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3251 lex();
3252 IsReal = true;
3253 Negate = true;
3254 }
3255
3256 AMDGPUOperand::Modifiers Mods;
3257 Mods.Lit = Lit;
3258
3259 if (IsReal) {
3260 // Floating-point expressions are not supported.
3261    // Only floating-point literals with an
3262    // optional sign are allowed.
3263
3264 StringRef Num = getTokenStr();
3265 lex();
3266
3267 APFloat RealVal(APFloat::IEEEdouble());
3268 auto roundMode = APFloat::rmNearestTiesToEven;
3269 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3270 return ParseStatus::Failure;
3271 if (Negate)
3272 RealVal.changeSign();
3273
3274 Operands.push_back(
3275 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3276 AMDGPUOperand::ImmTyNone, true));
3277 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3278 Op.setModifiers(Mods);
3279
3280 return ParseStatus::Success;
3281
3282 } else {
3283 int64_t IntVal;
3284 const MCExpr *Expr;
3285 SMLoc S = getLoc();
3286
3287 if (HasSP3AbsModifier) {
3288      // This is a workaround for handling expressions
3289      // as arguments of the SP3 'abs' modifier, for example:
3290      //   |1.0|
3291      //   |-1|
3292      //   |1+x|
3293      // This syntax is not compatible with the syntax of standard
3294      // MC expressions (due to the trailing '|').
3295 SMLoc EndLoc;
3296 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3297 return ParseStatus::Failure;
3298 } else {
3299 if (Parser.parseExpression(Expr))
3300 return ParseStatus::Failure;
3301 }
3302
3303 if (Expr->evaluateAsAbsolute(IntVal)) {
3304 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3305 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3306 Op.setModifiers(Mods);
3307 } else {
3308 if (Lit != LitModifier::None)
3309 return ParseStatus::NoMatch;
3310 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3311 }
3312
3313 return ParseStatus::Success;
3314 }
3315
3316 return ParseStatus::NoMatch;
3317}
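// Illustrative note (assumed examples, not part of the original source): the
// lit()/lit64() wrappers force literal encoding rather than an inline constant,
// e.g. "v_mov_b32 v0, lit(1)" would carry 1 as a 32-bit literal instead of the
// inline constant 1; a plain "-1.5" is also accepted here because a leading
// minus before a real token is folded into the FP literal.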
3318
3319ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3320 if (!isRegister())
3321 return ParseStatus::NoMatch;
3322
3323 if (auto R = parseRegister()) {
3324 assert(R->isReg());
3325 Operands.push_back(std::move(R));
3326 return ParseStatus::Success;
3327 }
3328 return ParseStatus::Failure;
3329}
3330
3331ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3332 bool HasSP3AbsMod, LitModifier Lit) {
3333 ParseStatus Res = parseReg(Operands);
3334 if (!Res.isNoMatch())
3335 return Res;
3336 if (isModifier())
3337 return ParseStatus::NoMatch;
3338 return parseImm(Operands, HasSP3AbsMod, Lit);
3339}
3340
3341bool
3342AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3343 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3344 const auto &str = Token.getString();
3345 return str == "abs" || str == "neg" || str == "sext";
3346 }
3347 return false;
3348}
3349
3350bool
3351AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3352 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3353}
3354
3355bool
3356AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3357 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3358}
3359
3360bool
3361AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3362 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3363}
3364
3365// Check if this is an operand modifier or an opcode modifier
3366 // which may look like an expression but is not. We should
3367// avoid parsing these modifiers as expressions. Currently
3368// recognized sequences are:
3369// |...|
3370// abs(...)
3371// neg(...)
3372// sext(...)
3373// -reg
3374// -|...|
3375// -abs(...)
3376// name:...
3377//
3378bool
3379AMDGPUAsmParser::isModifier() {
3380
3381 AsmToken Tok = getToken();
3382 AsmToken NextToken[2];
3383 peekTokens(NextToken);
3384
3385 return isOperandModifier(Tok, NextToken[0]) ||
3386 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3387 isOpcodeModifierWithVal(Tok, NextToken[0]);
3388}
3389
3390// Check if the current token is an SP3 'neg' modifier.
3391// Currently this modifier is allowed in the following context:
3392//
3393// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3394// 2. Before an 'abs' modifier: -abs(...)
3395// 3. Before an SP3 'abs' modifier: -|...|
3396//
3397// In all other cases "-" is handled as a part
3398// of an expression that follows the sign.
3399//
3400 // Note: When "-" is followed by an integer literal N,
3401 // it is interpreted as integer negation rather
3402 // than a floating-point NEG modifier applied to N.
3403 // Besides being counter-intuitive, such use of the floating-point
3404 // NEG modifier would result in different meanings
3405 // of integer literals used with VOP1/2/C and VOP3,
3406 // for example:
3407 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3408 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3409 // Negative fp literals with a preceding "-" are
3410 // handled likewise for uniformity.
3411//
3412bool
3413AMDGPUAsmParser::parseSP3NegModifier() {
3414
3415 AsmToken NextToken[2];
3416 peekTokens(NextToken);
3417
3418 if (isToken(AsmToken::Minus) &&
3419 (isRegister(NextToken[0], NextToken[1]) ||
3420 NextToken[0].is(AsmToken::Pipe) ||
3421 isId(NextToken[0], "abs"))) {
3422 lex();
3423 return true;
3424 }
3425
3426 return false;
3427}
3428
3429ParseStatus
3430AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3431 bool AllowImm) {
3432 bool Neg, SP3Neg;
3433 bool Abs, SP3Abs;
3434 SMLoc Loc;
3435
3436 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3437 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3438 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3439
3440 SP3Neg = parseSP3NegModifier();
3441
3442 Loc = getLoc();
3443 Neg = trySkipId("neg");
3444 if (Neg && SP3Neg)
3445 return Error(Loc, "expected register or immediate");
3446 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3447 return ParseStatus::Failure;
3448
3449 Abs = trySkipId("abs");
3450 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3451 return ParseStatus::Failure;
3452
3453 LitModifier Lit = LitModifier::None;
3454 if (trySkipId("lit")) {
3455 Lit = LitModifier::Lit;
3456 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3457 return ParseStatus::Failure;
3458 } else if (trySkipId("lit64")) {
3459 Lit = LitModifier::Lit64;
3460 if (!skipToken(AsmToken::LParen, "expected left paren after lit64"))
3461 return ParseStatus::Failure;
3462 if (!has64BitLiterals())
3463 return Error(Loc, "lit64 is not supported on this GPU");
3464 }
3465
3466 Loc = getLoc();
3467 SP3Abs = trySkipToken(AsmToken::Pipe);
3468 if (Abs && SP3Abs)
3469 return Error(Loc, "expected register or immediate");
3470
3471 ParseStatus Res;
3472 if (AllowImm) {
3473 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3474 } else {
3475 Res = parseReg(Operands);
3476 }
3477 if (!Res.isSuccess())
3478 return (SP3Neg || Neg || SP3Abs || Abs || Lit != LitModifier::None)
3480 : Res;
3481
3482 if (Lit != LitModifier::None && !Operands.back()->isImm())
3483 Error(Loc, "expected immediate with lit modifier");
3484
3485 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3486 return ParseStatus::Failure;
3487 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3488 return ParseStatus::Failure;
3489 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3490 return ParseStatus::Failure;
3491 if (Lit != LitModifier::None &&
3492 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3493 return ParseStatus::Failure;
3494
3495 AMDGPUOperand::Modifiers Mods;
3496 Mods.Abs = Abs || SP3Abs;
3497 Mods.Neg = Neg || SP3Neg;
3498 Mods.Lit = Lit;
3499
3500 if (Mods.hasFPModifiers() || Lit != LitModifier::None) {
3501 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3502 if (Op.isExpr())
3503 return Error(Op.getStartLoc(), "expected an absolute expression");
3504 Op.setModifiers(Mods);
3505 }
3506 return ParseStatus::Success;
3507}
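// Illustrative note (not part of the original source): both the named forms
// "neg(...)"/"abs(...)" and the SP3 forms "-..."/"|...|" are accepted, e.g.
// "abs(v1)", "-v2", "-|v3|" or "neg(abs(v4))", while ambiguous spellings such
// as "--1" or doubled modifiers like "abs(|v1|)" are rejected.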
3508
3509ParseStatus
3510AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3511 bool AllowImm) {
3512 bool Sext = trySkipId("sext");
3513 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3514 return ParseStatus::Failure;
3515
3516 ParseStatus Res;
3517 if (AllowImm) {
3518 Res = parseRegOrImm(Operands);
3519 } else {
3520 Res = parseReg(Operands);
3521 }
3522 if (!Res.isSuccess())
3523 return Sext ? ParseStatus::Failure : Res;
3524
3525 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3526 return ParseStatus::Failure;
3527
3528 AMDGPUOperand::Modifiers Mods;
3529 Mods.Sext = Sext;
3530
3531 if (Mods.hasIntModifiers()) {
3532 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3533 if (Op.isExpr())
3534 return Error(Op.getStartLoc(), "expected an absolute expression");
3535 Op.setModifiers(Mods);
3536 }
3537
3538 return ParseStatus::Success;
3539}
3540
3541ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3542 return parseRegOrImmWithFPInputMods(Operands, false);
3543}
3544
3545ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3546 return parseRegOrImmWithIntInputMods(Operands, false);
3547}
3548
3549ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3550 auto Loc = getLoc();
3551 if (trySkipId("off")) {
3552 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3553 AMDGPUOperand::ImmTyOff, false));
3554 return ParseStatus::Success;
3555 }
3556
3557 if (!isRegister())
3558 return ParseStatus::NoMatch;
3559
3560 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3561 if (Reg) {
3562 Operands.push_back(std::move(Reg));
3563 return ParseStatus::Success;
3564 }
3565
3566 return ParseStatus::Failure;
3567}
3568
3569unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3570 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3571
3572 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3573 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3574 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3575 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3576 return Match_InvalidOperand;
3577
3578 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3579 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3580    // v_mac_f32/16 allow only dst_sel == DWORD.
3581 auto OpNum =
3582 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3583 const auto &Op = Inst.getOperand(OpNum);
3584 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3585 return Match_InvalidOperand;
3586 }
3587 }
3588
3589 // Asm can first try to match VOPD or VOPD3. By failing early here with
3590 // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3591 // Checking later during validateInstruction does not give a chance to retry
3592 // parsing as a different encoding.
3593 if (tryAnotherVOPDEncoding(Inst))
3594 return Match_InvalidOperand;
3595
3596 return Match_Success;
3597}
3598
3608
3609// What asm variants we should check
3610ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3611 if (isForcedDPP() && isForcedVOP3()) {
3612 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3613 return ArrayRef(Variants);
3614 }
3615 if (getForcedEncodingSize() == 32) {
3616 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3617 return ArrayRef(Variants);
3618 }
3619
3620 if (isForcedVOP3()) {
3621 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3622 return ArrayRef(Variants);
3623 }
3624
3625 if (isForcedSDWA()) {
3626 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3628 return ArrayRef(Variants);
3629 }
3630
3631 if (isForcedDPP()) {
3632 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3633 return ArrayRef(Variants);
3634 }
3635
3636 return getAllVariants();
3637}
3638
3639StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3640 if (isForcedDPP() && isForcedVOP3())
3641 return "e64_dpp";
3642
3643 if (getForcedEncodingSize() == 32)
3644 return "e32";
3645
3646 if (isForcedVOP3())
3647 return "e64";
3648
3649 if (isForcedSDWA())
3650 return "sdwa";
3651
3652 if (isForcedDPP())
3653 return "dpp";
3654
3655 return "";
3656}
3657
3658unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3659 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3660 for (MCPhysReg Reg : Desc.implicit_uses()) {
3661 switch (Reg) {
3662 case AMDGPU::FLAT_SCR:
3663 case AMDGPU::VCC:
3664 case AMDGPU::VCC_LO:
3665 case AMDGPU::VCC_HI:
3666 case AMDGPU::M0:
3667 return Reg;
3668 default:
3669 break;
3670 }
3671 }
3672 return AMDGPU::NoRegister;
3673}
3674
3675// NB: This code is correct only when used to check constant
3676// bus limitations because GFX7 supports no f16 inline constants.
3677// Note that there are no cases when a GFX7 opcode violates
3678// constant bus limitations due to the use of an f16 constant.
3679bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3680 unsigned OpIdx) const {
3681 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3682
3685 return false;
3686 }
3687
3688 const MCOperand &MO = Inst.getOperand(OpIdx);
3689
3690 int64_t Val = MO.isImm() ? MO.getImm() : getLitValue(MO.getExpr());
3691 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3692
3693 switch (OpSize) { // expected operand size
3694 case 8:
3695 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3696 case 4:
3697 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3698 case 2: {
3699 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3702 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3703
3707
3711
3715
3718 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3719
3722 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3723
3725 return false;
3726
3727 llvm_unreachable("invalid operand type");
3728 }
3729 default:
3730 llvm_unreachable("invalid operand size");
3731 }
3732}
3733
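// \returns how many scalar values the constant bus can supply to this opcode:
// one on pre-GFX10 targets and for the 64-bit shift opcodes listed below, two
// otherwise.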
3734unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3735 if (!isGFX10Plus())
3736 return 1;
3737
3738 switch (Opcode) {
3739 // 64-bit shift instructions can use only one scalar value input
3740 case AMDGPU::V_LSHLREV_B64_e64:
3741 case AMDGPU::V_LSHLREV_B64_gfx10:
3742 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3743 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3744 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3745 case AMDGPU::V_LSHRREV_B64_e64:
3746 case AMDGPU::V_LSHRREV_B64_gfx10:
3747 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3748 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3749 case AMDGPU::V_ASHRREV_I64_e64:
3750 case AMDGPU::V_ASHRREV_I64_gfx10:
3751 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3752 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3753 case AMDGPU::V_LSHL_B64_e64:
3754 case AMDGPU::V_LSHR_B64_e64:
3755 case AMDGPU::V_ASHR_I64_e64:
3756 return 1;
3757 default:
3758 return 2;
3759 }
3760}
3761
3762constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3763using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3764
3765// Get regular operand indices in the same order as specified
3766// in the instruction (but append mandatory literals to the end).
3767static OperandIndices getSrcOperandIndices(unsigned Opcode,
3768 bool AddMandatoryLiterals = false) {
3769
3770 int16_t ImmIdx =
3771 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3772
3773 if (isVOPD(Opcode)) {
3774 int16_t ImmXIdx =
3775 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;
3776
3777 return {getNamedOperandIdx(Opcode, OpName::src0X),
3778 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3779 getNamedOperandIdx(Opcode, OpName::vsrc2X),
3780 getNamedOperandIdx(Opcode, OpName::src0Y),
3781 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3782 getNamedOperandIdx(Opcode, OpName::vsrc2Y),
3783 ImmXIdx,
3784 ImmIdx};
3785 }
3786
3787 return {getNamedOperandIdx(Opcode, OpName::src0),
3788 getNamedOperandIdx(Opcode, OpName::src1),
3789 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3790}
3791
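// \returns true if the operand at \p OpIdx occupies a constant bus slot:
// a non-inline immediate, an expression, or an SGPR other than null.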
3792bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3793 const MCOperand &MO = Inst.getOperand(OpIdx);
3794 if (MO.isImm())
3795 return !isInlineConstant(Inst, OpIdx);
3796 if (MO.isReg()) {
3797 auto Reg = MO.getReg();
3798 if (!Reg)
3799 return false;
3800 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3801 auto PReg = mc2PseudoReg(Reg);
3802 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3803 }
3804 return true;
3805}
3806
3807// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3808// Writelane is special in that it can use SGPR and M0 (which would normally
3809// count as using the constant bus twice - but in this case it is allowed since
3810// the lane selector doesn't count as a use of the constant bus). However, it is
3811// still required to abide by the 1 SGPR rule.
3812static bool checkWriteLane(const MCInst &Inst) {
3813 const unsigned Opcode = Inst.getOpcode();
3814 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3815 return false;
3816 const MCOperand &LaneSelOp = Inst.getOperand(2);
3817 if (!LaneSelOp.isReg())
3818 return false;
3819 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3820 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3821}
3822
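// Check that the instruction does not read more scalar values (implicit and
// explicit SGPRs plus literal constants) than the constant bus limit for this
// opcode allows.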
3823bool AMDGPUAsmParser::validateConstantBusLimitations(
3824 const MCInst &Inst, const OperandVector &Operands) {
3825 const unsigned Opcode = Inst.getOpcode();
3826 const MCInstrDesc &Desc = MII.get(Opcode);
3827 MCRegister LastSGPR;
3828 unsigned ConstantBusUseCount = 0;
3829 unsigned NumLiterals = 0;
3830 unsigned LiteralSize;
3831
3832 if (!(Desc.TSFlags &
3833 (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3834 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
3835 !isVOPD(Opcode))
3836 return true;
3837
3838 if (checkWriteLane(Inst))
3839 return true;
3840
3841 // Check special imm operands (used by madmk, etc)
3842 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3843 ++NumLiterals;
3844 LiteralSize = 4;
3845 }
3846
3847 SmallDenseSet<unsigned> SGPRsUsed;
3848 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3849 if (SGPRUsed != AMDGPU::NoRegister) {
3850 SGPRsUsed.insert(SGPRUsed);
3851 ++ConstantBusUseCount;
3852 }
3853
3854 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3855
3856 unsigned ConstantBusLimit = getConstantBusLimit(Opcode);
3857
3858 for (int OpIdx : OpIndices) {
3859 if (OpIdx == -1)
3860 continue;
3861
3862 const MCOperand &MO = Inst.getOperand(OpIdx);
3863 if (usesConstantBus(Inst, OpIdx)) {
3864 if (MO.isReg()) {
3865 LastSGPR = mc2PseudoReg(MO.getReg());
3866 // Pairs of registers with partial intersections like these
3867 // s0, s[0:1]
3868 // flat_scratch_lo, flat_scratch
3869 // flat_scratch_lo, flat_scratch_hi
3870 // are theoretically valid but they are disabled anyway.
3871 // Note that this code mimics SIInstrInfo::verifyInstruction
3872 if (SGPRsUsed.insert(LastSGPR).second) {
3873 ++ConstantBusUseCount;
3874 }
3875 } else { // Expression or a literal
3876
3877 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3878 continue; // special operand like VINTERP attr_chan
3879
3880 // An instruction may use only one literal.
3881 // This has been validated in a previous step.
3882 // See validateVOPLiteral.
3883 // This literal may be used by more than one operand.
3884 // If all these operands are of the same size,
3885 // this literal counts as one scalar value.
3886 // Otherwise it counts as 2 scalar values.
3887 // See "GFX10 Shader Programming", section 3.6.2.3.
3888
3889 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3890 if (Size < 4)
3891 Size = 4;
3892
3893 if (NumLiterals == 0) {
3894 NumLiterals = 1;
3895 LiteralSize = Size;
3896 } else if (LiteralSize != Size) {
3897 NumLiterals = 2;
3898 }
3899 }
3900 }
3901
3902 if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
3903 Error(getOperandLoc(Operands, OpIdx),
3904 "invalid operand (violates constant bus restrictions)");
3905 return false;
3906 }
3907 }
3908 return true;
3909}
3910
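// \returns the index of the first component operand that violates the VOPD
// (or, with \p AsVOPD3, VOPD3) register-bank and literal constraints, or
// std::nullopt if the pairing is valid.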
3911std::optional<unsigned>
3912AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
3913
3914 const unsigned Opcode = Inst.getOpcode();
3915 if (!isVOPD(Opcode))
3916 return {};
3917
3918 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3919
3920 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3921 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3922 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3923 ? Opr.getReg()
3924 : MCRegister();
3925 };
3926
3927 // On GFX12+, if both OpX and OpY are V_MOV_B32, then OpY uses the SRC2
3928 // source-cache.
3929 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
3930 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
3931 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
3932 bool AllowSameVGPR = isGFX1250();
3933
3934 if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
3935 for (auto OpName : {OpName::src0X, OpName::src0Y}) {
3936 int I = getNamedOperandIdx(Opcode, OpName);
3937 const MCOperand &Op = Inst.getOperand(I);
3938 if (!Op.isImm())
3939 continue;
3940 int64_t Imm = Op.getImm();
3941 if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
3942 !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
3943 return (unsigned)I;
3944 }
3945
3946 for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
3947 OpName::vsrc2Y, OpName::imm}) {
3948 int I = getNamedOperandIdx(Opcode, OpName);
3949 if (I == -1)
3950 continue;
3951 const MCOperand &Op = Inst.getOperand(I);
3952 if (Op.isImm())
3953 return (unsigned)I;
3954 }
3955 }
3956
3957 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3958 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
3959 getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
3960
3961 return InvalidCompOprIdx;
3962}
3963
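// Validate the VOPD/VOPD3 pairing: reject ABS modifiers for VOPD3, and report
// which dst or srcN operand violates the register-bank constraints.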
3964bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
3965 const OperandVector &Operands) {
3966
3967 unsigned Opcode = Inst.getOpcode();
3968 bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
3969
3970 if (AsVOPD3) {
3971 for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
3972 AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
3973 if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
3974 (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
3975 Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
3976 }
3977 }
3978
3979 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
3980 if (!InvalidCompOprIdx.has_value())
3981 return true;
3982
3983 auto CompOprIdx = *InvalidCompOprIdx;
3984 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3985 auto ParsedIdx =
3986 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3987 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3988 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3989
3990 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3991 if (CompOprIdx == VOPD::Component::DST) {
3992 if (AsVOPD3)
3993 Error(Loc, "dst registers must be distinct");
3994 else
3995 Error(Loc, "one dst register must be even and the other odd");
3996 } else {
3997 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3998 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3999 " operands must use different VGPR banks");
4000 }
4001
4002 return false;
4003}
4004
4005// \returns true if \p Inst does not satisfy VOPD constraints, but can be
4006// potentially used as VOPD3 with the same operands.
4007bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
4008 // First check if it fits VOPD
4009 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
4010 if (!InvalidCompOprIdx.has_value())
4011 return false;
4012
4013 // Then if it fits VOPD3
4014 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
4015 if (InvalidCompOprIdx.has_value()) {
4016 // If the failed operand is dst, it is better to report the error against
4017 // the VOPD3 instruction, as it has more capabilities and the error message
4018 // will be more informative. If the dst is not legal for VOPD3, then it is
4019 // not legal for VOPD either.
4020 if (*InvalidCompOprIdx == VOPD::Component::DST)
4021 return true;
4022
4023 // Otherwise prefer VOPD as we may find ourselves in an awkward situation
4024 // with a conflict in the tied implicit src2 of fmac and no asm operand
4025 // to point to.
4026 return false;
4027 }
4028 return true;
4029}
4030
4031// \returns true if a VOPD3 instruction can also be represented as a shorter
4032// VOPD encoding.
4033bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
4034 const unsigned Opcode = Inst.getOpcode();
4035 const auto &II = getVOPDInstInfo(Opcode, &MII);
4036 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
4037 if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
4038 !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
4039 return false;
4040
4041 // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
4042 // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
4043 // be parsed as VOPD which does not accept src2.
4044 if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
4045 II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
4046 return false;
4047
4048 // If any modifiers are set this cannot be VOPD.
4049 for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4050 OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4051 OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4052 int I = getNamedOperandIdx(Opcode, OpName);
4053 if (I == -1)
4054 continue;
4055 if (Inst.getOperand(I).getImm())
4056 return false;
4057 }
4058
4059 return !tryVOPD3(Inst);
4060}
4061
4062// VOPD3 has more relaxed register constraints than VOPD. We prefer the
4063// shorter VOPD form but switch to VOPD3 otherwise.
4064bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4065 const unsigned Opcode = Inst.getOpcode();
4066 if (!isGFX1250() || !isVOPD(Opcode))
4067 return false;
4068
4069 if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4070 return tryVOPD(Inst);
4071 return tryVOPD3(Inst);
4072}
4073
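// On targets without integer clamp support, the clamp bit must not be set on
// opcodes that would clamp integer results.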
4074bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
4075
4076 const unsigned Opc = Inst.getOpcode();
4077 const MCInstrDesc &Desc = MII.get(Opc);
4078
4079 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
4080 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
4081 assert(ClampIdx != -1);
4082 return Inst.getOperand(ClampIdx).getImm() == 0;
4083 }
4084
4085 return true;
4086}
4087
4088constexpr uint64_t MIMGFlags =
4089 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
4090
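// Check that the vdata register size of an image instruction matches the
// number of channels enabled by dmask (plus one for tfe), halved (rounding up)
// for packed d16 where supported.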
4091bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc) {
4092
4093 const unsigned Opc = Inst.getOpcode();
4094 const MCInstrDesc &Desc = MII.get(Opc);
4095
4096 if ((Desc.TSFlags & MIMGFlags) == 0)
4097 return true;
4098
4099 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
4100 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4101 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
4102
4103 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
4104 return true;
4105
4106 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
4107 return true;
4108
4109 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
4110 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
4111 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4112 if (DMask == 0)
4113 DMask = 1;
4114
4115 bool IsPackedD16 = false;
4116 unsigned DataSize =
4117 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
4118 if (hasPackedD16()) {
4119 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4120 IsPackedD16 = D16Idx >= 0;
4121 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
4122 DataSize = (DataSize + 1) / 2;
4123 }
4124
4125 if ((VDataSize / 4) == DataSize + TFESize)
4126 return true;
4127
4128 StringRef Modifiers;
4129 if (isGFX90A())
4130 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
4131 else
4132 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
4133
4134 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
4135 return false;
4136}
4137
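// Check that the number of address VGPRs matches what dim, a16 and the
// (possibly partial) NSA encoding require for this image instruction.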
4138bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc) {
4139 const unsigned Opc = Inst.getOpcode();
4140 const MCInstrDesc &Desc = MII.get(Opc);
4141
4142 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
4143 return true;
4144
4145 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4146
4147 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4148 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4149 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
4150 AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
4151 ? AMDGPU::OpName::srsrc
4152 : AMDGPU::OpName::rsrc;
4153 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
4154 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4155 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
4156
4157 assert(VAddr0Idx != -1);
4158 assert(SrsrcIdx != -1);
4159 assert(SrsrcIdx > VAddr0Idx);
4160
4161 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
4162 if (BaseOpcode->BVH) {
4163 if (IsA16 == BaseOpcode->A16)
4164 return true;
4165 Error(IDLoc, "image address size does not match a16");
4166 return false;
4167 }
4168
4169 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4170 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4171 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
4172 unsigned ActualAddrSize =
4173 IsNSA ? SrsrcIdx - VAddr0Idx
4174 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
4175
4176 unsigned ExpectedAddrSize =
4177 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
4178
4179 if (IsNSA) {
4180 if (hasPartialNSAEncoding() &&
4181 ExpectedAddrSize >
4182 getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
4183 int VAddrLastIdx = SrsrcIdx - 1;
4184 unsigned VAddrLastSize =
4185 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
4186
4187 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4188 }
4189 } else {
4190 if (ExpectedAddrSize > 12)
4191 ExpectedAddrSize = 16;
4192
4193 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
4194 // This provides backward compatibility for assembly created
4195 // before 160b/192b/224b types were directly supported.
4196 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4197 return true;
4198 }
4199
4200 if (ActualAddrSize == ExpectedAddrSize)
4201 return true;
4202
4203 Error(IDLoc, "image address size does not match dim and a16");
4204 return false;
4205}
4206
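// For image atomics, only dmask values 0x1, 0x3 and 0xf are acceptable here;
// the precise per-opcode requirement is enforced when dmask is checked against
// the dst size.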
4207bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4208
4209 const unsigned Opc = Inst.getOpcode();
4210 const MCInstrDesc &Desc = MII.get(Opc);
4211
4212 if ((Desc.TSFlags & MIMGFlags) == 0)
4213 return true;
4214 if (!Desc.mayLoad() || !Desc.mayStore())
4215 return true; // Not atomic
4216
4217 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4218 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4219
4220 // This is an incomplete check because image_atomic_cmpswap
4221 // may only use 0x3 and 0xf while other atomic operations
4222 // may use 0x1 and 0x3. However these limitations are
4223 // verified when we check that dmask matches dst size.
4224 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4225}
4226
4227bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4228
4229 const unsigned Opc = Inst.getOpcode();
4230 const MCInstrDesc &Desc = MII.get(Opc);
4231
4232 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4233 return true;
4234
4235 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4236 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4237
4238 // GATHER4 instructions use dmask in a different fashion compared to
4239 // other MIMG instructions. The only useful DMASK values are
4240 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4241 // (red,red,red,red) etc.) The ISA document doesn't mention
4242 // this.
4243 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4244}
4245
4246bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4247 const OperandVector &Operands) {
4248 if (!isGFX10Plus())
4249 return true;
4250
4251 const unsigned Opc = Inst.getOpcode();
4252 const MCInstrDesc &Desc = MII.get(Opc);
4253
4254 if ((Desc.TSFlags & MIMGFlags) == 0)
4255 return true;
4256
4257 // image_bvh_intersect_ray instructions do not have dim
4258 if (AMDGPU::getMIMGBaseOpcodeInfo(AMDGPU::getMIMGInfo(Opc)->BaseOpcode)->BVH)
4259 return true;
4260
4261 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4262 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4263 if (Op.isDim())
4264 return true;
4265 }
4266 return false;
4267}
4268
4269bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4270 const unsigned Opc = Inst.getOpcode();
4271 const MCInstrDesc &Desc = MII.get(Opc);
4272
4273 if ((Desc.TSFlags & MIMGFlags) == 0)
4274 return true;
4275
4276 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4277 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4278 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4279
4280 if (!BaseOpcode->MSAA)
4281 return true;
4282
4283 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4284 assert(DimIdx != -1);
4285
4286 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4287 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4288
4289 return DimInfo->MSAA;
4290}
4291
4292static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4293{
4294 switch (Opcode) {
4295 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4296 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4297 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4298 return true;
4299 default:
4300 return false;
4301 }
4302}
4303
4304// movrels* opcodes should only allow VGPRs as src0.
4305// This is specified in .td description for vop1/vop3,
4306// but sdwa is handled differently. See isSDWAOperand.
4307bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4308 const OperandVector &Operands) {
4309
4310 const unsigned Opc = Inst.getOpcode();
4311 const MCInstrDesc &Desc = MII.get(Opc);
4312
4313 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4314 return true;
4315
4316 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4317 assert(Src0Idx != -1);
4318
4319 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4320 if (Src0.isReg()) {
4321 auto Reg = mc2PseudoReg(Src0.getReg());
4322 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4323 if (!isSGPR(Reg, TRI))
4324 return true;
4325 }
4326
4327 Error(getOperandLoc(Operands, Src0Idx), "source operand must be a VGPR");
4328 return false;
4329}
4330
4331bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4332 const OperandVector &Operands) {
4333
4334 const unsigned Opc = Inst.getOpcode();
4335
4336 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4337 return true;
4338
4339 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4340 assert(Src0Idx != -1);
4341
4342 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4343 if (!Src0.isReg())
4344 return true;
4345
4346 auto Reg = mc2PseudoReg(Src0.getReg());
4347 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4348 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4349 Error(getOperandLoc(Operands, Src0Idx),
4350 "source operand must be either a VGPR or an inline constant");
4351 return false;
4352 }
4353
4354 return true;
4355}
4356
4357bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4358 const OperandVector &Operands) {
4359 unsigned Opcode = Inst.getOpcode();
4360 const MCInstrDesc &Desc = MII.get(Opcode);
4361
4362 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4363 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4364 return true;
4365
4366 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4367 if (Src2Idx == -1)
4368 return true;
4369
4370 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4371 Error(getOperandLoc(Operands, Src2Idx),
4372 "inline constants are not allowed for this operand");
4373 return false;
4374 }
4375
4376 return true;
4377}
4378
4379bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4380 const OperandVector &Operands) {
4381 const unsigned Opc = Inst.getOpcode();
4382 const MCInstrDesc &Desc = MII.get(Opc);
4383
4384 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4385 return true;
4386
4387 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4388 if (BlgpIdx != -1) {
4389 if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
4390 int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
4391
4392 unsigned CBSZ = Inst.getOperand(CbszIdx).getImm();
4393 unsigned BLGP = Inst.getOperand(BlgpIdx).getImm();
4394
4395 // Validate the correct register size was used for the floating point
4396 // format operands
4397
4398 bool Success = true;
4399 if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) {
4400 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4401 Error(getOperandLoc(Operands, Src0Idx),
4402 "wrong register tuple size for cbsz value " + Twine(CBSZ));
4403 Success = false;
4404 }
4405
4406 if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(BLGP)) {
4407 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4408 Error(getOperandLoc(Operands, Src1Idx),
4409 "wrong register tuple size for blgp value " + Twine(BLGP));
4410 Success = false;
4411 }
4412
4413 return Success;
4414 }
4415 }
4416
4417 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4418 if (Src2Idx == -1)
4419 return true;
4420
4421 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4422 if (!Src2.isReg())
4423 return true;
4424
4425 MCRegister Src2Reg = Src2.getReg();
4426 MCRegister DstReg = Inst.getOperand(0).getReg();
4427 if (Src2Reg == DstReg)
4428 return true;
4429
4430 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4431 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4432 return true;
4433
4434 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4435 Error(getOperandLoc(Operands, Src2Idx),
4436 "source 2 operand must not partially overlap with dst");
4437 return false;
4438 }
4439
4440 return true;
4441}
4442
4443bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4444 switch (Inst.getOpcode()) {
4445 default:
4446 return true;
4447 case V_DIV_SCALE_F32_gfx6_gfx7:
4448 case V_DIV_SCALE_F32_vi:
4449 case V_DIV_SCALE_F32_gfx10:
4450 case V_DIV_SCALE_F64_gfx6_gfx7:
4451 case V_DIV_SCALE_F64_vi:
4452 case V_DIV_SCALE_F64_gfx10:
4453 break;
4454 }
4455
4456 // TODO: Check that src0 = src1 or src2.
4457
4458 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4459 AMDGPU::OpName::src2_modifiers,
4460 AMDGPU::OpName::src2_modifiers}) {
4461 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4462 .getImm() &
4463 SISrcMods::NEG) {
4464 return false;
4465 }
4466 }
4467
4468 return true;
4469}
4470
4471bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4472
4473 const unsigned Opc = Inst.getOpcode();
4474 const MCInstrDesc &Desc = MII.get(Opc);
4475
4476 if ((Desc.TSFlags & MIMGFlags) == 0)
4477 return true;
4478
4479 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4480 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4481 if (isCI() || isSI())
4482 return false;
4483 }
4484
4485 return true;
4486}
4487
4488bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4489 const unsigned Opc = Inst.getOpcode();
4490 const MCInstrDesc &Desc = MII.get(Opc);
4491
4492 if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0)
4493 return true;
4494
4495 int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
4496
4497 return R128Idx < 0 || !Inst.getOperand(R128Idx).getImm();
4498}
4499
4500static bool IsRevOpcode(const unsigned Opcode)
4501{
4502 switch (Opcode) {
4503 case AMDGPU::V_SUBREV_F32_e32:
4504 case AMDGPU::V_SUBREV_F32_e64:
4505 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4506 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4507 case AMDGPU::V_SUBREV_F32_e32_vi:
4508 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4509 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4510 case AMDGPU::V_SUBREV_F32_e64_vi:
4511
4512 case AMDGPU::V_SUBREV_CO_U32_e32:
4513 case AMDGPU::V_SUBREV_CO_U32_e64:
4514 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4515 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4516
4517 case AMDGPU::V_SUBBREV_U32_e32:
4518 case AMDGPU::V_SUBBREV_U32_e64:
4519 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4520 case AMDGPU::V_SUBBREV_U32_e32_vi:
4521 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4522 case AMDGPU::V_SUBBREV_U32_e64_vi:
4523
4524 case AMDGPU::V_SUBREV_U32_e32:
4525 case AMDGPU::V_SUBREV_U32_e64:
4526 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4527 case AMDGPU::V_SUBREV_U32_e32_vi:
4528 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4529 case AMDGPU::V_SUBREV_U32_e64_vi:
4530
4531 case AMDGPU::V_SUBREV_F16_e32:
4532 case AMDGPU::V_SUBREV_F16_e64:
4533 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4534 case AMDGPU::V_SUBREV_F16_e32_vi:
4535 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4536 case AMDGPU::V_SUBREV_F16_e64_vi:
4537
4538 case AMDGPU::V_SUBREV_U16_e32:
4539 case AMDGPU::V_SUBREV_U16_e64:
4540 case AMDGPU::V_SUBREV_U16_e32_vi:
4541 case AMDGPU::V_SUBREV_U16_e64_vi:
4542
4543 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4544 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4545 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4546
4547 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4548 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4549
4550 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4551 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4552
4553 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4554 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4555
4556 case AMDGPU::V_LSHRREV_B32_e32:
4557 case AMDGPU::V_LSHRREV_B32_e64:
4558 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4559 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4560 case AMDGPU::V_LSHRREV_B32_e32_vi:
4561 case AMDGPU::V_LSHRREV_B32_e64_vi:
4562 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4563 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4564
4565 case AMDGPU::V_ASHRREV_I32_e32:
4566 case AMDGPU::V_ASHRREV_I32_e64:
4567 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4568 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4569 case AMDGPU::V_ASHRREV_I32_e32_vi:
4570 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4571 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4572 case AMDGPU::V_ASHRREV_I32_e64_vi:
4573
4574 case AMDGPU::V_LSHLREV_B32_e32:
4575 case AMDGPU::V_LSHLREV_B32_e64:
4576 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4577 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4578 case AMDGPU::V_LSHLREV_B32_e32_vi:
4579 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4580 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4581 case AMDGPU::V_LSHLREV_B32_e64_vi:
4582
4583 case AMDGPU::V_LSHLREV_B16_e32:
4584 case AMDGPU::V_LSHLREV_B16_e64:
4585 case AMDGPU::V_LSHLREV_B16_e32_vi:
4586 case AMDGPU::V_LSHLREV_B16_e64_vi:
4587 case AMDGPU::V_LSHLREV_B16_gfx10:
4588
4589 case AMDGPU::V_LSHRREV_B16_e32:
4590 case AMDGPU::V_LSHRREV_B16_e64:
4591 case AMDGPU::V_LSHRREV_B16_e32_vi:
4592 case AMDGPU::V_LSHRREV_B16_e64_vi:
4593 case AMDGPU::V_LSHRREV_B16_gfx10:
4594
4595 case AMDGPU::V_ASHRREV_I16_e32:
4596 case AMDGPU::V_ASHRREV_I16_e64:
4597 case AMDGPU::V_ASHRREV_I16_e32_vi:
4598 case AMDGPU::V_ASHRREV_I16_e64_vi:
4599 case AMDGPU::V_ASHRREV_I16_gfx10:
4600
4601 case AMDGPU::V_LSHLREV_B64_e64:
4602 case AMDGPU::V_LSHLREV_B64_gfx10:
4603 case AMDGPU::V_LSHLREV_B64_vi:
4604
4605 case AMDGPU::V_LSHRREV_B64_e64:
4606 case AMDGPU::V_LSHRREV_B64_gfx10:
4607 case AMDGPU::V_LSHRREV_B64_vi:
4608
4609 case AMDGPU::V_ASHRREV_I64_e64:
4610 case AMDGPU::V_ASHRREV_I64_gfx10:
4611 case AMDGPU::V_ASHRREV_I64_vi:
4612
4613 case AMDGPU::V_PK_LSHLREV_B16:
4614 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4615 case AMDGPU::V_PK_LSHLREV_B16_vi:
4616
4617 case AMDGPU::V_PK_LSHRREV_B16:
4618 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4619 case AMDGPU::V_PK_LSHRREV_B16_vi:
4620 case AMDGPU::V_PK_ASHRREV_I16:
4621 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4622 case AMDGPU::V_PK_ASHRREV_I16_vi:
4623 return true;
4624 default:
4625 return false;
4626 }
4627}
4628
4629bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst,
4630 const OperandVector &Operands) {
4631 using namespace SIInstrFlags;
4632 const unsigned Opcode = Inst.getOpcode();
4633 const MCInstrDesc &Desc = MII.get(Opcode);
4634
4635 // lds_direct register is defined so that it can be used
4636 // with 9-bit operands only. Ignore encodings which do not accept these.
4637 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4638 if ((Desc.TSFlags & Enc) == 0)
4639 return true;
4640
4641 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4642 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4643 if (SrcIdx == -1)
4644 break;
4645 const auto &Src = Inst.getOperand(SrcIdx);
4646 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4647
4648 if (isGFX90A() || isGFX11Plus()) {
4649 Error(getOperandLoc(Operands, SrcIdx),
4650 "lds_direct is not supported on this GPU");
4651 return false;
4652 }
4653
4654 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) {
4655 Error(getOperandLoc(Operands, SrcIdx),
4656 "lds_direct cannot be used with this instruction");
4657 return false;
4658 }
4659
4660 if (SrcName != OpName::src0) {
4661 Error(getOperandLoc(Operands, SrcIdx),
4662 "lds_direct may be used as src0 only");
4663 return false;
4664 }
4665 }
4666 }
4667
4668 return true;
4669}
4670
4671SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4672 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4673 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4674 if (Op.isFlatOffset())
4675 return Op.getStartLoc();
4676 }
4677 return getLoc();
4678}
4679
4680bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4681 const OperandVector &Operands) {
4682 auto Opcode = Inst.getOpcode();
4683 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4684 if (OpNum == -1)
4685 return true;
4686
4687 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4688 if ((TSFlags & SIInstrFlags::FLAT))
4689 return validateFlatOffset(Inst, Operands);
4690
4691 if ((TSFlags & SIInstrFlags::SMRD))
4692 return validateSMEMOffset(Inst, Operands);
4693
4694 const auto &Op = Inst.getOperand(OpNum);
4695 // GFX12+ buffer ops: InstOffset is a signed 24-bit value, but must not be negative.
4696 if (isGFX12Plus() &&
4697 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4698 const unsigned OffsetSize = 24;
4699 if (!isUIntN(OffsetSize - 1, Op.getImm())) {
4700 Error(getFlatOffsetLoc(Operands),
4701 Twine("expected a ") + Twine(OffsetSize - 1) +
4702 "-bit unsigned offset for buffer ops");
4703 return false;
4704 }
4705 } else {
4706 const unsigned OffsetSize = 16;
4707 if (!isUIntN(OffsetSize, Op.getImm())) {
4708 Error(getFlatOffsetLoc(Operands),
4709 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4710 return false;
4711 }
4712 }
4713 return true;
4714}
4715
4716bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4717 const OperandVector &Operands) {
4718 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4719 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4720 return true;
4721
4722 auto Opcode = Inst.getOpcode();
4723 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4724 assert(OpNum != -1);
4725
4726 const auto &Op = Inst.getOperand(OpNum);
4727 if (!hasFlatOffsets() && Op.getImm() != 0) {
4728 Error(getFlatOffsetLoc(Operands),
4729 "flat offset modifier is not supported on this GPU");
4730 return false;
4731 }
4732
4733 // For pre-GFX12 FLAT instructions the offset must be positive;
4734 // MSB is ignored and forced to zero.
4735 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4736 bool AllowNegative =
4737 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4738 isGFX12Plus();
4739 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4740 Error(getFlatOffsetLoc(Operands),
4741 Twine("expected a ") +
4742 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4743 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4744 return false;
4745 }
4746
4747 return true;
4748}
4749
4750SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4751 // Start with second operand because SMEM Offset cannot be dst or src0.
4752 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4753 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4754 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4755 return Op.getStartLoc();
4756 }
4757 return getLoc();
4758}
4759
4760bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4761 const OperandVector &Operands) {
4762 if (isCI() || isSI())
4763 return true;
4764
4765 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4766 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4767 return true;
4768
4769 auto Opcode = Inst.getOpcode();
4770 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4771 if (OpNum == -1)
4772 return true;
4773
4774 const auto &Op = Inst.getOperand(OpNum);
4775 if (!Op.isImm())
4776 return true;
4777
4778 uint64_t Offset = Op.getImm();
4779 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4780 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4781 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4782 return true;
4783
4784 Error(getSMEMOffsetLoc(Operands),
4785 isGFX12Plus() && IsBuffer
4786 ? "expected a 23-bit unsigned offset for buffer ops"
4787 : isGFX12Plus() ? "expected a 24-bit signed offset"
4788 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4789 : "expected a 21-bit signed offset");
4790
4791 return false;
4792}
4793
4794bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
4795 const OperandVector &Operands) {
4796 unsigned Opcode = Inst.getOpcode();
4797 const MCInstrDesc &Desc = MII.get(Opcode);
4798 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4799 return true;
4800
4801 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4802 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4803
4804 const int OpIndices[] = { Src0Idx, Src1Idx };
4805
4806 unsigned NumExprs = 0;
4807 unsigned NumLiterals = 0;
4808 int64_t LiteralValue;
4809
4810 for (int OpIdx : OpIndices) {
4811 if (OpIdx == -1) break;
4812
4813 const MCOperand &MO = Inst.getOperand(OpIdx);
4814 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4815 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4816 std::optional<int64_t> Imm;
4817 if (MO.isImm()) {
4818 Imm = MO.getImm();
4819 } else if (MO.isExpr()) {
4820 if (isLitExpr(MO.getExpr()))
4821 Imm = getLitValue(MO.getExpr());
4822 } else {
4823 continue;
4824 }
4825
4826 if (!Imm.has_value()) {
4827 ++NumExprs;
4828 } else if (!isInlineConstant(Inst, OpIdx)) {
4829 auto OpType = static_cast<AMDGPU::OperandType>(
4830 Desc.operands()[OpIdx].OperandType);
4831 int64_t Value = encode32BitLiteral(*Imm, OpType);
4832 if (NumLiterals == 0 || LiteralValue != Value) {
4833 LiteralValue = Value;
4834 ++NumLiterals;
4835 }
4836 }
4837 }
4838 }
4839
4840 if (NumLiterals + NumExprs <= 1)
4841 return true;
4842
4843 Error(getOperandLoc(Operands, Src1Idx),
4844 "only one unique literal operand is allowed");
4845 return false;
4846}
4847
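// Check opcode-specific restrictions on the op_sel/op_sel_hi bits, e.g. for
// permlane16, GFX940 DOT instructions, GFX11+ VOP3 dot opcodes and packed-math
// FP32 on gfx12+.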
4848bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4849 const unsigned Opc = Inst.getOpcode();
4850 if (isPermlane16(Opc)) {
4851 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4852 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4853
4854 if (OpSel & ~3)
4855 return false;
4856 }
4857
4858 uint64_t TSFlags = MII.get(Opc).TSFlags;
4859
4860 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4861 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4862 if (OpSelIdx != -1) {
4863 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4864 return false;
4865 }
4866 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4867 if (OpSelHiIdx != -1) {
4868 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4869 return false;
4870 }
4871 }
4872
4873 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4874 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4875 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4876 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4877 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4878 if (OpSel & 3)
4879 return false;
4880 }
4881
4882 // Packed math FP32 instructions typically accept SGPRs or VGPRs as source
4883 // operands. On gfx12+, if a source operand uses SGPRs, the HW can only read
4884 // the first SGPR and use it for both the low and high operations.
4885 if (isPackedFP32Inst(Opc) && isGFX12Plus()) {
4886 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4887 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4888 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4889 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4890
4891 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4892 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4893 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4894 unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
4895
4896 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4897
4898 auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
4899 unsigned Mask = 1U << Index;
4900 return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
4901 };
4902
4903 if (Src0.isReg() && isSGPR(Src0.getReg(), TRI) &&
4904 !VerifyOneSGPR(/*Index=*/0))
4905 return false;
4906 if (Src1.isReg() && isSGPR(Src1.getReg(), TRI) &&
4907 !VerifyOneSGPR(/*Index=*/1))
4908 return false;
4909
4910 int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4911 if (Src2Idx != -1) {
4912 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4913 if (Src2.isReg() && isSGPR(Src2.getReg(), TRI) &&
4914 !VerifyOneSGPR(/*Index=*/2))
4915 return false;
4916 }
4917 }
4918
4919 return true;
4920}
4921
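// For true16 instructions, check that any explicit op_sel bits agree with the
// low/high halves selected by the 16-bit VGPR operands.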
4922bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
4923 if (!hasTrue16Insts())
4924 return true;
4925 const MCRegisterInfo *MRI = getMRI();
4926 const unsigned Opc = Inst.getOpcode();
4927 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4928 if (OpSelIdx == -1)
4929 return true;
4930 unsigned OpSelOpValue = Inst.getOperand(OpSelIdx).getImm();
4931 // If the value is 0 we could have a default OpSel Operand, so conservatively
4932 // allow it.
4933 if (OpSelOpValue == 0)
4934 return true;
4935 unsigned OpCount = 0;
4936 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
4937 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
4938 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), OpName);
4939 if (OpIdx == -1)
4940 continue;
4941 const MCOperand &Op = Inst.getOperand(OpIdx);
4942 if (Op.isReg() &&
4943 MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(Op.getReg())) {
4944 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Op.getReg(), *MRI);
4945 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
4946 if (OpSelOpIsHi != VGPRSuffixIsHi)
4947 return false;
4948 }
4949 ++OpCount;
4950 }
4951
4952 return true;
4953}
4954
4955bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
4956 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4957
4958 const unsigned Opc = Inst.getOpcode();
4959 uint64_t TSFlags = MII.get(Opc).TSFlags;
4960
4961 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4962 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4963 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4964 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4965 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4966 !(TSFlags & SIInstrFlags::IsSWMMAC))
4967 return true;
4968
4969 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4970 if (NegIdx == -1)
4971 return true;
4972
4973 unsigned Neg = Inst.getOperand(NegIdx).getImm();
4974
4975 // Some instructions have a neg_lo or neg_hi operand, but the neg modifier is
4976 // allowed only on some of their src operands.
4977 // Conveniently, such instructions don't have a src_modifiers operand for the
4978 // src operands that don't allow neg, because those also don't allow opsel.
4979
4980 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4981 AMDGPU::OpName::src1_modifiers,
4982 AMDGPU::OpName::src2_modifiers};
4983
4984 for (unsigned i = 0; i < 3; ++i) {
4985 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4986 if (Neg & (1 << i))
4987 return false;
4988 }
4989 }
4990
4991 return true;
4992}
4993
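// Check DPP-specific restrictions: DP ALU opcodes only accept a limited set of
// dpp controls, and on targets without DPP SGPR src1 support, src1 must be a
// VGPR.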
4994bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4995 const OperandVector &Operands) {
4996 const unsigned Opc = Inst.getOpcode();
4997 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4998 if (DppCtrlIdx >= 0) {
4999 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
5000
5001 if (!AMDGPU::isLegalDPALU_DPPControl(getSTI(), DppCtrl) &&
5002 AMDGPU::isDPALU_DPP(MII.get(Opc), getSTI())) {
5003 // DP ALU DPP is supported only with row_newbcast on GFX9* and only with
5004 // row_share on GFX12.
5005 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
5006 Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
5007 : "DP ALU dpp only supports row_newbcast");
5008 return false;
5009 }
5010 }
5011
5012 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
5013 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5014
5015 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
5016 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
5017 if (Src1Idx >= 0) {
5018 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
5019 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5020 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
5021 Error(getOperandLoc(Operands, Src1Idx),
5022 "invalid operand for instruction");
5023 return false;
5024 }
5025 if (Src1.isImm()) {
5026 Error(getInstLoc(Operands),
5027 "src1 immediate operand invalid for instruction");
5028 return false;
5029 }
5030 }
5031 }
5032
5033 return true;
5034}
5035
5036// Check if VCC register matches wavefront size
5037bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
5038 return (Reg == AMDGPU::VCC && isWave64()) ||
5039 (Reg == AMDGPU::VCC_LO && isWave32());
5040}
5041
5042// Only one unique literal can be used. A VOP3 literal is only allowed on GFX10+.
5043bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
5044 const OperandVector &Operands) {
5045 unsigned Opcode = Inst.getOpcode();
5046 const MCInstrDesc &Desc = MII.get(Opcode);
5047 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
5048 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
5049 !HasMandatoryLiteral && !isVOPD(Opcode))
5050 return true;
5051
5052 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
5053
5054 std::optional<unsigned> LiteralOpIdx;
5055 std::optional<uint64_t> LiteralValue;
5056
5057 for (int OpIdx : OpIndices) {
5058 if (OpIdx == -1)
5059 continue;
5060
5061 const MCOperand &MO = Inst.getOperand(OpIdx);
5062 if (!MO.isImm() && !MO.isExpr())
5063 continue;
5064 if (!isSISrcOperand(Desc, OpIdx))
5065 continue;
5066
5067 std::optional<int64_t> Imm;
5068 if (MO.isImm())
5069 Imm = MO.getImm();
5070 else if (MO.isExpr() && isLitExpr(MO.getExpr()))
5071 Imm = getLitValue(MO.getExpr());
5072
5073 bool IsAnotherLiteral = false;
5074 if (!Imm.has_value()) {
5075 // Literal value not known, so we conservatively assume it's different.
5076 IsAnotherLiteral = true;
5077 } else if (!isInlineConstant(Inst, OpIdx)) {
5078 uint64_t Value = *Imm;
5079 bool IsForcedFP64 =
5080 Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
5081 (Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_REG_IMM_FP64 &&
5082 HasMandatoryLiteral);
5083 bool IsFP64 = (IsForcedFP64 || AMDGPU::isSISrcFPOperand(Desc, OpIdx)) &&
5084 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
5085 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
5086
5087 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) &&
5088 !IsForcedFP64 && (!has64BitLiterals() || Desc.getSize() != 4)) {
5089 Error(getOperandLoc(Operands, OpIdx),
5090 "invalid operand for instruction");
5091 return false;
5092 }
5093
5094 if (IsFP64 && IsValid32Op && !IsForcedFP64)
5095 Value = Hi_32(Value);
5096
5097 IsAnotherLiteral = !LiteralValue || *LiteralValue != Value;
5098 LiteralValue = Value;
5099 }
5100
5101 if (IsAnotherLiteral && !HasMandatoryLiteral &&
5102 !getFeatureBits()[FeatureVOP3Literal]) {
5103 Error(getOperandLoc(Operands, OpIdx),
5104 "literal operands are not supported");
5105 return false;
5106 }
5107
5108 if (LiteralOpIdx && IsAnotherLiteral) {
5109 Error(getLaterLoc(getOperandLoc(Operands, OpIdx),
5110 getOperandLoc(Operands, *LiteralOpIdx)),
5111 "only one unique literal operand is allowed");
5112 return false;
5113 }
5114
5115 if (IsAnotherLiteral)
5116 LiteralOpIdx = OpIdx;
5117 }
5118
5119 return true;
5120}
5121
5122// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
5123static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
5124 const MCRegisterInfo *MRI) {
5125 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name);
5126 if (OpIdx < 0)
5127 return -1;
5128
5129 const MCOperand &Op = Inst.getOperand(OpIdx);
5130 if (!Op.isReg())
5131 return -1;
5132
5133 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5134 auto Reg = Sub ? Sub : Op.getReg();
5135 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5136 return AGPR32.contains(Reg) ? 1 : 0;
5137}
5138
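// For FLAT, MUBUF, DS and similar memory instructions, check that the data and
// destination operands agree on AGPR vs. VGPR usage; only gfx90a-style targets
// may use AGPRs here at all.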
5139bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
5140 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5141 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
5142 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5143 SIInstrFlags::DS)) == 0)
5144 return true;
5145
5146 AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
5147 ? AMDGPU::OpName::data0
5148 : AMDGPU::OpName::vdata;
5149
5150 const MCRegisterInfo *MRI = getMRI();
5151 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
5152 int DataAreg = IsAGPROperand(Inst, DataName, MRI);
5153
5154 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
5155 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
5156 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5157 return false;
5158 }
5159
5160 auto FB = getFeatureBits();
5161 if (FB[AMDGPU::FeatureGFX90AInsts]) {
5162 if (DataAreg < 0 || DstAreg < 0)
5163 return true;
5164 return DstAreg == DataAreg;
5165 }
5166
5167 return DstAreg < 1 && DataAreg < 1;
5168}
5169
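// On targets that require aligned VGPRs, check that every multi-register VGPR
// or AGPR operand starts at an even register, modulo the few opcodes listed
// below that tolerate unaligned tuples.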
5170bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
5171 auto FB = getFeatureBits();
5172 if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
5173 return true;
5174
5175 unsigned Opc = Inst.getOpcode();
5176 const MCRegisterInfo *MRI = getMRI();
5177 // DS_READ_B96_TR_B6 is the only DS instruction in GFX950 that allows an
5178 // unaligned VGPR. All others only allow even-aligned VGPRs.
5179 if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5180 return true;
5181
5182 if (FB[AMDGPU::FeatureGFX1250Insts]) {
5183 switch (Opc) {
5184 default:
5185 break;
5186 case AMDGPU::DS_LOAD_TR6_B96:
5187 case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5188 // DS_LOAD_TR6_B96 is the only DS instruction in GFX1250 that allows an
5189 // unaligned VGPR. All others only allow even-aligned VGPRs.
5190 return true;
5191 case AMDGPU::GLOBAL_LOAD_TR6_B96:
5192 case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5193 // GLOBAL_LOAD_TR6_B96 is the only GLOBAL instruction in GFX1250 that
5194 // allows an unaligned VGPR for vdst, but other operands still only allow
5195 // even-aligned VGPRs.
5196 int VAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
5197 if (VAddrIdx != -1) {
5198 const MCOperand &Op = Inst.getOperand(VAddrIdx);
5199 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5200 if ((Sub - AMDGPU::VGPR0) & 1)
5201 return false;
5202 }
5203 return true;
5204 }
5205 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5206 case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
5207 return true;
5208 }
5209 }
5210
5211 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5212 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5213 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
5214 const MCOperand &Op = Inst.getOperand(I);
5215 if (!Op.isReg())
5216 continue;
5217
5218 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5219 if (!Sub)
5220 continue;
5221
5222 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
5223 return false;
5224 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
5225 return false;
5226 }
5227
5228 return true;
5229}
5230
5231SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
5232 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5233 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5234 if (Op.isBLGP())
5235 return Op.getStartLoc();
5236 }
5237 return SMLoc();
5238}
5239
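// The gfx940 DGEMM MFMAs spell the blgp field as neg:[...]; check that the
// modifier spelling used in the source matches this opcode.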
5240bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
5241 const OperandVector &Operands) {
5242 unsigned Opc = Inst.getOpcode();
5243 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
5244 if (BlgpIdx == -1)
5245 return true;
5246 SMLoc BLGPLoc = getBLGPLoc(Operands);
5247 if (!BLGPLoc.isValid())
5248 return true;
5249 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
5250 auto FB = getFeatureBits();
5251 bool UsesNeg = false;
5252 if (FB[AMDGPU::FeatureGFX940Insts]) {
5253 switch (Opc) {
5254 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5255 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5256 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5257 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5258 UsesNeg = true;
5259 }
5260 }
5261
5262 if (IsNeg == UsesNeg)
5263 return true;
5264
5265 Error(BLGPLoc,
5266 UsesNeg ? "invalid modifier: blgp is not supported"
5267 : "invalid modifier: neg is not supported");
5268
5269 return false;
5270}
5271
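// On GFX11+, the separate s_waitcnt_*cnt instructions only accept null as
// their sdst operand.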
5272bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
5273 const OperandVector &Operands) {
5274 if (!isGFX11Plus())
5275 return true;
5276
5277 unsigned Opc = Inst.getOpcode();
5278 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5279 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5280 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5281 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5282 return true;
5283
5284 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
5285 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
5286 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
5287 if (Reg == AMDGPU::SGPR_NULL)
5288 return true;
5289
5290 Error(getOperandLoc(Operands, Src0Idx), "src0 must be null");
5291 return false;
5292}
5293
5294bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
5295 const OperandVector &Operands) {
5296 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5297 if ((TSFlags & SIInstrFlags::DS) == 0)
5298 return true;
5299 if (TSFlags & SIInstrFlags::GWS)
5300 return validateGWS(Inst, Operands);
5301 // Only validate GDS for non-GWS instructions.
5302 if (hasGDS())
5303 return true;
5304 int GDSIdx =
5305 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
5306 if (GDSIdx < 0)
5307 return true;
5308 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
5309 if (GDS) {
5310 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5311 Error(S, "gds modifier is not supported on this GPU");
5312 return false;
5313 }
5314 return true;
5315}
5316
5317// gfx90a has an undocumented limitation:
5318// DS_GWS opcodes must use even aligned registers.
5319bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
5320 const OperandVector &Operands) {
5321 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5322 return true;
5323
5324 int Opc = Inst.getOpcode();
5325 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5326 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5327 return true;
5328
5329 const MCRegisterInfo *MRI = getMRI();
5330 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5331 int Data0Pos =
5332 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
5333 assert(Data0Pos != -1);
5334 auto Reg = Inst.getOperand(Data0Pos).getReg();
5335 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5336 if (RegIdx & 1) {
5337 Error(getOperandLoc(Operands, Data0Pos), "vgpr must be even aligned");
5338 return false;
5339 }
5340
5341 return true;
5342}
5343
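// Check the cache-policy (cpol) bits against the target and instruction:
// scale_offset/nv availability, SMEM restrictions, gfx90a scc rules and the
// glc/sc0 requirements of atomics that return data.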
5344bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
5345 const OperandVector &Operands,
5346 SMLoc IDLoc) {
5347 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
5348 AMDGPU::OpName::cpol);
5349 if (CPolPos == -1)
5350 return true;
5351
5352 unsigned CPol = Inst.getOperand(CPolPos).getImm();
5353
5354 if (!isGFX1250()) {
5355 if (CPol & CPol::SCAL) {
5356 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5357 StringRef CStr(S.getPointer());
5358 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5359 Error(S, "scale_offset is not supported on this GPU");
5360 }
5361 if (CPol & CPol::NV) {
5362 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5363 StringRef CStr(S.getPointer());
5364 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
5365 Error(S, "nv is not supported on this GPU");
5366 }
5367 }
5368
5369 if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Inst.getOpcode())) {
5370 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5371 StringRef CStr(S.getPointer());
5372 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
5373 Error(S, "scale_offset is not supported for this instruction");
5374 }
5375
5376 if (isGFX12Plus())
5377 return validateTHAndScopeBits(Inst, Operands, CPol);
5378
5379 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5380 if (TSFlags & SIInstrFlags::SMRD) {
5381 if (CPol && (isSI() || isCI())) {
5382 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5383 Error(S, "cache policy is not supported for SMRD instructions");
5384 return false;
5385 }
5386 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
5387 Error(IDLoc, "invalid cache policy for SMEM instruction");
5388 return false;
5389 }
5390 }
5391
5392 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
5393 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
5394 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5395 SIInstrFlags::FLAT;
5396 if (!(TSFlags & AllowSCCModifier)) {
5397 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5398 StringRef CStr(S.getPointer());
5399 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
5400 Error(S,
5401 "scc modifier is not supported for this instruction on this GPU");
5402 return false;
5403 }
5404 }
5405
5406 if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
5407 return true;
5408
5409 if (TSFlags & SIInstrFlags::IsAtomicRet) {
5410 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
5411 Error(IDLoc, isGFX940() ? "instruction must use sc0"
5412 : "instruction must use glc");
5413 return false;
5414 }
5415 } else {
5416 if (CPol & CPol::GLC) {
5417 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5418 StringRef CStr(S.getPointer());
5419 S = SMLoc::getFromPointer(
5420 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
5421 Error(S, isGFX940() ? "instruction must not use sc0"
5422 : "instruction must not use glc");
5423 return false;
5424 }
5425 }
5426
5427 return true;
5428}
5429
5430bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
5431 const OperandVector &Operands,
5432 const unsigned CPol) {
5433 const unsigned TH = CPol & AMDGPU::CPol::TH;
5434 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
5435
5436 const unsigned Opcode = Inst.getOpcode();
5437 const MCInstrDesc &TID = MII.get(Opcode);
5438
5439 auto PrintError = [&](StringRef Msg) {
5440 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5441 Error(S, Msg);
5442 return false;
5443 };
5444
5445 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5446 ((TH & AMDGPU::CPol::TH_ATOMIC_RETURN) !=
5447 AMDGPU::CPol::TH_ATOMIC_RETURN))
5448 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5449
5450 if (TH == 0)
5451 return true;
5452
5453 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5454 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5455 (TH == AMDGPU::CPol::TH_NT_HT)))
5456 return PrintError("invalid th value for SMEM instruction");
5457
5458 if (TH == AMDGPU::CPol::TH_BYPASS) {
5459 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5460 CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
5461 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5462 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
5463 return PrintError("scope and th combination is not valid");
5464 }
5465
5466 unsigned THType = AMDGPU::getTemporalHintType(TID);
5467 if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
5468 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5469 return PrintError("invalid th value for atomic instructions");
5470 } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
5471 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5472 return PrintError("invalid th value for store instructions");
5473 } else {
5474 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5475 return PrintError("invalid th value for load instructions");
5476 }
5477
5478 return true;
5479}
5480
5481bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5482 const OperandVector &Operands) {
5483 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5484 if (Desc.mayStore() &&
5485 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5486 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5487 if (Loc != getInstLoc(Operands)) {
5488 Error(Loc, "TFE modifier has no meaning for store instructions");
5489 return false;
5490 }
5491 }
5492
5493 return true;
5494}
5495
5496bool AMDGPUAsmParser::validateSetVgprMSB(const MCInst &Inst,
5497 const OperandVector &Operands) {
5498 if (Inst.getOpcode() != AMDGPU::S_SET_VGPR_MSB_gfx12)
5499 return true;
5500
5501 int Simm16Pos =
5502 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::simm16);
5503 if ((unsigned)Inst.getOperand(Simm16Pos).getImm() > 255) {
5504 SMLoc Loc = Operands[1]->getStartLoc();
5505 Error(Loc, "s_set_vgpr_msb accepts values in range [0..255]");
5506 return false;
5507 }
5508
5509 return true;
5510}
5511
5512bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
5513 const OperandVector &Operands) {
5514 unsigned Opc = Inst.getOpcode();
5515 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5516 const MCInstrDesc &Desc = MII.get(Opc);
5517
5518 auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) -> bool {
5519 int FmtIdx = AMDGPU::getNamedOperandIdx(Opc, FmtOp);
5520 if (FmtIdx == -1)
5521 return true;
5522 unsigned Fmt = Inst.getOperand(FmtIdx).getImm();
5523 int SrcIdx = AMDGPU::getNamedOperandIdx(Opc, SrcOp);
5524 unsigned RegSize =
5525 TRI->getRegClass(Desc.operands()[SrcIdx].RegClass).getSizeInBits();
5526
5527 if (RegSize == AMDGPU::wmmaScaleF8F6F4FormatToNumRegs(Fmt) * 32)
5528 return true;
5529
5530 static const char *FmtNames[] = {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
5531 "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
5532 "MATRIX_FMT_FP4"};
5533
5534 Error(getOperandLoc(Operands, SrcIdx),
5535 "wrong register tuple size for " + Twine(FmtNames[Fmt]));
5536 return false;
5537 };
5538
5539 return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
5540 validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
5541}
5542
5543bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, SMLoc IDLoc,
5544 const OperandVector &Operands) {
5545 if (!validateLdsDirect(Inst, Operands))
5546 return false;
5547 if (!validateTrue16OpSel(Inst)) {
5548 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5549 "op_sel operand conflicts with 16-bit operand suffix");
5550 return false;
5551 }
5552 if (!validateSOPLiteral(Inst, Operands))
5553 return false;
5554 if (!validateVOPLiteral(Inst, Operands)) {
5555 return false;
5556 }
5557 if (!validateConstantBusLimitations(Inst, Operands)) {
5558 return false;
5559 }
5560 if (!validateVOPD(Inst, Operands)) {
5561 return false;
5562 }
5563 if (!validateIntClampSupported(Inst)) {
5564 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5565 "integer clamping is not supported on this GPU");
5566 return false;
5567 }
5568 if (!validateOpSel(Inst)) {
5569 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5570 "invalid op_sel operand");
5571 return false;
5572 }
5573 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5574 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5575 "invalid neg_lo operand");
5576 return false;
5577 }
5578 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5579 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5580 "invalid neg_hi operand");
5581 return false;
5582 }
5583 if (!validateDPP(Inst, Operands)) {
5584 return false;
5585 }
5586 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
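// (Illustrative: buffer_load_format_d16_x encodes d16 in the mnemonic itself,
// whereas MIMG instructions such as image_load carry a separate d16 bit.)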
5587 if (!validateMIMGD16(Inst)) {
5588 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5589 "d16 modifier is not supported on this GPU");
5590 return false;
5591 }
5592 if (!validateMIMGDim(Inst, Operands)) {
5593 Error(IDLoc, "missing dim operand");
5594 return false;
5595 }
5596 if (!validateTensorR128(Inst)) {
5597 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5598 "instruction must set modifier r128=0");
5599 return false;
5600 }
5601 if (!validateMIMGMSAA(Inst)) {
5602 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5603 "invalid dim; must be MSAA type");
5604 return false;
5605 }
5606 if (!validateMIMGDataSize(Inst, IDLoc)) {
5607 return false;
5608 }
5609 if (!validateMIMGAddrSize(Inst, IDLoc))
5610 return false;
5611 if (!validateMIMGAtomicDMask(Inst)) {
5612 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5613 "invalid atomic image dmask");
5614 return false;
5615 }
5616 if (!validateMIMGGatherDMask(Inst)) {
5617 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5618 "invalid image_gather dmask: only one bit must be set");
5619 return false;
5620 }
5621 if (!validateMovrels(Inst, Operands)) {
5622 return false;
5623 }
5624 if (!validateOffset(Inst, Operands)) {
5625 return false;
5626 }
5627 if (!validateMAIAccWrite(Inst, Operands)) {
5628 return false;
5629 }
5630 if (!validateMAISrc2(Inst, Operands)) {
5631 return false;
5632 }
5633 if (!validateMFMA(Inst, Operands)) {
5634 return false;
5635 }
5636 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5637 return false;
5638 }
5639
5640 if (!validateAGPRLdSt(Inst)) {
5641 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5642 ? "invalid register class: data and dst should be all VGPR or AGPR"
5643 : "invalid register class: agpr loads and stores not supported on this GPU"
5644 );
5645 return false;
5646 }
5647 if (!validateVGPRAlign(Inst)) {
5648 Error(IDLoc,
5649 "invalid register class: vgpr tuples must be 64 bit aligned");
5650 return false;
5651 }
5652 if (!validateDS(Inst, Operands)) {
5653 return false;
5654 }
5655
5656 if (!validateBLGP(Inst, Operands)) {
5657 return false;
5658 }
5659
5660 if (!validateDivScale(Inst)) {
5661 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5662 return false;
5663 }
5664 if (!validateWaitCnt(Inst, Operands)) {
5665 return false;
5666 }
5667 if (!validateTFE(Inst, Operands)) {
5668 return false;
5669 }
5670 if (!validateSetVgprMSB(Inst, Operands)) {
5671 return false;
5672 }
5673 if (!validateWMMA(Inst, Operands)) {
5674 return false;
5675 }
5676
5677 return true;
5678}
5679
5680static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5681 const FeatureBitset &FBS,
5682 unsigned VariantID = 0);
5683
5684static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5685 const FeatureBitset &AvailableFeatures,
5686 unsigned VariantID);
5687
5688bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5689 const FeatureBitset &FBS) {
5690 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5691}
5692
5693bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5694 const FeatureBitset &FBS,
5695 ArrayRef<unsigned> Variants) {
5696 for (auto Variant : Variants) {
5697 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5698 return true;
5699 }
5700
5701 return false;
5702}
5703
5704bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5705 SMLoc IDLoc) {
5706 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5707
5708 // Check if requested instruction variant is supported.
5709 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5710 return false;
5711
5712 // This instruction is not supported.
5713 // Clear any other pending errors because they are no longer relevant.
5714 getParser().clearPendingErrors();
5715
5716 // Requested instruction variant is not supported.
5717 // Check if any other variants are supported.
5718 StringRef VariantName = getMatchedVariantName();
5719 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5720 return Error(IDLoc,
5721 Twine(VariantName,
5722 " variant of this instruction is not supported"));
5723 }
5724
5725 // Check if this instruction may be used with a different wavesize.
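// (Illustrative: a mnemonic that only matches under wave32 will match once
// FeatureWavefrontSize32 is flipped on below, so the user gets
// "instruction requires wavesize=32" instead of a generic error.)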
5726 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5727 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5728 // FIXME: Use getAvailableFeatures, and do not manually recompute
5729 FeatureBitset FeaturesWS32 = getFeatureBits();
5730 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5731 .flip(AMDGPU::FeatureWavefrontSize32);
5732 FeatureBitset AvailableFeaturesWS32 =
5733 ComputeAvailableFeatures(FeaturesWS32);
5734
5735 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5736 return Error(IDLoc, "instruction requires wavesize=32");
5737 }
5738
5739 // Finally check if this instruction is supported on any other GPU.
5740 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5741 return Error(IDLoc, "instruction not supported on this GPU");
5742 }
5743
5744 // Instruction not supported on any GPU. Probably a typo.
5745 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5746 return Error(IDLoc, "invalid instruction" + Suggestion);
5747}
5748
5749static bool isInvalidVOPDY(const OperandVector &Operands,
5750 uint64_t InvalidOprIdx) {
5751 assert(InvalidOprIdx < Operands.size());
5752 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5753 if (Op.isToken() && InvalidOprIdx > 1) {
5754 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5755 return PrevOp.isToken() && PrevOp.getToken() == "::";
5756 }
5757 return false;
5758}
5759
5760bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5761 OperandVector &Operands,
5762 MCStreamer &Out,
5763 uint64_t &ErrorInfo,
5764 bool MatchingInlineAsm) {
5765 MCInst Inst;
5766 Inst.setLoc(IDLoc);
5767 unsigned Result = Match_Success;
5768 for (auto Variant : getMatchedVariants()) {
5769 uint64_t EI;
5770 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5771 Variant);
5772 // Match statuses are ordered from least to most specific, and the most
5773 // specific status is used as the result:
5774 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
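// (Illustrative: if one variant fails with Match_InvalidOperand and another
// with Match_MissingFeature, the Match_MissingFeature diagnostic is reported.)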
5775 if (R == Match_Success || R == Match_MissingFeature ||
5776 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5777 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5778 Result != Match_MissingFeature)) {
5779 Result = R;
5780 ErrorInfo = EI;
5781 }
5782 if (R == Match_Success)
5783 break;
5784 }
5785
5786 if (Result == Match_Success) {
5787 if (!validateInstruction(Inst, IDLoc, Operands)) {
5788 return true;
5789 }
5790 Out.emitInstruction(Inst, getSTI());
5791 return false;
5792 }
5793
5794 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5795 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5796 return true;
5797 }
5798
5799 switch (Result) {
5800 default: break;
5801 case Match_MissingFeature:
5802 // It has been verified that the specified instruction
5803 // mnemonic is valid. A match was found but it requires
5804 // features which are not supported on this GPU.
5805 return Error(IDLoc, "operands are not valid for this GPU or mode");
5806
5807 case Match_InvalidOperand: {
5808 SMLoc ErrorLoc = IDLoc;
5809 if (ErrorInfo != ~0ULL) {
5810 if (ErrorInfo >= Operands.size()) {
5811 return Error(IDLoc, "too few operands for instruction");
5812 }
5813 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5814 if (ErrorLoc == SMLoc())
5815 ErrorLoc = IDLoc;
5816
5817 if (isInvalidVOPDY(Operands, ErrorInfo))
5818 return Error(ErrorLoc, "invalid VOPDY instruction");
5819 }
5820 return Error(ErrorLoc, "invalid operand for instruction");
5821 }
5822
5823 case Match_MnemonicFail:
5824 llvm_unreachable("Invalid instructions should have been handled already");
5825 }
5826 llvm_unreachable("Implement any new match types added!");
5827}
5828
5829bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5830 int64_t Tmp = -1;
5831 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5832 return true;
5833 }
5834 if (getParser().parseAbsoluteExpression(Tmp)) {
5835 return true;
5836 }
5837 Ret = static_cast<uint32_t>(Tmp);
5838 return false;
5839}
5840
5841bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5842 if (!getSTI().getTargetTriple().isAMDGCN())
5843 return TokError("directive only supported for amdgcn architecture");
5844
5845 std::string TargetIDDirective;
5846 SMLoc TargetStart = getTok().getLoc();
5847 if (getParser().parseEscapedString(TargetIDDirective))
5848 return true;
5849
5850 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5851 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5852 return getParser().Error(TargetRange.Start,
5853 (Twine(".amdgcn_target directive's target id ") +
5854 Twine(TargetIDDirective) +
5855 Twine(" does not match the specified target id ") +
5856 Twine(getTargetStreamer().getTargetID()->toString())).str());
5857
5858 return false;
5859}
5860
5861bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5862 return Error(Range.Start, "value out of range", Range);
5863}
5864
5865bool AMDGPUAsmParser::calculateGPRBlocks(
5866 const FeatureBitset &Features, const MCExpr *VCCUsed,
5867 const MCExpr *FlatScrUsed, bool XNACKUsed,
5868 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
5869 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5870 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
5871 // TODO(scott.linder): These calculations are duplicated from
5872 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5873 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5874 MCContext &Ctx = getContext();
5875
5876 const MCExpr *NumSGPRs = NextFreeSGPR;
5877 int64_t EvaluatedSGPRs;
5878
5879 if (Version.Major >= 10)
5880 NumSGPRs = MCConstantExpr::create(0, Ctx);
5881 else {
5882 unsigned MaxAddressableNumSGPRs =
5883 IsaInfo::getAddressableNumSGPRs(&getSTI());
5884
5885 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
5886 !Features.test(FeatureSGPRInitBug) &&
5887 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5888 return OutOfRangeError(SGPRRange);
5889
5890 const MCExpr *ExtraSGPRs =
5891 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
5892 NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
5893
5894 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5895 (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5896 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5897 return OutOfRangeError(SGPRRange);
5898
5899 if (Features.test(FeatureSGPRInitBug))
5900 NumSGPRs =
5901 MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
5902 }
5903
5904 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
5905 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
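// Worked example (illustrative): NumGPR = 41 with an encoding granule of 4
// gives alignTo(41, 4) = 44, 44 / 4 = 11, and 11 - 1 = 10 encoded blocks.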
5906 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
5907 unsigned Granule) -> const MCExpr * {
5908 const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
5909 const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
5910 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
5911 const MCExpr *AlignToGPR =
5912 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
5913 const MCExpr *DivGPR =
5914 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
5915 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
5916 return SubGPR;
5917 };
5918
5919 VGPRBlocks = GetNumGPRBlocks(
5920 NextFreeVGPR,
5921 IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
5922 SGPRBlocks =
5923 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));
5924
5925 return false;
5926}
5927
5928bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5929 if (!getSTI().getTargetTriple().isAMDGCN())
5930 return TokError("directive only supported for amdgcn architecture");
5931
5932 if (!isHsaAbi(getSTI()))
5933 return TokError("directive only supported for amdhsa OS");
5934
5935 StringRef KernelName;
5936 if (getParser().parseIdentifier(KernelName))
5937 return true;
5938
5939 AMDGPU::MCKernelDescriptor KD =
5940 MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
5941 &getSTI(), getContext());
5942
5943 StringSet<> Seen;
5944
5945 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5946
5947 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
5948 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
5949
5950 SMRange VGPRRange;
5951 const MCExpr *NextFreeVGPR = ZeroExpr;
5952 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
5953 const MCExpr *NamedBarCnt = ZeroExpr;
5954 uint64_t SharedVGPRCount = 0;
5955 uint64_t PreloadLength = 0;
5956 uint64_t PreloadOffset = 0;
5957 SMRange SGPRRange;
5958 const MCExpr *NextFreeSGPR = ZeroExpr;
5959
5960 // Count the number of user SGPRs implied from the enabled feature bits.
5961 unsigned ImpliedUserSGPRCount = 0;
5962
5963 // Track if the asm explicitly contains the directive for the user SGPR
5964 // count.
5965 std::optional<unsigned> ExplicitUserSGPRCount;
5966 const MCExpr *ReserveVCC = OneExpr;
5967 const MCExpr *ReserveFlatScr = OneExpr;
5968 std::optional<bool> EnableWavefrontSize32;
5969
5970 while (true) {
5971 while (trySkipToken(AsmToken::EndOfStatement));
5972
5973 StringRef ID;
5974 SMRange IDRange = getTok().getLocRange();
5975 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5976 return true;
5977
5978 if (ID == ".end_amdhsa_kernel")
5979 break;
5980
5981 if (!Seen.insert(ID).second)
5982 return TokError(".amdhsa_ directives cannot be repeated");
5983
5984 SMLoc ValStart = getLoc();
5985 const MCExpr *ExprVal;
5986 if (getParser().parseExpression(ExprVal))
5987 return true;
5988 SMLoc ValEnd = getLoc();
5989 SMRange ValRange = SMRange(ValStart, ValEnd);
5990
5991 int64_t IVal = 0;
5992 uint64_t Val = IVal;
5993 bool EvaluatableExpr;
5994 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
5995 if (IVal < 0)
5996 return OutOfRangeError(ValRange);
5997 Val = IVal;
5998 }
5999
6000#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
6001 if (!isUInt<ENTRY##_WIDTH>(Val)) \
6002 return OutOfRangeError(RANGE); \
6003 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
6004 getContext());
6005
6006// Some fields use the parsed value immediately, which requires the expression
6007// to be resolvable.
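// (Illustrative: .amdhsa_user_sgpr_count feeds ExplicitUserSGPRCount directly
// below, so its expression must evaluate to a constant at parse time.)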
6008#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6009 if (!(RESOLVED)) \
6010 return Error(IDRange.Start, "directive should have resolvable expression", \
6011 IDRange);
6012
6013 if (ID == ".amdhsa_group_segment_fixed_size") {
6014 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
6015 CHAR_BIT>(Val))
6016 return OutOfRangeError(ValRange);
6017 KD.group_segment_fixed_size = ExprVal;
6018 } else if (ID == ".amdhsa_private_segment_fixed_size") {
6019 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
6020 CHAR_BIT>(Val))
6021 return OutOfRangeError(ValRange);
6022 KD.private_segment_fixed_size = ExprVal;
6023 } else if (ID == ".amdhsa_kernarg_size") {
6024 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
6025 return OutOfRangeError(ValRange);
6026 KD.kernarg_size = ExprVal;
6027 } else if (ID == ".amdhsa_user_sgpr_count") {
6028 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6029 ExplicitUserSGPRCount = Val;
6030 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
6031 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6032 if (hasArchitectedFlatScratch())
6033 return Error(IDRange.Start,
6034 "directive is not supported with architected flat scratch",
6035 IDRange);
6036 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6037 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6038 ExprVal, ValRange);
6039 if (Val)
6040 ImpliedUserSGPRCount += 4;
6041 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
6042 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6043 if (!hasKernargPreload())
6044 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6045
6046 if (Val > getMaxNumUserSGPRs())
6047 return OutOfRangeError(ValRange);
6048 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
6049 ValRange);
6050 if (Val) {
6051 ImpliedUserSGPRCount += Val;
6052 PreloadLength = Val;
6053 }
6054 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
6055 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6056 if (!hasKernargPreload())
6057 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6058
6059 if (Val >= 1024)
6060 return OutOfRangeError(ValRange);
6061 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
6062 ValRange);
6063 if (Val)
6064 PreloadOffset = Val;
6065 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
6066 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6067 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6068 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6069 ValRange);
6070 if (Val)
6071 ImpliedUserSGPRCount += 2;
6072 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
6073 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6074 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6075 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6076 ValRange);
6077 if (Val)
6078 ImpliedUserSGPRCount += 2;
6079 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
6080 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6081 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6082 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6083 ExprVal, ValRange);
6084 if (Val)
6085 ImpliedUserSGPRCount += 2;
6086 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
6087 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6088 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6089 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6090 ValRange);
6091 if (Val)
6092 ImpliedUserSGPRCount += 2;
6093 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
6094 if (hasArchitectedFlatScratch())
6095 return Error(IDRange.Start,
6096 "directive is not supported with architected flat scratch",
6097 IDRange);
6098 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6099 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6100 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6101 ExprVal, ValRange);
6102 if (Val)
6103 ImpliedUserSGPRCount += 2;
6104 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
6105 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6106 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6107 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6108 ExprVal, ValRange);
6109 if (Val)
6110 ImpliedUserSGPRCount += 1;
6111 } else if (ID == ".amdhsa_wavefront_size32") {
6112 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6113 if (IVersion.Major < 10)
6114 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6115 EnableWavefrontSize32 = Val;
6116 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6117 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6118 ValRange);
6119 } else if (ID == ".amdhsa_uses_dynamic_stack") {
6120 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6121 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6122 ValRange);
6123 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6124 if (hasArchitectedFlatScratch())
6125 return Error(IDRange.Start,
6126 "directive is not supported with architected flat scratch",
6127 IDRange);
6128 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6129 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6130 ValRange);
6131 } else if (ID == ".amdhsa_enable_private_segment") {
6132 if (!hasArchitectedFlatScratch())
6133 return Error(
6134 IDRange.Start,
6135 "directive is not supported without architected flat scratch",
6136 IDRange);
6137 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6138 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6139 ValRange);
6140 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
6141 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6142 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6143 ValRange);
6144 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
6145 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6146 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6147 ValRange);
6148 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
6149 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6150 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6151 ValRange);
6152 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
6153 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6154 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6155 ValRange);
6156 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
6157 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6158 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6159 ValRange);
6160 } else if (ID == ".amdhsa_next_free_vgpr") {
6161 VGPRRange = ValRange;
6162 NextFreeVGPR = ExprVal;
6163 } else if (ID == ".amdhsa_next_free_sgpr") {
6164 SGPRRange = ValRange;
6165 NextFreeSGPR = ExprVal;
6166 } else if (ID == ".amdhsa_accum_offset") {
6167 if (!isGFX90A())
6168 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6169 AccumOffset = ExprVal;
6170 } else if (ID == ".amdhsa_named_barrier_count") {
6171 if (!isGFX1250())
6172 return Error(IDRange.Start, "directive requires gfx1250+", IDRange);
6173 NamedBarCnt = ExprVal;
6174 } else if (ID == ".amdhsa_reserve_vcc") {
6175 if (EvaluatableExpr && !isUInt<1>(Val))
6176 return OutOfRangeError(ValRange);
6177 ReserveVCC = ExprVal;
6178 } else if (ID == ".amdhsa_reserve_flat_scratch") {
6179 if (IVersion.Major < 7)
6180 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
6181 if (hasArchitectedFlatScratch())
6182 return Error(IDRange.Start,
6183 "directive is not supported with architected flat scratch",
6184 IDRange);
6185 if (EvaluatableExpr && !isUInt<1>(Val))
6186 return OutOfRangeError(ValRange);
6187 ReserveFlatScr = ExprVal;
6188 } else if (ID == ".amdhsa_reserve_xnack_mask") {
6189 if (IVersion.Major < 8)
6190 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
6191 if (!isUInt<1>(Val))
6192 return OutOfRangeError(ValRange);
6193 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6194 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
6195 IDRange);
6196 } else if (ID == ".amdhsa_float_round_mode_32") {
6197 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6198 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6199 ValRange);
6200 } else if (ID == ".amdhsa_float_round_mode_16_64") {
6201 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6202 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6203 ValRange);
6204 } else if (ID == ".amdhsa_float_denorm_mode_32") {
6205 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6206 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6207 ValRange);
6208 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
6209 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6210 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6211 ValRange);
6212 } else if (ID == ".amdhsa_dx10_clamp") {
6213 if (IVersion.Major >= 12)
6214 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
6215 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6216 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6217 ValRange);
6218 } else if (ID == ".amdhsa_ieee_mode") {
6219 if (IVersion.Major >= 12)
6220 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
6221 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6222 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6223 ValRange);
6224 } else if (ID == ".amdhsa_fp16_overflow") {
6225 if (IVersion.Major < 9)
6226 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
6227 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6228 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6229 ValRange);
6230 } else if (ID == ".amdhsa_tg_split") {
6231 if (!isGFX90A())
6232 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6233 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
6234 ExprVal, ValRange);
6235 } else if (ID == ".amdhsa_workgroup_processor_mode") {
6236 if (!supportsWGP(getSTI()))
6237 return Error(IDRange.Start,
6238 "directive unsupported on " + getSTI().getCPU(), IDRange);
6239 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6240 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6241 ValRange);
6242 } else if (ID == ".amdhsa_memory_ordered") {
6243 if (IVersion.Major < 10)
6244 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6245 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6246 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6247 ValRange);
6248 } else if (ID == ".amdhsa_forward_progress") {
6249 if (IVersion.Major < 10)
6250 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6251 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6252 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6253 ValRange);
6254 } else if (ID == ".amdhsa_shared_vgpr_count") {
6255 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6256 if (IVersion.Major < 10 || IVersion.Major >= 12)
6257 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
6258 IDRange);
6259 SharedVGPRCount = Val;
6260 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6261 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6262 ValRange);
6263 } else if (ID == ".amdhsa_inst_pref_size") {
6264 if (IVersion.Major < 11)
6265 return Error(IDRange.Start, "directive requires gfx11+", IDRange);
6266 if (IVersion.Major == 11) {
6267 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6268 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6269 ValRange);
6270 } else {
6271 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6272 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6273 ValRange);
6274 }
6275 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
6276 PARSE_BITS_ENTRY(
6277 KD.compute_pgm_rsrc2,
6278 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6279 ExprVal, ValRange);
6280 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
6281 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6282 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6283 ExprVal, ValRange);
6284 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
6285 PARSE_BITS_ENTRY(
6286 KD.compute_pgm_rsrc2,
6287 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6288 ExprVal, ValRange);
6289 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
6290 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6291 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6292 ExprVal, ValRange);
6293 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
6294 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6295 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6296 ExprVal, ValRange);
6297 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
6298 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6299 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6300 ExprVal, ValRange);
6301 } else if (ID == ".amdhsa_exception_int_div_zero") {
6302 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6303 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6304 ExprVal, ValRange);
6305 } else if (ID == ".amdhsa_round_robin_scheduling") {
6306 if (IVersion.Major < 12)
6307 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
6308 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6309 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6310 ValRange);
6311 } else {
6312 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
6313 }
6314
6315#undef PARSE_BITS_ENTRY
6316 }
6317
6318 if (!Seen.contains(".amdhsa_next_free_vgpr"))
6319 return TokError(".amdhsa_next_free_vgpr directive is required");
6320
6321 if (!Seen.contains(".amdhsa_next_free_sgpr"))
6322 return TokError(".amdhsa_next_free_sgpr directive is required");
6323
6324 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6325
6326 // Consider the case where the total number of UserSGPRs, including trailing
6327 // allocated preload SGPRs, is greater than the number of explicitly
6328 // referenced SGPRs.
6329 if (PreloadLength) {
6330 MCContext &Ctx = getContext();
6331 NextFreeSGPR = AMDGPUMCExpr::createMax(
6332 {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx);
6333 }
6334
6335 const MCExpr *VGPRBlocks;
6336 const MCExpr *SGPRBlocks;
6337 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6338 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6339 EnableWavefrontSize32, NextFreeVGPR,
6340 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6341 SGPRBlocks))
6342 return true;
6343
6344 int64_t EvaluatedVGPRBlocks;
6345 bool VGPRBlocksEvaluatable =
6346 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6347 if (VGPRBlocksEvaluatable &&
6348 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
6349 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
6350 return OutOfRangeError(VGPRRange);
6351 }
6352 MCKernelDescriptor::bits_set(
6353 KD.compute_pgm_rsrc1, VGPRBlocks,
6354 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6355 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
6356
6357 int64_t EvaluatedSGPRBlocks;
6358 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6359 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
6360 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
6361 return OutOfRangeError(SGPRRange);
6362 MCKernelDescriptor::bits_set(
6363 KD.compute_pgm_rsrc1, SGPRBlocks,
6364 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6365 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
6366
6367 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6368 return TokError("amdgpu_user_sgpr_count smaller than implied by "
6369 "enabled user SGPRs");
6370
6371 if (isGFX1250()) {
6372 if (!isUInt<COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
6373 return TokError("too many user SGPRs enabled");
6374 MCKernelDescriptor::bits_set(
6375 KD.compute_pgm_rsrc2,
6376 MCConstantExpr::create(UserSGPRCount, getContext()),
6377 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6378 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());
6379 } else {
6380 if (!isUInt<COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_WIDTH>(
6381 UserSGPRCount))
6382 return TokError("too many user SGPRs enabled");
6383 MCKernelDescriptor::bits_set(
6384 KD.compute_pgm_rsrc2,
6385 MCConstantExpr::create(UserSGPRCount, getContext()),
6386 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6387 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());
6388 }
6389
6390 int64_t IVal = 0;
6391 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
6392 return TokError("Kernarg size should be resolvable");
6393 uint64_t kernarg_size = IVal;
6394 if (PreloadLength && kernarg_size &&
6395 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6396 return TokError("Kernarg preload length + offset is larger than the "
6397 "kernarg segment size");
6398
6399 if (isGFX90A()) {
6400 if (!Seen.contains(".amdhsa_accum_offset"))
6401 return TokError(".amdhsa_accum_offset directive is required");
6402 int64_t EvaluatedAccum;
6403 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
6404 uint64_t UEvaluatedAccum = EvaluatedAccum;
6405 if (AccumEvaluatable &&
6406 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6407 return TokError("accum_offset should be in range [4..256] in "
6408 "increments of 4");
6409
6410 int64_t EvaluatedNumVGPR;
6411 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
6412 AccumEvaluatable &&
6413 UEvaluatedAccum >
6414 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
6415 return TokError("accum_offset exceeds total VGPR allocation");
6416 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
6417 MCBinaryExpr::createDiv(
6418 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
6419 MCConstantExpr::create(1, getContext()), getContext());
6420 MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, AdjustedAccum,
6421 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6422 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6423 getContext());
6424 }
6425
6426 if (isGFX1250())
6427 MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, NamedBarCnt,
6428 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6429 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
6430 getContext());
6431
6432 if (IVersion.Major >= 10 && IVersion.Major < 12) {
6433 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
6434 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6435 return TokError("shared_vgpr_count directive not valid on "
6436 "wavefront size 32");
6437 }
6438
6439 if (VGPRBlocksEvaluatable &&
6440 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
6441 63)) {
6442 return TokError("shared_vgpr_count*2 + "
6443 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
6444 "exceed 63\n");
6445 }
6446 }
6447
6448 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
6449 NextFreeVGPR, NextFreeSGPR,
6450 ReserveVCC, ReserveFlatScr);
6451 return false;
6452}
6453
6454bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6455 uint32_t Version;
6456 if (ParseAsAbsoluteExpression(Version))
6457 return true;
6458
6459 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
6460 return false;
6461}
6462
6463bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6464 AMDGPUMCKernelCodeT &C) {
6465 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6466 // assembly for backwards compatibility.
6467 if (ID == "max_scratch_backing_memory_byte_size") {
6468 Parser.eatToEndOfStatement();
6469 return false;
6470 }
6471
6472 SmallString<40> ErrStr;
6473 raw_svector_ostream Err(ErrStr);
6474 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
6475 return TokError(Err.str());
6476 }
6477 Lex();
6478
6479 if (ID == "enable_wavefront_size32") {
6480 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6481 if (!isGFX10Plus())
6482 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
6483 if (!isWave32())
6484 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
6485 } else {
6486 if (!isWave64())
6487 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
6488 }
6489 }
6490
6491 if (ID == "wavefront_size") {
6492 if (C.wavefront_size == 5) {
6493 if (!isGFX10Plus())
6494 return TokError("wavefront_size=5 is only allowed on GFX10+");
6495 if (!isWave32())
6496 return TokError("wavefront_size=5 requires +WavefrontSize32");
6497 } else if (C.wavefront_size == 6) {
6498 if (!isWave64())
6499 return TokError("wavefront_size=6 requires +WavefrontSize64");
6500 }
6501 }
6502
6503 return false;
6504}
6505
6506bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6507 AMDGPUMCKernelCodeT KernelCode;
6508 KernelCode.initDefault(&getSTI(), getContext());
6509
6510 while (true) {
6511 // Lex EndOfStatement. This is in a while loop, because lexing a comment
6512 // will set the current token to EndOfStatement.
6513 while(trySkipToken(AsmToken::EndOfStatement));
6514
6515 StringRef ID;
6516 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
6517 return true;
6518
6519 if (ID == ".end_amd_kernel_code_t")
6520 break;
6521
6522 if (ParseAMDKernelCodeTValue(ID, KernelCode))
6523 return true;
6524 }
6525
6526 KernelCode.validate(&getSTI(), getContext());
6527 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6528
6529 return false;
6530}
6531
6532bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6533 StringRef KernelName;
6534 if (!parseId(KernelName, "expected symbol name"))
6535 return true;
6536
6537 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6538 ELF::STT_AMDGPU_HSA_KERNEL);
6539
6540 KernelScope.initialize(getContext());
6541 return false;
6542}
6543
6544bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6545 if (!getSTI().getTargetTriple().isAMDGCN()) {
6546 return Error(getLoc(),
6547 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6548 "architectures");
6549 }
6550
6551 auto TargetIDDirective = getLexer().getTok().getStringContents();
6552 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6553 return Error(getParser().getTok().getLoc(), "target id must match options");
6554
6555 getTargetStreamer().EmitISAVersion();
6556 Lex();
6557
6558 return false;
6559}
6560
6561bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6562 assert(isHsaAbi(getSTI()));
6563
6564 std::string HSAMetadataString;
6565 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6566 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6567 return true;
6568
6569 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6570 return Error(getLoc(), "invalid HSA metadata");
6571
6572 return false;
6573}
6574
6575/// Common code to parse out a block of text (typically YAML) between start and
6576/// end directives.
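/// Illustrative shape of the input being collected:
///   .amdgpu_metadata
///     <YAML text>
///   .end_amdgpu_metadata
/// Everything between the begin and end markers is appended to CollectString.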
6577bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6578 const char *AssemblerDirectiveEnd,
6579 std::string &CollectString) {
6580
6581 raw_string_ostream CollectStream(CollectString);
6582
6583 getLexer().setSkipSpace(false);
6584
6585 bool FoundEnd = false;
6586 while (!isToken(AsmToken::Eof)) {
6587 while (isToken(AsmToken::Space)) {
6588 CollectStream << getTokenStr();
6589 Lex();
6590 }
6591
6592 if (trySkipId(AssemblerDirectiveEnd)) {
6593 FoundEnd = true;
6594 break;
6595 }
6596
6597 CollectStream << Parser.parseStringToEndOfStatement()
6598 << getContext().getAsmInfo()->getSeparatorString();
6599
6600 Parser.eatToEndOfStatement();
6601 }
6602
6603 getLexer().setSkipSpace(true);
6604
6605 if (isToken(AsmToken::Eof) && !FoundEnd) {
6606 return TokError(Twine("expected directive ") +
6607 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6608 }
6609
6610 return false;
6611}
6612
6613/// Parse the assembler directive for new MsgPack-format PAL metadata.
6614bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6615 std::string String;
6616 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6617 PALMD::AssemblerDirectiveEnd, String))
6618 return true;
6619
6620 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6621 if (!PALMetadata->setFromString(String))
6622 return Error(getLoc(), "invalid PAL metadata");
6623 return false;
6624}
6625
6626/// Parse the assembler directive for old linear-format PAL metadata.
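/// The legacy form is a flat list of register/value pairs, e.g. (illustrative):
///   .amdgpu_pal_metadata <reg>, <value>, <reg>, <value>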
6627bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6628 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6629 return Error(getLoc(),
6630 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6631 "not available on non-amdpal OSes")).str());
6632 }
6633
6634 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6635 PALMetadata->setLegacy();
6636 for (;;) {
6637 uint32_t Key, Value;
6638 if (ParseAsAbsoluteExpression(Key)) {
6639 return TokError(Twine("invalid value in ") +
6640 Twine(PALMD::AssemblerDirective));
6641 }
6642 if (!trySkipToken(AsmToken::Comma)) {
6643 return TokError(Twine("expected an even number of values in ") +
6644 Twine(PALMD::AssemblerDirective));
6645 }
6646 if (ParseAsAbsoluteExpression(Value)) {
6647 return TokError(Twine("invalid value in ") +
6648 Twine(PALMD::AssemblerDirective));
6649 }
6650 PALMetadata->setRegister(Key, Value);
6651 if (!trySkipToken(AsmToken::Comma))
6652 break;
6653 }
6654 return false;
6655}
6656
6657/// ParseDirectiveAMDGPULDS
6658/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
6659bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6660 if (getParser().checkForValidSection())
6661 return true;
6662
6663 StringRef Name;
6664 SMLoc NameLoc = getLoc();
6665 if (getParser().parseIdentifier(Name))
6666 return TokError("expected identifier in directive");
6667
6668 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6669 if (getParser().parseComma())
6670 return true;
6671
6672 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6673
6674 int64_t Size;
6675 SMLoc SizeLoc = getLoc();
6676 if (getParser().parseAbsoluteExpression(Size))
6677 return true;
6678 if (Size < 0)
6679 return Error(SizeLoc, "size must be non-negative");
6680 if (Size > LocalMemorySize)
6681 return Error(SizeLoc, "size is too large");
6682
6683 int64_t Alignment = 4;
6684 if (trySkipToken(AsmToken::Comma)) {
6685 SMLoc AlignLoc = getLoc();
6686 if (getParser().parseAbsoluteExpression(Alignment))
6687 return true;
6688 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6689 return Error(AlignLoc, "alignment must be a power of two");
6690
6691 // Alignment larger than the size of LDS is possible in theory, as long
6692 // as the linker manages to place the symbol at address 0, but we do want
6693 // to make sure the alignment fits nicely into a 32-bit integer.
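// (Illustrative: an alignment operand of 1 << 31 is rejected below even though
// it is a power of two.)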
6694 if (Alignment >= 1u << 31)
6695 return Error(AlignLoc, "alignment is too large");
6696 }
6697
6698 if (parseEOL())
6699 return true;
6700
6701 Symbol->redefineIfPossible();
6702 if (!Symbol->isUndefined())
6703 return Error(NameLoc, "invalid symbol redefinition");
6704
6705 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6706 return false;
6707}
6708
6709bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6710 StringRef IDVal = DirectiveID.getString();
6711
6712 if (isHsaAbi(getSTI())) {
6713 if (IDVal == ".amdhsa_kernel")
6714 return ParseDirectiveAMDHSAKernel();
6715
6716 if (IDVal == ".amdhsa_code_object_version")
6717 return ParseDirectiveAMDHSACodeObjectVersion();
6718
6719 // TODO: Restructure/combine with PAL metadata directive.
6720 if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
6721 return ParseDirectiveHSAMetadata();
6722 } else {
6723 if (IDVal == ".amd_kernel_code_t")
6724 return ParseDirectiveAMDKernelCodeT();
6725
6726 if (IDVal == ".amdgpu_hsa_kernel")
6727 return ParseDirectiveAMDGPUHsaKernel();
6728
6729 if (IDVal == ".amd_amdgpu_isa")
6730 return ParseDirectiveISAVersion();
6731
6732 if (IDVal == HSAMD::AssemblerDirectiveBegin) {
6733 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6734 Twine(" directive is "
6735 "not available on non-amdhsa OSes"))
6736 .str());
6737 }
6738 }
6739
6740 if (IDVal == ".amdgcn_target")
6741 return ParseDirectiveAMDGCNTarget();
6742
6743 if (IDVal == ".amdgpu_lds")
6744 return ParseDirectiveAMDGPULDS();
6745
6746 if (IDVal == PALMD::AssemblerDirectiveBegin)
6747 return ParseDirectivePALMetadataBegin();
6748
6749 if (IDVal == PALMD::AssemblerDirective)
6750 return ParseDirectivePALMetadata();
6751
6752 return true;
6753}
6754
6755bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6756 MCRegister Reg) {
6757 if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
6758 return isGFX9Plus();
6759
6760 // GFX10+ has 2 more SGPRs 104 and 105.
6761 if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
6762 return hasSGPR104_SGPR105();
6763
6764 switch (Reg.id()) {
6765 case SRC_SHARED_BASE_LO:
6766 case SRC_SHARED_BASE:
6767 case SRC_SHARED_LIMIT_LO:
6768 case SRC_SHARED_LIMIT:
6769 case SRC_PRIVATE_BASE_LO:
6770 case SRC_PRIVATE_BASE:
6771 case SRC_PRIVATE_LIMIT_LO:
6772 case SRC_PRIVATE_LIMIT:
6773 return isGFX9Plus();
6774 case SRC_FLAT_SCRATCH_BASE_LO:
6775 case SRC_FLAT_SCRATCH_BASE_HI:
6776 return hasGloballyAddressableScratch();
6777 case SRC_POPS_EXITING_WAVE_ID:
6778 return isGFX9Plus() && !isGFX11Plus();
6779 case TBA:
6780 case TBA_LO:
6781 case TBA_HI:
6782 case TMA:
6783 case TMA_LO:
6784 case TMA_HI:
6785 return !isGFX9Plus();
6786 case XNACK_MASK:
6787 case XNACK_MASK_LO:
6788 case XNACK_MASK_HI:
6789 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6790 case SGPR_NULL:
6791 return isGFX10Plus();
6792 case SRC_EXECZ:
6793 case SRC_VCCZ:
6794 return !isGFX11Plus();
6795 default:
6796 break;
6797 }
6798
6799 if (isCI())
6800 return true;
6801
6802 if (isSI() || isGFX10Plus()) {
6803 // No flat_scr on SI.
6804 // On GFX10Plus flat scratch is not a valid register operand and can only be
6805 // accessed with s_setreg/s_getreg.
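// (Illustrative: a flat_scratch_lo operand is rejected here on gfx10 targets
// rather than at encoding time.)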
6806 switch (Reg.id()) {
6807 case FLAT_SCR:
6808 case FLAT_SCR_LO:
6809 case FLAT_SCR_HI:
6810 return false;
6811 default:
6812 return true;
6813 }
6814 }
6815
6816 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6817 // SI/CI have.
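// (Illustrative: s102 and s103 assemble for SI/CI but are rejected here on VI.)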
6818 if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
6819 return hasSGPR102_SGPR103();
6820
6821 return true;
6822}
6823
6824ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6825 StringRef Mnemonic,
6826 OperandMode Mode) {
6827 ParseStatus Res = parseVOPD(Operands);
6828 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6829 return Res;
6830
6831 // Try to parse with a custom parser
6832 Res = MatchOperandParserImpl(Operands, Mnemonic);
6833
6834 // If we successfully parsed the operand or if there was an error parsing,
6835 // we are done.
6836 //
6837 // If we are parsing after we reach EndOfStatement then this means we
6838 // are appending default values to the Operands list. This is only done
6839 // by a custom parser, so we shouldn't continue on to the generic parsing.
6840 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6841 return Res;
6842
6843 SMLoc RBraceLoc;
6844 SMLoc LBraceLoc = getLoc();
6845 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6846 unsigned Prefix = Operands.size();
6847
6848 for (;;) {
6849 auto Loc = getLoc();
6850 Res = parseReg(Operands);
6851 if (Res.isNoMatch())
6852 Error(Loc, "expected a register");
6853 if (!Res.isSuccess())
6854 return ParseStatus::Failure;
6855
6856 RBraceLoc = getLoc();
6857 if (trySkipToken(AsmToken::RBrac))
6858 break;
6859
6860 if (!skipToken(AsmToken::Comma,
6861 "expected a comma or a closing square bracket"))
6862 return ParseStatus::Failure;
6863 }
6864
6865 if (Operands.size() - Prefix > 1) {
6866 Operands.insert(Operands.begin() + Prefix,
6867 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6868 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6869 }
6870
6871 return ParseStatus::Success;
6872 }
6873
6874 return parseRegOrImm(Operands);
6875}
6876
6877StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6878 // Clear any forced encodings from the previous instruction.
6879 setForcedEncodingSize(0);
6880 setForcedDPP(false);
6881 setForcedSDWA(false);
6882
6883 if (Name.consume_back("_e64_dpp")) {
6884 setForcedDPP(true);
6885 setForcedEncodingSize(64);
6886 return Name;
6887 }
6888 if (Name.consume_back("_e64")) {
6889 setForcedEncodingSize(64);
6890 return Name;
6891 }
6892 if (Name.consume_back("_e32")) {
6893 setForcedEncodingSize(32);
6894 return Name;
6895 }
6896 if (Name.consume_back("_dpp")) {
6897 setForcedDPP(true);
6898 return Name;
6899 }
6900 if (Name.consume_back("_sdwa")) {
6901 setForcedSDWA(true);
6902 return Name;
6903 }
6904 return Name;
6905}
6906
6907static void applyMnemonicAliases(StringRef &Mnemonic,
6908 const FeatureBitset &Features,
6909 unsigned VariantID);
6910
6911bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
6912 StringRef Name, SMLoc NameLoc,
6913 OperandVector &Operands) {
6914 // Add the instruction mnemonic
6915 Name = parseMnemonicSuffix(Name);
6916
6917 // If the target architecture uses MnemonicAlias, call it here to parse
6918 // operands correctly.
6919 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6920
6921 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6922
6923 bool IsMIMG = Name.starts_with("image_");
6924
6925 while (!trySkipToken(AsmToken::EndOfStatement)) {
6926 OperandMode Mode = OperandMode_Default;
6927 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6928 Mode = OperandMode_NSA;
6929 ParseStatus Res = parseOperand(Operands, Name, Mode);
6930
6931 if (!Res.isSuccess()) {
6932 checkUnsupportedInstruction(Name, NameLoc);
6933 if (!Parser.hasPendingError()) {
6934 // FIXME: use real operand location rather than the current location.
6935 StringRef Msg = Res.isFailure() ? "failed parsing operand."
6936 : "not a valid operand.";
6937 Error(getLoc(), Msg);
6938 }
6939 while (!trySkipToken(AsmToken::EndOfStatement)) {
6940 lex();
6941 }
6942 return true;
6943 }
6944
6945 // Eat the comma or space if there is one.
6946 trySkipToken(AsmToken::Comma);
6947 }
6948
6949 return false;
6950}
6951
6952//===----------------------------------------------------------------------===//
6953// Utility functions
6954//===----------------------------------------------------------------------===//
6955
6956ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6957 OperandVector &Operands) {
6958 SMLoc S = getLoc();
6959 if (!trySkipId(Name))
6960 return ParseStatus::NoMatch;
6961
6962 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6963 return ParseStatus::Success;
6964}
6965
6966ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6967 int64_t &IntVal) {
6968
6969 if (!trySkipId(Prefix, AsmToken::Colon))
6970 return ParseStatus::NoMatch;
6971
6972 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6973}
6974
6975ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6976 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6977 std::function<bool(int64_t &)> ConvertResult) {
6978 SMLoc S = getLoc();
6979 int64_t Value = 0;
6980
6981 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6982 if (!Res.isSuccess())
6983 return Res;
6984
6985 if (ConvertResult && !ConvertResult(Value)) {
6986 Error(S, "invalid " + StringRef(Prefix) + " value.");
6987 }
6988
6989 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6990 return ParseStatus::Success;
6991}
6992
6993ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6994 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6995 bool (*ConvertResult)(int64_t &)) {
6996 SMLoc S = getLoc();
6997 if (!trySkipId(Prefix, AsmToken::Colon))
6998 return ParseStatus::NoMatch;
6999
7000 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
7001 return ParseStatus::Failure;
7002
7003 unsigned Val = 0;
7004 const unsigned MaxSize = 4;
7005
7006 // FIXME: How to verify the number of elements matches the number of src
7007 // operands?
7008 for (int I = 0; ; ++I) {
7009 int64_t Op;
7010 SMLoc Loc = getLoc();
7011 if (!parseExpr(Op))
7012 return ParseStatus::Failure;
7013
7014 if (Op != 0 && Op != 1)
7015 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
7016
7017 Val |= (Op << I);
7018
7019 if (trySkipToken(AsmToken::RBrac))
7020 break;
7021
7022 if (I + 1 == MaxSize)
7023 return Error(getLoc(), "expected a closing square bracket");
7024
7025 if (!skipToken(AsmToken::Comma, "expected a comma"))
7026 return ParseStatus::Failure;
7027 }
7028
7029 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
7030 return ParseStatus::Success;
7031}
7032
7033ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
7034 OperandVector &Operands,
7035 AMDGPUOperand::ImmTy ImmTy) {
7036 int64_t Bit;
7037 SMLoc S = getLoc();
7038
7039 if (trySkipId(Name)) {
7040 Bit = 1;
7041 } else if (trySkipId("no", Name)) {
7042 Bit = 0;
7043 } else {
7044 return ParseStatus::NoMatch;
7045 }
7046
7047 if (Name == "r128" && !hasMIMG_R128())
7048 return Error(S, "r128 modifier is not supported on this GPU");
7049 if (Name == "a16" && !hasA16())
7050 return Error(S, "a16 modifier is not supported on this GPU");
7051
7052 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
7053 ImmTy = AMDGPUOperand::ImmTyR128A16;
7054
7055 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
7056 return ParseStatus::Success;
7057}
7058
7059unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7060 bool &Disabling) const {
7061 Disabling = Id.consume_front("no");
7062
7063 if (isGFX940() && !Mnemo.starts_with("s_")) {
7064 return StringSwitch<unsigned>(Id)
7065 .Case("nt", AMDGPU::CPol::NT)
7066 .Case("sc0", AMDGPU::CPol::SC0)
7067 .Case("sc1", AMDGPU::CPol::SC1)
7068 .Default(0);
7069 }
7070
7071 return StringSwitch<unsigned>(Id)
7072 .Case("dlc", AMDGPU::CPol::DLC)
7073 .Case("glc", AMDGPU::CPol::GLC)
7074 .Case("scc", AMDGPU::CPol::SCC)
7075 .Case("slc", AMDGPU::CPol::SLC)
7076 .Default(0);
7077}
7078
7079ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
7080 if (isGFX12Plus()) {
7081 SMLoc StringLoc = getLoc();
7082
7083 int64_t CPolVal = 0;
7084 ParseStatus ResTH = ParseStatus::NoMatch;
7085 ParseStatus ResScope = ParseStatus::NoMatch;
7086 ParseStatus ResNV = ParseStatus::NoMatch;
7087 ParseStatus ResScal = ParseStatus::NoMatch;
7088
7089 for (;;) {
7090 if (ResTH.isNoMatch()) {
7091 int64_t TH;
7092 ResTH = parseTH(Operands, TH);
7093 if (ResTH.isFailure())
7094 return ResTH;
7095 if (ResTH.isSuccess()) {
7096 CPolVal |= TH;
7097 continue;
7098 }
7099 }
7100
7101 if (ResScope.isNoMatch()) {
7102 int64_t Scope;
7103 ResScope = parseScope(Operands, Scope);
7104 if (ResScope.isFailure())
7105 return ResScope;
7106 if (ResScope.isSuccess()) {
7107 CPolVal |= Scope;
7108 continue;
7109 }
7110 }
7111
7112      // The NV bit exists on GFX12+, but only takes effect starting from
7113      // GFX1250. Allow parsing on all GFX12 and fail during validation for
7114      // better diagnostics.
7115 if (ResNV.isNoMatch()) {
7116 if (trySkipId("nv")) {
7117 ResNV = ParseStatus::Success;
7118 CPolVal |= CPol::NV;
7119 continue;
7120 } else if (trySkipId("no", "nv")) {
7121 ResNV = ParseStatus::Success;
7122 continue;
7123 }
7124 }
7125
7126 if (ResScal.isNoMatch()) {
7127 if (trySkipId("scale_offset")) {
7128 ResScal = ParseStatus::Success;
7129 CPolVal |= CPol::SCAL;
7130 continue;
7131 } else if (trySkipId("no", "scale_offset")) {
7132 ResScal = ParseStatus::Success;
7133 continue;
7134 }
7135 }
7136
7137 break;
7138 }
7139
7140 if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
7141 ResScal.isNoMatch())
7142 return ParseStatus::NoMatch;
7143
7144 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
7145 AMDGPUOperand::ImmTyCPol));
7146 return ParseStatus::Success;
7147 }
7148
7149 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7150 SMLoc OpLoc = getLoc();
7151 unsigned Enabled = 0, Seen = 0;
7152 for (;;) {
7153 SMLoc S = getLoc();
7154 bool Disabling;
7155 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
7156 if (!CPol)
7157 break;
7158
7159 lex();
7160
7161 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
7162 return Error(S, "dlc modifier is not supported on this GPU");
7163
7164 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
7165 return Error(S, "scc modifier is not supported on this GPU");
7166
7167 if (Seen & CPol)
7168 return Error(S, "duplicate cache policy modifier");
7169
7170 if (!Disabling)
7171 Enabled |= CPol;
7172
7173 Seen |= CPol;
7174 }
7175
7176 if (!Seen)
7177 return ParseStatus::NoMatch;
7178
7179 Operands.push_back(
7180 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
7181 return ParseStatus::Success;
7182}
7183
7184ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
7185 int64_t &Scope) {
7186 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
7187                                    CPol::SCOPE_DEV, CPol::SCOPE_SYS};
7188
7189 ParseStatus Res = parseStringOrIntWithPrefix(
7190 Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
7191 Scope);
7192
7193 if (Res.isSuccess())
7194 Scope = Scopes[Scope];
7195
7196 return Res;
7197}
7198
7199ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
7200 TH = AMDGPU::CPol::TH_RT; // default
7201
7202 StringRef Value;
7203 SMLoc StringLoc;
7204 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
7205 if (!Res.isSuccess())
7206 return Res;
7207
7208  if (Value == "TH_DEFAULT")
7209    TH = AMDGPU::CPol::TH_RT;
7210 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
7211 Value == "TH_LOAD_NT_WB") {
7212 return Error(StringLoc, "invalid th value");
7213  } else if (Value.consume_front("TH_ATOMIC_")) {
7214    TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
7215  } else if (Value.consume_front("TH_LOAD_")) {
7216    TH = AMDGPU::CPol::TH_TYPE_LOAD;
7217  } else if (Value.consume_front("TH_STORE_")) {
7218    TH = AMDGPU::CPol::TH_TYPE_STORE;
7219 } else {
7220 return Error(StringLoc, "invalid th value");
7221 }
7222
7223  if (Value == "BYPASS")
7224    TH |= AMDGPU::CPol::TH_REAL_BYPASS;
7225
7226  if (TH != 0) {
7227    if (TH == AMDGPU::CPol::TH_TYPE_ATOMIC)
7228 TH |= StringSwitch<int64_t>(Value)
7229 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7230 .Case("RT", AMDGPU::CPol::TH_RT)
7231 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
7232 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
7233              .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
7234                                     AMDGPU::CPol::TH_ATOMIC_RETURN)
7235 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
7236              .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
7237                                      AMDGPU::CPol::TH_ATOMIC_NT)
7238 .Default(0xffffffff);
7239 else
7240 TH |= StringSwitch<int64_t>(Value)
7241 .Case("RT", AMDGPU::CPol::TH_RT)
7242 .Case("NT", AMDGPU::CPol::TH_NT)
7243 .Case("HT", AMDGPU::CPol::TH_HT)
7244 .Case("LU", AMDGPU::CPol::TH_LU)
7245 .Case("WB", AMDGPU::CPol::TH_WB)
7246 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
7247 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
7248 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
7249 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
7250 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
7251 .Default(0xffffffff);
7252 }
7253
7254 if (TH == 0xffffffff)
7255 return Error(StringLoc, "invalid th value");
7256
7257 return ParseStatus::Success;
7258}
7259
7260static void
7261addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands,
7262                      AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7263 AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
7264 std::optional<unsigned> InsertAt = std::nullopt) {
7265 auto i = OptionalIdx.find(ImmT);
7266 if (i != OptionalIdx.end()) {
7267 unsigned Idx = i->second;
7268 const AMDGPUOperand &Op =
7269 static_cast<const AMDGPUOperand &>(*Operands[Idx]);
7270 if (InsertAt)
7271 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
7272 else
7273 Op.addImmOperands(Inst, 1);
7274 } else {
7275 if (InsertAt.has_value())
7276 Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
7277 else
7278      Inst.addOperand(MCOperand::createImm(Default));
7279  }
7280}
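// Usage sketch (illustrative, not part of the source): a cvt* routine first
// records the token index of every optional immediate it sees in OptionalIdx
// and then calls, e.g.,
//   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
// so operands the user omitted still receive their default encoding in the
// canonical MCInst operand order.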
7281
7282ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7283 StringRef &Value,
7284 SMLoc &StringLoc) {
7285 if (!trySkipId(Prefix, AsmToken::Colon))
7286 return ParseStatus::NoMatch;
7287
7288 StringLoc = getLoc();
7289 return parseId(Value, "expected an identifier") ? ParseStatus::Success
7290                                                : ParseStatus::Failure;
7291}
7292
7293ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7294 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7295 int64_t &IntVal) {
7296 if (!trySkipId(Name, AsmToken::Colon))
7297 return ParseStatus::NoMatch;
7298
7299 SMLoc StringLoc = getLoc();
7300
7301 StringRef Value;
7302 if (isToken(AsmToken::Identifier)) {
7303 Value = getTokenStr();
7304 lex();
7305
7306 for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
7307 if (Value == Ids[IntVal])
7308 break;
7309 } else if (!parseExpr(IntVal))
7310 return ParseStatus::Failure;
7311
7312 if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
7313 return Error(StringLoc, "invalid " + Twine(Name) + " value");
7314
7315 return ParseStatus::Success;
7316}
7317
7318ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7319 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7320 AMDGPUOperand::ImmTy Type) {
7321 SMLoc S = getLoc();
7322 int64_t IntVal;
7323
7324 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7325 if (Res.isSuccess())
7326 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
7327
7328 return Res;
7329}
7330
7331//===----------------------------------------------------------------------===//
7332// MTBUF format
7333//===----------------------------------------------------------------------===//
7334
7335bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
7336 int64_t MaxVal,
7337 int64_t &Fmt) {
7338 int64_t Val;
7339 SMLoc Loc = getLoc();
7340
7341 auto Res = parseIntWithPrefix(Pref, Val);
7342 if (Res.isFailure())
7343 return false;
7344 if (Res.isNoMatch())
7345 return true;
7346
7347 if (Val < 0 || Val > MaxVal) {
7348 Error(Loc, Twine("out of range ", StringRef(Pref)));
7349 return false;
7350 }
7351
7352 Fmt = Val;
7353 return true;
7354}
7355
7356ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
7357 AMDGPUOperand::ImmTy ImmTy) {
7358 const char *Pref = "index_key";
7359 int64_t ImmVal = 0;
7360 SMLoc Loc = getLoc();
7361 auto Res = parseIntWithPrefix(Pref, ImmVal);
7362 if (!Res.isSuccess())
7363 return Res;
7364
7365 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7366 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7367 (ImmVal < 0 || ImmVal > 1))
7368 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7369
7370 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7371 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7372
7373 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
7374 return ParseStatus::Success;
7375}
7376
7377ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
7378 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
7379}
7380
7381ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
7382 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
7383}
7384
7385ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
7386 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
7387}
7388
7389ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(OperandVector &Operands,
7390 StringRef Name,
7391 AMDGPUOperand::ImmTy Type) {
7392 return parseStringOrIntWithPrefix(Operands, Name,
7393 {"MATRIX_FMT_FP8", "MATRIX_FMT_BF8",
7394 "MATRIX_FMT_FP6", "MATRIX_FMT_BF6",
7395 "MATRIX_FMT_FP4"},
7396 Type);
7397}
7398
7399ParseStatus AMDGPUAsmParser::parseMatrixAFMT(OperandVector &Operands) {
7400 return tryParseMatrixFMT(Operands, "matrix_a_fmt",
7401 AMDGPUOperand::ImmTyMatrixAFMT);
7402}
7403
7404ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
7405 return tryParseMatrixFMT(Operands, "matrix_b_fmt",
7406 AMDGPUOperand::ImmTyMatrixBFMT);
7407}
7408
7409ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
7410 StringRef Name,
7411 AMDGPUOperand::ImmTy Type) {
7412 return parseStringOrIntWithPrefix(
7413 Operands, Name, {"MATRIX_SCALE_ROW0", "MATRIX_SCALE_ROW1"}, Type);
7414}
7415
7416ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
7417 return tryParseMatrixScale(Operands, "matrix_a_scale",
7418 AMDGPUOperand::ImmTyMatrixAScale);
7419}
7420
7421ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
7422 return tryParseMatrixScale(Operands, "matrix_b_scale",
7423 AMDGPUOperand::ImmTyMatrixBScale);
7424}
7425
7426ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
7427 StringRef Name,
7428 AMDGPUOperand::ImmTy Type) {
7429 return parseStringOrIntWithPrefix(
7430 Operands, Name,
7431 {"MATRIX_SCALE_FMT_E8", "MATRIX_SCALE_FMT_E5M3", "MATRIX_SCALE_FMT_E4M3"},
7432 Type);
7433}
7434
7435ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
7436 return tryParseMatrixScaleFmt(Operands, "matrix_a_scale_fmt",
7437 AMDGPUOperand::ImmTyMatrixAScaleFmt);
7438}
7439
7440ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
7441 return tryParseMatrixScaleFmt(Operands, "matrix_b_scale_fmt",
7442 AMDGPUOperand::ImmTyMatrixBScaleFmt);
7443}
7444
7445// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
7446// values to live in a joint format operand in the MCInst encoding.
7447ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
7448 using namespace llvm::AMDGPU::MTBUFFormat;
7449
7450 int64_t Dfmt = DFMT_UNDEF;
7451 int64_t Nfmt = NFMT_UNDEF;
7452
7453 // dfmt and nfmt can appear in either order, and each is optional.
7454 for (int I = 0; I < 2; ++I) {
7455 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
7456 return ParseStatus::Failure;
7457
7458 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
7459 return ParseStatus::Failure;
7460
7461 // Skip optional comma between dfmt/nfmt
7462 // but guard against 2 commas following each other.
7463 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7464 !peekToken().is(AsmToken::Comma)) {
7465 trySkipToken(AsmToken::Comma);
7466 }
7467 }
7468
7469 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7470 return ParseStatus::NoMatch;
7471
7472 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7473 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7474
7475 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7476 return ParseStatus::Success;
7477}
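// Illustrative note (assumption about the field layout, not part of the
// source): if encodeDfmtNfmt packs dfmt into the low 4 bits and nfmt into the
// 3 bits above it, then dfmt:4, nfmt:7 (BUF_DATA_FORMAT_32 with
// BUF_NUM_FORMAT_FLOAT) folds into the joint format operand as
// 4 | (7 << 4) = 0x74.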
7478
7479ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
7480 using namespace llvm::AMDGPU::MTBUFFormat;
7481
7482 int64_t Fmt = UFMT_UNDEF;
7483
7484 if (!tryParseFmt("format", UFMT_MAX, Fmt))
7485 return ParseStatus::Failure;
7486
7487 if (Fmt == UFMT_UNDEF)
7488 return ParseStatus::NoMatch;
7489
7490 Format = Fmt;
7491 return ParseStatus::Success;
7492}
7493
7494bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7495 int64_t &Nfmt,
7496 StringRef FormatStr,
7497 SMLoc Loc) {
7498 using namespace llvm::AMDGPU::MTBUFFormat;
7499 int64_t Format;
7500
7501 Format = getDfmt(FormatStr);
7502 if (Format != DFMT_UNDEF) {
7503 Dfmt = Format;
7504 return true;
7505 }
7506
7507 Format = getNfmt(FormatStr, getSTI());
7508 if (Format != NFMT_UNDEF) {
7509 Nfmt = Format;
7510 return true;
7511 }
7512
7513 Error(Loc, "unsupported format");
7514 return false;
7515}
7516
7517ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7518 SMLoc FormatLoc,
7519 int64_t &Format) {
7520 using namespace llvm::AMDGPU::MTBUFFormat;
7521
7522 int64_t Dfmt = DFMT_UNDEF;
7523 int64_t Nfmt = NFMT_UNDEF;
7524 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7525 return ParseStatus::Failure;
7526
7527 if (trySkipToken(AsmToken::Comma)) {
7528 StringRef Str;
7529 SMLoc Loc = getLoc();
7530 if (!parseId(Str, "expected a format string") ||
7531 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7532 return ParseStatus::Failure;
7533 if (Dfmt == DFMT_UNDEF)
7534 return Error(Loc, "duplicate numeric format");
7535 if (Nfmt == NFMT_UNDEF)
7536 return Error(Loc, "duplicate data format");
7537 }
7538
7539 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7540 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7541
7542 if (isGFX10Plus()) {
7543 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
7544 if (Ufmt == UFMT_UNDEF)
7545 return Error(FormatLoc, "unsupported format");
7546 Format = Ufmt;
7547 } else {
7548 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7549 }
7550
7551 return ParseStatus::Success;
7552}
7553
7554ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7555 SMLoc Loc,
7556 int64_t &Format) {
7557 using namespace llvm::AMDGPU::MTBUFFormat;
7558
7559 auto Id = getUnifiedFormat(FormatStr, getSTI());
7560 if (Id == UFMT_UNDEF)
7561 return ParseStatus::NoMatch;
7562
7563 if (!isGFX10Plus())
7564 return Error(Loc, "unified format is not supported on this GPU");
7565
7566 Format = Id;
7567 return ParseStatus::Success;
7568}
7569
7570ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7571 using namespace llvm::AMDGPU::MTBUFFormat;
7572 SMLoc Loc = getLoc();
7573
7574 if (!parseExpr(Format))
7575 return ParseStatus::Failure;
7576 if (!isValidFormatEncoding(Format, getSTI()))
7577 return Error(Loc, "out of range format");
7578
7579 return ParseStatus::Success;
7580}
7581
7582ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
7583 using namespace llvm::AMDGPU::MTBUFFormat;
7584
7585 if (!trySkipId("format", AsmToken::Colon))
7586 return ParseStatus::NoMatch;
7587
7588 if (trySkipToken(AsmToken::LBrac)) {
7589 StringRef FormatStr;
7590 SMLoc Loc = getLoc();
7591 if (!parseId(FormatStr, "expected a format string"))
7592 return ParseStatus::Failure;
7593
7594 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
7595 if (Res.isNoMatch())
7596 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
7597 if (!Res.isSuccess())
7598 return Res;
7599
7600 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7601 return ParseStatus::Failure;
7602
7603 return ParseStatus::Success;
7604 }
7605
7606 return parseNumericFormat(Format);
7607}
7608
7609ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
7610 using namespace llvm::AMDGPU::MTBUFFormat;
7611
7612 int64_t Format = getDefaultFormatEncoding(getSTI());
7613 ParseStatus Res;
7614 SMLoc Loc = getLoc();
7615
7616 // Parse legacy format syntax.
7617 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
7618 if (Res.isFailure())
7619 return Res;
7620
7621 bool FormatFound = Res.isSuccess();
7622
7623 Operands.push_back(
7624 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7625
7626 if (FormatFound)
7627 trySkipToken(AsmToken::Comma);
7628
7629 if (isToken(AsmToken::EndOfStatement)) {
7630    // We are expecting an soffset operand,
7631    // but let the matcher handle the error.
7632 return ParseStatus::Success;
7633 }
7634
7635 // Parse soffset.
7636 Res = parseRegOrImm(Operands);
7637 if (!Res.isSuccess())
7638 return Res;
7639
7640 trySkipToken(AsmToken::Comma);
7641
7642 if (!FormatFound) {
7643 Res = parseSymbolicOrNumericFormat(Format);
7644 if (Res.isFailure())
7645 return Res;
7646 if (Res.isSuccess()) {
7647 auto Size = Operands.size();
7648 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
7649 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7650 Op.setImm(Format);
7651 }
7652 return ParseStatus::Success;
7653 }
7654
7655 if (isId("format") && peekToken().is(AsmToken::Colon))
7656 return Error(getLoc(), "duplicate format");
7657 return ParseStatus::Success;
7658}
7659
7660ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7661 ParseStatus Res =
7662 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7663 if (Res.isNoMatch()) {
7664 Res = parseIntWithPrefix("inst_offset", Operands,
7665 AMDGPUOperand::ImmTyInstOffset);
7666 }
7667 return Res;
7668}
7669
7670ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7671 ParseStatus Res =
7672 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7673 if (Res.isNoMatch())
7674 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7675 return Res;
7676}
7677
7678ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7679 ParseStatus Res =
7680 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7681 if (Res.isNoMatch()) {
7682 Res =
7683 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7684 }
7685 return Res;
7686}
7687
7688//===----------------------------------------------------------------------===//
7689// Exp
7690//===----------------------------------------------------------------------===//
7691
7692void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7693 OptionalImmIndexMap OptionalIdx;
7694
7695 unsigned OperandIdx[4];
7696 unsigned EnMask = 0;
7697 int SrcIdx = 0;
7698
7699 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7700 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7701
7702 // Add the register arguments
7703 if (Op.isReg()) {
7704 assert(SrcIdx < 4);
7705 OperandIdx[SrcIdx] = Inst.size();
7706 Op.addRegOperands(Inst, 1);
7707 ++SrcIdx;
7708 continue;
7709 }
7710
7711 if (Op.isOff()) {
7712 assert(SrcIdx < 4);
7713 OperandIdx[SrcIdx] = Inst.size();
7714 Inst.addOperand(MCOperand::createReg(MCRegister()));
7715 ++SrcIdx;
7716 continue;
7717 }
7718
7719 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7720 Op.addImmOperands(Inst, 1);
7721 continue;
7722 }
7723
7724 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7725 continue;
7726
7727 // Handle optional arguments
7728 OptionalIdx[Op.getImmTy()] = i;
7729 }
7730
7731 assert(SrcIdx == 4);
7732
7733 bool Compr = false;
7734 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7735 Compr = true;
7736 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7737 Inst.getOperand(OperandIdx[2]).setReg(MCRegister());
7738 Inst.getOperand(OperandIdx[3]).setReg(MCRegister());
7739 }
7740
7741 for (auto i = 0; i < SrcIdx; ++i) {
7742 if (Inst.getOperand(OperandIdx[i]).getReg()) {
7743 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7744 }
7745 }
7746
7747 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7748 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7749
7750 Inst.addOperand(MCOperand::createImm(EnMask));
7751}
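// Illustrative note: for a non-compressed export such as
// "exp mrt0 v0, v1, off, off done", sources 2 and 3 are parsed as "off" and
// become null registers, so the loop above yields EnMask = 0b0011; with the
// compr modifier each live source contributes two bits instead.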
7752
7753//===----------------------------------------------------------------------===//
7754// s_waitcnt
7755//===----------------------------------------------------------------------===//
7756
7757static bool
7758encodeCnt(
7759    const AMDGPU::IsaVersion ISA,
7760 int64_t &IntVal,
7761 int64_t CntVal,
7762 bool Saturate,
7763 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7764 unsigned (*decode)(const IsaVersion &Version, unsigned))
7765{
7766 bool Failed = false;
7767
7768 IntVal = encode(ISA, IntVal, CntVal);
7769 if (CntVal != decode(ISA, IntVal)) {
7770 if (Saturate) {
7771 IntVal = encode(ISA, IntVal, -1);
7772 } else {
7773 Failed = true;
7774 }
7775 }
7776 return Failed;
7777}
7778
7779bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7780
7781 SMLoc CntLoc = getLoc();
7782 StringRef CntName = getTokenStr();
7783
7784 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7785 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7786 return false;
7787
7788 int64_t CntVal;
7789 SMLoc ValLoc = getLoc();
7790 if (!parseExpr(CntVal))
7791 return false;
7792
7793 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7794
7795 bool Failed = true;
7796 bool Sat = CntName.ends_with("_sat");
7797
7798 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7799 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7800 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7801 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7802 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7803 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7804 } else {
7805 Error(CntLoc, "invalid counter name " + CntName);
7806 return false;
7807 }
7808
7809 if (Failed) {
7810 Error(ValLoc, "too large value for " + CntName);
7811 return false;
7812 }
7813
7814 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7815 return false;
7816
7817 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7818 if (isToken(AsmToken::EndOfStatement)) {
7819 Error(getLoc(), "expected a counter name");
7820 return false;
7821 }
7822 }
7823
7824 return true;
7825}
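// Usage sketch (illustrative): for "s_waitcnt vmcnt(0) lgkmcnt(0)" the parser
// starts from getWaitcntBitMask(ISA), i.e. every counter at its maximum
// ("no wait"), and parseCnt() re-encodes each named counter in turn, so the
// unspecified expcnt field keeps its maximum while vmcnt and lgkmcnt become 0.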
7826
7827ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7828 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7829 int64_t Waitcnt = getWaitcntBitMask(ISA);
7830 SMLoc S = getLoc();
7831
7832 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7833 while (!isToken(AsmToken::EndOfStatement)) {
7834 if (!parseCnt(Waitcnt))
7835 return ParseStatus::Failure;
7836 }
7837 } else {
7838 if (!parseExpr(Waitcnt))
7839 return ParseStatus::Failure;
7840 }
7841
7842 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7843 return ParseStatus::Success;
7844}
7845
7846bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7847 SMLoc FieldLoc = getLoc();
7848 StringRef FieldName = getTokenStr();
7849 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7850 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7851 return false;
7852
7853 SMLoc ValueLoc = getLoc();
7854 StringRef ValueName = getTokenStr();
7855 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7856 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7857 return false;
7858
7859 unsigned Shift;
7860 if (FieldName == "instid0") {
7861 Shift = 0;
7862 } else if (FieldName == "instskip") {
7863 Shift = 4;
7864 } else if (FieldName == "instid1") {
7865 Shift = 7;
7866 } else {
7867 Error(FieldLoc, "invalid field name " + FieldName);
7868 return false;
7869 }
7870
7871 int Value;
7872 if (Shift == 4) {
7873 // Parse values for instskip.
7874 Value = StringSwitch<int>(ValueName)
7875 .Case("SAME", 0)
7876 .Case("NEXT", 1)
7877 .Case("SKIP_1", 2)
7878 .Case("SKIP_2", 3)
7879 .Case("SKIP_3", 4)
7880 .Case("SKIP_4", 5)
7881 .Default(-1);
7882 } else {
7883 // Parse values for instid0 and instid1.
7884 Value = StringSwitch<int>(ValueName)
7885 .Case("NO_DEP", 0)
7886 .Case("VALU_DEP_1", 1)
7887 .Case("VALU_DEP_2", 2)
7888 .Case("VALU_DEP_3", 3)
7889 .Case("VALU_DEP_4", 4)
7890 .Case("TRANS32_DEP_1", 5)
7891 .Case("TRANS32_DEP_2", 6)
7892 .Case("TRANS32_DEP_3", 7)
7893 .Case("FMA_ACCUM_CYCLE_1", 8)
7894 .Case("SALU_CYCLE_1", 9)
7895 .Case("SALU_CYCLE_2", 10)
7896 .Case("SALU_CYCLE_3", 11)
7897 .Default(-1);
7898 }
7899 if (Value < 0) {
7900 Error(ValueLoc, "invalid value name " + ValueName);
7901 return false;
7902 }
7903
7904 Delay |= Value << Shift;
7905 return true;
7906}
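// Worked example (illustrative): "s_delay_alu instid0(VALU_DEP_1) |
// instskip(NEXT) | instid1(VALU_DEP_2)" yields
// 1 | (1 << 4) | (2 << 7) = 0x111, matching the field shifts of 0, 4 and 7
// selected above.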
7907
7908ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7909 int64_t Delay = 0;
7910 SMLoc S = getLoc();
7911
7912 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7913 do {
7914 if (!parseDelay(Delay))
7915 return ParseStatus::Failure;
7916 } while (trySkipToken(AsmToken::Pipe));
7917 } else {
7918 if (!parseExpr(Delay))
7919 return ParseStatus::Failure;
7920 }
7921
7922 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7923 return ParseStatus::Success;
7924}
7925
7926bool
7927AMDGPUOperand::isSWaitCnt() const {
7928 return isImm();
7929}
7930
7931bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7932
7933//===----------------------------------------------------------------------===//
7934// DepCtr
7935//===----------------------------------------------------------------------===//
7936
7937void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7938 StringRef DepCtrName) {
7939 switch (ErrorId) {
7940 case OPR_ID_UNKNOWN:
7941 Error(Loc, Twine("invalid counter name ", DepCtrName));
7942 return;
7943 case OPR_ID_UNSUPPORTED:
7944 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7945 return;
7946 case OPR_ID_DUPLICATE:
7947 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7948 return;
7949 case OPR_VAL_INVALID:
7950 Error(Loc, Twine("invalid value for ", DepCtrName));
7951 return;
7952 default:
7953 assert(false);
7954 }
7955}
7956
7957bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7958
7959 using namespace llvm::AMDGPU::DepCtr;
7960
7961 SMLoc DepCtrLoc = getLoc();
7962 StringRef DepCtrName = getTokenStr();
7963
7964 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7965 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7966 return false;
7967
7968 int64_t ExprVal;
7969 if (!parseExpr(ExprVal))
7970 return false;
7971
7972 unsigned PrevOprMask = UsedOprMask;
7973 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7974
7975 if (CntVal < 0) {
7976 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7977 return false;
7978 }
7979
7980 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7981 return false;
7982
7983 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7984 if (isToken(AsmToken::EndOfStatement)) {
7985 Error(getLoc(), "expected a counter name");
7986 return false;
7987 }
7988 }
7989
7990 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7991 DepCtr = (DepCtr & ~CntValMask) | CntVal;
7992 return true;
7993}
7994
7995ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7996 using namespace llvm::AMDGPU::DepCtr;
7997
7998 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
7999 SMLoc Loc = getLoc();
8000
8001 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8002 unsigned UsedOprMask = 0;
8003 while (!isToken(AsmToken::EndOfStatement)) {
8004 if (!parseDepCtr(DepCtr, UsedOprMask))
8005 return ParseStatus::Failure;
8006 }
8007 } else {
8008 if (!parseExpr(DepCtr))
8009 return ParseStatus::Failure;
8010 }
8011
8012 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
8013 return ParseStatus::Success;
8014}
8015
8016bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
8017
8018//===----------------------------------------------------------------------===//
8019// hwreg
8020//===----------------------------------------------------------------------===//
8021
8022ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
8023 OperandInfoTy &Offset,
8024 OperandInfoTy &Width) {
8025 using namespace llvm::AMDGPU::Hwreg;
8026
8027 if (!trySkipId("hwreg", AsmToken::LParen))
8028 return ParseStatus::NoMatch;
8029
8030 // The register may be specified by name or using a numeric code
8031 HwReg.Loc = getLoc();
8032 if (isToken(AsmToken::Identifier) &&
8033 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8034 HwReg.IsSymbolic = true;
8035 lex(); // skip register name
8036 } else if (!parseExpr(HwReg.Val, "a register name")) {
8037 return ParseStatus::Failure;
8038 }
8039
8040 if (trySkipToken(AsmToken::RParen))
8041 return ParseStatus::Success;
8042
8043 // parse optional params
8044 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
8045 return ParseStatus::Failure;
8046
8047 Offset.Loc = getLoc();
8048 if (!parseExpr(Offset.Val))
8049 return ParseStatus::Failure;
8050
8051 if (!skipToken(AsmToken::Comma, "expected a comma"))
8052 return ParseStatus::Failure;
8053
8054 Width.Loc = getLoc();
8055 if (!parseExpr(Width.Val) ||
8056 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
8057 return ParseStatus::Failure;
8058
8059 return ParseStatus::Success;
8060}
8061
8062ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
8063 using namespace llvm::AMDGPU::Hwreg;
8064
8065 int64_t ImmVal = 0;
8066 SMLoc Loc = getLoc();
8067
8068 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
8069 HwregId::Default);
8070 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
8071 HwregOffset::Default);
8072 struct : StructuredOpField {
8073 using StructuredOpField::StructuredOpField;
8074 bool validate(AMDGPUAsmParser &Parser) const override {
8075 if (!isUIntN(Width, Val - 1))
8076 return Error(Parser, "only values from 1 to 32 are legal");
8077 return true;
8078 }
8079 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
8080 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
8081
8082 if (Res.isNoMatch())
8083 Res = parseHwregFunc(HwReg, Offset, Width);
8084
8085 if (Res.isSuccess()) {
8086 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
8087 return ParseStatus::Failure;
8088 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
8089 }
8090
8091 if (Res.isNoMatch() &&
8092 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
8093    Res = ParseStatus::Success;
8094
8095 if (!Res.isSuccess())
8096 return ParseStatus::Failure;
8097
8098 if (!isUInt<16>(ImmVal))
8099 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8100 Operands.push_back(
8101 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
8102 return ParseStatus::Success;
8103}
8104
8105bool AMDGPUOperand::isHwreg() const {
8106 return isImmTy(ImmTyHwreg);
8107}
8108
8109//===----------------------------------------------------------------------===//
8110// sendmsg
8111//===----------------------------------------------------------------------===//
8112
8113bool
8114AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8115 OperandInfoTy &Op,
8116 OperandInfoTy &Stream) {
8117 using namespace llvm::AMDGPU::SendMsg;
8118
8119 Msg.Loc = getLoc();
8120 if (isToken(AsmToken::Identifier) &&
8121 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8122 Msg.IsSymbolic = true;
8123 lex(); // skip message name
8124 } else if (!parseExpr(Msg.Val, "a message name")) {
8125 return false;
8126 }
8127
8128 if (trySkipToken(AsmToken::Comma)) {
8129 Op.IsDefined = true;
8130 Op.Loc = getLoc();
8131 if (isToken(AsmToken::Identifier) &&
8132 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8133            OPR_ID_UNKNOWN) {
8134      lex(); // skip operation name
8135 } else if (!parseExpr(Op.Val, "an operation name")) {
8136 return false;
8137 }
8138
8139 if (trySkipToken(AsmToken::Comma)) {
8140 Stream.IsDefined = true;
8141 Stream.Loc = getLoc();
8142 if (!parseExpr(Stream.Val))
8143 return false;
8144 }
8145 }
8146
8147 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
8148}
8149
8150bool
8151AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
8152 const OperandInfoTy &Op,
8153 const OperandInfoTy &Stream) {
8154 using namespace llvm::AMDGPU::SendMsg;
8155
8156  // Validation strictness depends on whether the message is specified
8157  // in a symbolic or in a numeric form. In the latter case only the
8158  // possibility of encoding is checked.
8159 bool Strict = Msg.IsSymbolic;
8160
8161 if (Strict) {
8162 if (Msg.Val == OPR_ID_UNSUPPORTED) {
8163 Error(Msg.Loc, "specified message id is not supported on this GPU");
8164 return false;
8165 }
8166 } else {
8167 if (!isValidMsgId(Msg.Val, getSTI())) {
8168 Error(Msg.Loc, "invalid message id");
8169 return false;
8170 }
8171 }
8172 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
8173 if (Op.IsDefined) {
8174 Error(Op.Loc, "message does not support operations");
8175 } else {
8176 Error(Msg.Loc, "missing message operation");
8177 }
8178 return false;
8179 }
8180 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
8181 if (Op.Val == OPR_ID_UNSUPPORTED)
8182 Error(Op.Loc, "specified operation id is not supported on this GPU");
8183 else
8184 Error(Op.Loc, "invalid operation id");
8185 return false;
8186 }
8187 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
8188 Stream.IsDefined) {
8189 Error(Stream.Loc, "message operation does not support streams");
8190 return false;
8191 }
8192 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
8193 Error(Stream.Loc, "invalid message stream id");
8194 return false;
8195 }
8196 return true;
8197}
8198
8199ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
8200 using namespace llvm::AMDGPU::SendMsg;
8201
8202 int64_t ImmVal = 0;
8203 SMLoc Loc = getLoc();
8204
8205 if (trySkipId("sendmsg", AsmToken::LParen)) {
8206 OperandInfoTy Msg(OPR_ID_UNKNOWN);
8207 OperandInfoTy Op(OP_NONE_);
8208 OperandInfoTy Stream(STREAM_ID_NONE_);
8209 if (parseSendMsgBody(Msg, Op, Stream) &&
8210 validateSendMsg(Msg, Op, Stream)) {
8211 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
8212 } else {
8213 return ParseStatus::Failure;
8214 }
8215 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
8216 if (ImmVal < 0 || !isUInt<16>(ImmVal))
8217 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8218 } else {
8219 return ParseStatus::Failure;
8220 }
8221
8222 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
8223 return ParseStatus::Success;
8224}
8225
8226bool AMDGPUOperand::isSendMsg() const {
8227 return isImmTy(ImmTySendMsg);
8228}
8229
8230//===----------------------------------------------------------------------===//
8231// v_interp
8232//===----------------------------------------------------------------------===//
8233
8234ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
8235 StringRef Str;
8236 SMLoc S = getLoc();
8237
8238 if (!parseId(Str))
8239 return ParseStatus::NoMatch;
8240
8241 int Slot = StringSwitch<int>(Str)
8242 .Case("p10", 0)
8243 .Case("p20", 1)
8244 .Case("p0", 2)
8245 .Default(-1);
8246
8247 if (Slot == -1)
8248 return Error(S, "invalid interpolation slot");
8249
8250 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
8251 AMDGPUOperand::ImmTyInterpSlot));
8252 return ParseStatus::Success;
8253}
8254
8255ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
8256 StringRef Str;
8257 SMLoc S = getLoc();
8258
8259 if (!parseId(Str))
8260 return ParseStatus::NoMatch;
8261
8262 if (!Str.starts_with("attr"))
8263 return Error(S, "invalid interpolation attribute");
8264
8265 StringRef Chan = Str.take_back(2);
8266 int AttrChan = StringSwitch<int>(Chan)
8267 .Case(".x", 0)
8268 .Case(".y", 1)
8269 .Case(".z", 2)
8270 .Case(".w", 3)
8271 .Default(-1);
8272 if (AttrChan == -1)
8273 return Error(S, "invalid or missing interpolation attribute channel");
8274
8275 Str = Str.drop_back(2).drop_front(4);
8276
8277 uint8_t Attr;
8278 if (Str.getAsInteger(10, Attr))
8279 return Error(S, "invalid or missing interpolation attribute number");
8280
8281 if (Attr > 32)
8282 return Error(S, "out of bounds interpolation attribute number");
8283
8284 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
8285
8286 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
8287 AMDGPUOperand::ImmTyInterpAttr));
8288 Operands.push_back(AMDGPUOperand::CreateImm(
8289 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8290 return ParseStatus::Success;
8291}
8292
8293//===----------------------------------------------------------------------===//
8294// exp
8295//===----------------------------------------------------------------------===//
8296
8297ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
8298 using namespace llvm::AMDGPU::Exp;
8299
8300 StringRef Str;
8301 SMLoc S = getLoc();
8302
8303 if (!parseId(Str))
8304 return ParseStatus::NoMatch;
8305
8306 unsigned Id = getTgtId(Str);
8307 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
8308 return Error(S, (Id == ET_INVALID)
8309 ? "invalid exp target"
8310 : "exp target is not supported on this GPU");
8311
8312 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
8313 AMDGPUOperand::ImmTyExpTgt));
8314 return ParseStatus::Success;
8315}
8316
8317//===----------------------------------------------------------------------===//
8318// parser helpers
8319//===----------------------------------------------------------------------===//
8320
8321bool
8322AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
8323 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
8324}
8325
8326bool
8327AMDGPUAsmParser::isId(const StringRef Id) const {
8328 return isId(getToken(), Id);
8329}
8330
8331bool
8332AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
8333 return getTokenKind() == Kind;
8334}
8335
8336StringRef AMDGPUAsmParser::getId() const {
8337 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
8338}
8339
8340bool
8341AMDGPUAsmParser::trySkipId(const StringRef Id) {
8342 if (isId(Id)) {
8343 lex();
8344 return true;
8345 }
8346 return false;
8347}
8348
8349bool
8350AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
8351 if (isToken(AsmToken::Identifier)) {
8352 StringRef Tok = getTokenStr();
8353 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
8354 lex();
8355 return true;
8356 }
8357 }
8358 return false;
8359}
8360
8361bool
8362AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
8363 if (isId(Id) && peekToken().is(Kind)) {
8364 lex();
8365 lex();
8366 return true;
8367 }
8368 return false;
8369}
8370
8371bool
8372AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
8373 if (isToken(Kind)) {
8374 lex();
8375 return true;
8376 }
8377 return false;
8378}
8379
8380bool
8381AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
8382 const StringRef ErrMsg) {
8383 if (!trySkipToken(Kind)) {
8384 Error(getLoc(), ErrMsg);
8385 return false;
8386 }
8387 return true;
8388}
8389
8390bool
8391AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
8392 SMLoc S = getLoc();
8393
8394 const MCExpr *Expr;
8395 if (Parser.parseExpression(Expr))
8396 return false;
8397
8398 if (Expr->evaluateAsAbsolute(Imm))
8399 return true;
8400
8401 if (Expected.empty()) {
8402 Error(S, "expected absolute expression");
8403 } else {
8404 Error(S, Twine("expected ", Expected) +
8405 Twine(" or an absolute expression"));
8406 }
8407 return false;
8408}
8409
8410bool
8411AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
8412 SMLoc S = getLoc();
8413
8414 const MCExpr *Expr;
8415 if (Parser.parseExpression(Expr))
8416 return false;
8417
8418 int64_t IntVal;
8419 if (Expr->evaluateAsAbsolute(IntVal)) {
8420 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
8421 } else {
8422 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8423 }
8424 return true;
8425}
8426
8427bool
8428AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
8429 if (isToken(AsmToken::String)) {
8430 Val = getToken().getStringContents();
8431 lex();
8432 return true;
8433 }
8434 Error(getLoc(), ErrMsg);
8435 return false;
8436}
8437
8438bool
8439AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
8440 if (isToken(AsmToken::Identifier)) {
8441 Val = getTokenStr();
8442 lex();
8443 return true;
8444 }
8445 if (!ErrMsg.empty())
8446 Error(getLoc(), ErrMsg);
8447 return false;
8448}
8449
8450AsmToken
8451AMDGPUAsmParser::getToken() const {
8452 return Parser.getTok();
8453}
8454
8455AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
8456 return isToken(AsmToken::EndOfStatement)
8457 ? getToken()
8458 : getLexer().peekTok(ShouldSkipSpace);
8459}
8460
8461void
8462AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
8463 auto TokCount = getLexer().peekTokens(Tokens);
8464
8465 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
8466 Tokens[Idx] = AsmToken(AsmToken::Error, "");
8467}
8468
8469AsmToken::TokenKind
8470AMDGPUAsmParser::getTokenKind() const {
8471 return getLexer().getKind();
8472}
8473
8474SMLoc
8475AMDGPUAsmParser::getLoc() const {
8476 return getToken().getLoc();
8477}
8478
8479StringRef
8480AMDGPUAsmParser::getTokenStr() const {
8481 return getToken().getString();
8482}
8483
8484void
8485AMDGPUAsmParser::lex() {
8486 Parser.Lex();
8487}
8488
8489SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
8490 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8491}
8492
8493// Returns one of the given locations that comes later in the source.
8494SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
8495 return a.getPointer() < b.getPointer() ? b : a;
8496}
8497
8498SMLoc AMDGPUAsmParser::getOperandLoc(const OperandVector &Operands,
8499 int MCOpIdx) const {
8500 for (const auto &Op : Operands) {
8501 const auto TargetOp = static_cast<AMDGPUOperand &>(*Op);
8502 if (TargetOp.getMCOpIdx() == MCOpIdx)
8503 return TargetOp.getStartLoc();
8504 }
8505 llvm_unreachable("No such MC operand!");
8506}
8507
8508SMLoc
8509AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
8510 const OperandVector &Operands) const {
8511 for (unsigned i = Operands.size() - 1; i > 0; --i) {
8512 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8513 if (Test(Op))
8514 return Op.getStartLoc();
8515 }
8516 return getInstLoc(Operands);
8517}
8518
8519SMLoc
8520AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8521 const OperandVector &Operands) const {
8522 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
8523 return getOperandLoc(Test, Operands);
8524}
8525
8526ParseStatus
8527AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
8528 if (!trySkipToken(AsmToken::LCurly))
8529 return ParseStatus::NoMatch;
8530
8531 bool First = true;
8532 while (!trySkipToken(AsmToken::RCurly)) {
8533 if (!First &&
8534 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
8535 return ParseStatus::Failure;
8536
8537 StringRef Id = getTokenStr();
8538 SMLoc IdLoc = getLoc();
8539 if (!skipToken(AsmToken::Identifier, "field name expected") ||
8540 !skipToken(AsmToken::Colon, "colon expected"))
8541 return ParseStatus::Failure;
8542
8543 const auto *I =
8544 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
8545 if (I == Fields.end())
8546 return Error(IdLoc, "unknown field");
8547 if ((*I)->IsDefined)
8548 return Error(IdLoc, "duplicate field");
8549
8550 // TODO: Support symbolic values.
8551 (*I)->Loc = getLoc();
8552 if (!parseExpr((*I)->Val))
8553 return ParseStatus::Failure;
8554 (*I)->IsDefined = true;
8555
8556 First = false;
8557 }
8558 return ParseStatus::Success;
8559}
8560
8561bool AMDGPUAsmParser::validateStructuredOpFields(
8562    ArrayRef<const StructuredOpField *> Fields) {
8563  return all_of(Fields, [this](const StructuredOpField *F) {
8564 return F->validate(*this);
8565 });
8566}
8567
8568//===----------------------------------------------------------------------===//
8569// swizzle
8570//===----------------------------------------------------------------------===//
8571
8573static unsigned
8574encodeBitmaskPerm(const unsigned AndMask,
8575 const unsigned OrMask,
8576 const unsigned XorMask) {
8577 using namespace llvm::AMDGPU::Swizzle;
8578
8579 return BITMASK_PERM_ENC |
8580 (AndMask << BITMASK_AND_SHIFT) |
8581 (OrMask << BITMASK_OR_SHIFT) |
8582 (XorMask << BITMASK_XOR_SHIFT);
8583}
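// Rough model (illustrative): within a group of 32 lanes the bitmask-perm
// swizzle computes
//   dst_lane = ((src_lane & AndMask) | OrMask) ^ XorMask
// so the broadcast, swap and reverse macros below all lower to this single
// encoding with suitably chosen masks.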
8584
8585bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8586 const unsigned MaxVal,
8587 const Twine &ErrMsg, SMLoc &Loc) {
8588 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8589 return false;
8590 }
8591 Loc = getLoc();
8592 if (!parseExpr(Op)) {
8593 return false;
8594 }
8595 if (Op < MinVal || Op > MaxVal) {
8596 Error(Loc, ErrMsg);
8597 return false;
8598 }
8599
8600 return true;
8601}
8602
8603bool
8604AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8605 const unsigned MinVal,
8606 const unsigned MaxVal,
8607 const StringRef ErrMsg) {
8608 SMLoc Loc;
8609 for (unsigned i = 0; i < OpNum; ++i) {
8610 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
8611 return false;
8612 }
8613
8614 return true;
8615}
8616
8617bool
8618AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8619 using namespace llvm::AMDGPU::Swizzle;
8620
8621 int64_t Lane[LANE_NUM];
8622 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8623 "expected a 2-bit lane id")) {
8624    Imm = QUAD_PERM_ENC;
8625    for (unsigned I = 0; I < LANE_NUM; ++I) {
8626 Imm |= Lane[I] << (LANE_SHIFT * I);
8627 }
8628 return true;
8629 }
8630 return false;
8631}
8632
8633bool
8634AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
8635 using namespace llvm::AMDGPU::Swizzle;
8636
8637 SMLoc Loc;
8638 int64_t GroupSize;
8639 int64_t LaneIdx;
8640
8641 if (!parseSwizzleOperand(GroupSize,
8642 2, 32,
8643 "group size must be in the interval [2,32]",
8644 Loc)) {
8645 return false;
8646 }
8647 if (!isPowerOf2_64(GroupSize)) {
8648 Error(Loc, "group size must be a power of two");
8649 return false;
8650 }
8651 if (parseSwizzleOperand(LaneIdx,
8652 0, GroupSize - 1,
8653 "lane id must be in the interval [0,group size - 1]",
8654 Loc)) {
8655 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8656 return true;
8657 }
8658 return false;
8659}
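// Worked example (illustrative, assuming BITMASK_MAX is 31 for a 5-bit lane
// id): swizzle(BROADCAST, 8, 3) gives AndMask = 31 - 8 + 1 = 24 (0b11000) and
// OrMask = 3, so each lane keeps its group-base bits and reads lane 3 of its
// 8-lane group.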
8660
8661bool
8662AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8663 using namespace llvm::AMDGPU::Swizzle;
8664
8665 SMLoc Loc;
8666 int64_t GroupSize;
8667
8668 if (!parseSwizzleOperand(GroupSize,
8669 2, 32,
8670 "group size must be in the interval [2,32]",
8671 Loc)) {
8672 return false;
8673 }
8674 if (!isPowerOf2_64(GroupSize)) {
8675 Error(Loc, "group size must be a power of two");
8676 return false;
8677 }
8678
8679 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8680 return true;
8681}
8682
8683bool
8684AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8685 using namespace llvm::AMDGPU::Swizzle;
8686
8687 SMLoc Loc;
8688 int64_t GroupSize;
8689
8690 if (!parseSwizzleOperand(GroupSize,
8691 1, 16,
8692 "group size must be in the interval [1,16]",
8693 Loc)) {
8694 return false;
8695 }
8696 if (!isPowerOf2_64(GroupSize)) {
8697 Error(Loc, "group size must be a power of two");
8698 return false;
8699 }
8700
8701 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8702 return true;
8703}
8704
8705bool
8706AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8707 using namespace llvm::AMDGPU::Swizzle;
8708
8709 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8710 return false;
8711 }
8712
8713 StringRef Ctl;
8714 SMLoc StrLoc = getLoc();
8715 if (!parseString(Ctl)) {
8716 return false;
8717 }
8718 if (Ctl.size() != BITMASK_WIDTH) {
8719 Error(StrLoc, "expected a 5-character mask");
8720 return false;
8721 }
8722
8723 unsigned AndMask = 0;
8724 unsigned OrMask = 0;
8725 unsigned XorMask = 0;
8726
8727 for (size_t i = 0; i < Ctl.size(); ++i) {
8728 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8729 switch(Ctl[i]) {
8730 default:
8731 Error(StrLoc, "invalid mask");
8732 return false;
8733 case '0':
8734 break;
8735 case '1':
8736 OrMask |= Mask;
8737 break;
8738 case 'p':
8739 AndMask |= Mask;
8740 break;
8741 case 'i':
8742 AndMask |= Mask;
8743 XorMask |= Mask;
8744 break;
8745 }
8746 }
8747
8748 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8749 return true;
8750}
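// Worked example (illustrative): the mask string "01pi0" is read MSB-first,
// so '1' at bit 3 sets OrMask = 8, 'p' at bit 2 sets AndMask |= 4, and 'i' at
// bit 1 sets AndMask |= 2 and XorMask |= 2, giving encodeBitmaskPerm(6, 8, 2);
// '0' positions contribute nothing.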
8751
8752bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
8753 using namespace llvm::AMDGPU::Swizzle;
8754
8755 if (!AMDGPU::isGFX9Plus(getSTI())) {
8756 Error(getLoc(), "FFT mode swizzle not supported on this GPU");
8757 return false;
8758 }
8759
8760 int64_t Swizzle;
8761 SMLoc Loc;
8762 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
8763 "FFT swizzle must be in the interval [0," +
8764 Twine(FFT_SWIZZLE_MAX) + Twine(']'),
8765 Loc))
8766 return false;
8767
8768 Imm = FFT_MODE_ENC | Swizzle;
8769 return true;
8770}
8771
8772bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
8773 using namespace llvm::AMDGPU::Swizzle;
8774
8775 if (!AMDGPU::isGFX9Plus(getSTI())) {
8776 Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
8777 return false;
8778 }
8779
8780 SMLoc Loc;
8781 int64_t Direction;
8782
8783 if (!parseSwizzleOperand(Direction, 0, 1,
8784 "direction must be 0 (left) or 1 (right)", Loc))
8785 return false;
8786
8787 int64_t RotateSize;
8788 if (!parseSwizzleOperand(
8789 RotateSize, 0, ROTATE_MAX_SIZE,
8790 "number of threads to rotate must be in the interval [0," +
8791 Twine(ROTATE_MAX_SIZE) + Twine(']'),
8792 Loc))
8793 return false;
8794
8795  Imm = ROTATE_MODE_ENC | (Direction << ROTATE_DIR_SHIFT) |
8796        (RotateSize << ROTATE_SIZE_SHIFT);
8797 return true;
8798}
8799
8800bool
8801AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8802
8803 SMLoc OffsetLoc = getLoc();
8804
8805 if (!parseExpr(Imm, "a swizzle macro")) {
8806 return false;
8807 }
8808 if (!isUInt<16>(Imm)) {
8809 Error(OffsetLoc, "expected a 16-bit offset");
8810 return false;
8811 }
8812 return true;
8813}
8814
8815bool
8816AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8817 using namespace llvm::AMDGPU::Swizzle;
8818
8819 if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
8820
8821 SMLoc ModeLoc = getLoc();
8822 bool Ok = false;
8823
8824 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8825 Ok = parseSwizzleQuadPerm(Imm);
8826 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8827 Ok = parseSwizzleBitmaskPerm(Imm);
8828 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8829 Ok = parseSwizzleBroadcast(Imm);
8830 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8831 Ok = parseSwizzleSwap(Imm);
8832 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8833 Ok = parseSwizzleReverse(Imm);
8834 } else if (trySkipId(IdSymbolic[ID_FFT])) {
8835 Ok = parseSwizzleFFT(Imm);
8836 } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
8837 Ok = parseSwizzleRotate(Imm);
8838 } else {
8839 Error(ModeLoc, "expected a swizzle mode");
8840 }
8841
8842 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
8843 }
8844
8845 return false;
8846}
8847
8848ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8849 SMLoc S = getLoc();
8850 int64_t Imm = 0;
8851
8852 if (trySkipId("offset")) {
8853
8854 bool Ok = false;
8855 if (skipToken(AsmToken::Colon, "expected a colon")) {
8856 if (trySkipId("swizzle")) {
8857 Ok = parseSwizzleMacro(Imm);
8858 } else {
8859 Ok = parseSwizzleOffset(Imm);
8860 }
8861 }
8862
8863 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8864
8865    return Ok ? ParseStatus::Success : ParseStatus::Failure;
8866  }
8867 return ParseStatus::NoMatch;
8868}
8869
8870bool
8871AMDGPUOperand::isSwizzle() const {
8872 return isImmTy(ImmTySwizzle);
8873}
8874
8875//===----------------------------------------------------------------------===//
8876// VGPR Index Mode
8877//===----------------------------------------------------------------------===//
8878
8879int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8880
8881 using namespace llvm::AMDGPU::VGPRIndexMode;
8882
8883 if (trySkipToken(AsmToken::RParen)) {
8884 return OFF;
8885 }
8886
8887 int64_t Imm = 0;
8888
8889 while (true) {
8890 unsigned Mode = 0;
8891 SMLoc S = getLoc();
8892
8893 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8894 if (trySkipId(IdSymbolic[ModeId])) {
8895 Mode = 1 << ModeId;
8896 break;
8897 }
8898 }
8899
8900 if (Mode == 0) {
8901 Error(S, (Imm == 0)?
8902 "expected a VGPR index mode or a closing parenthesis" :
8903 "expected a VGPR index mode");
8904 return UNDEF;
8905 }
8906
8907 if (Imm & Mode) {
8908 Error(S, "duplicate VGPR index mode");
8909 return UNDEF;
8910 }
8911 Imm |= Mode;
8912
8913 if (trySkipToken(AsmToken::RParen))
8914 break;
8915 if (!skipToken(AsmToken::Comma,
8916 "expected a comma or a closing parenthesis"))
8917 return UNDEF;
8918 }
8919
8920 return Imm;
8921}
8922
8923ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8924
8925 using namespace llvm::AMDGPU::VGPRIndexMode;
8926
8927 int64_t Imm = 0;
8928 SMLoc S = getLoc();
8929
8930 if (trySkipId("gpr_idx", AsmToken::LParen)) {
8931 Imm = parseGPRIdxMacro();
8932 if (Imm == UNDEF)
8933 return ParseStatus::Failure;
8934 } else {
8935 if (getParser().parseAbsoluteExpression(Imm))
8936 return ParseStatus::Failure;
8937 if (Imm < 0 || !isUInt<4>(Imm))
8938 return Error(S, "invalid immediate: only 4-bit values are legal");
8939 }
8940
8941 Operands.push_back(
8942 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8943 return ParseStatus::Success;
8944}
8945
8946bool AMDGPUOperand::isGPRIdxMode() const {
8947 return isImmTy(ImmTyGprIdxMode);
8948}
8949
8950//===----------------------------------------------------------------------===//
8951// sopp branch targets
8952//===----------------------------------------------------------------------===//
8953
8954ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
8955
8956 // Make sure we are not parsing something
8957 // that looks like a label or an expression but is not.
8958 // This will improve error messages.
8959 if (isRegister() || isModifier())
8960 return ParseStatus::NoMatch;
8961
8962 if (!parseExpr(Operands))
8963 return ParseStatus::Failure;
8964
8965 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
8966 assert(Opr.isImm() || Opr.isExpr());
8967 SMLoc Loc = Opr.getStartLoc();
8968
8969 // Currently we do not support arbitrary expressions as branch targets.
8970 // Only labels and absolute expressions are accepted.
8971 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8972 Error(Loc, "expected an absolute expression or a label");
8973 } else if (Opr.isImm() && !Opr.isS16Imm()) {
8974 Error(Loc, "expected a 16-bit signed jump offset");
8975 }
8976
8977 return ParseStatus::Success;
8978}
8979
8980//===----------------------------------------------------------------------===//
8981// Boolean holding registers
8982//===----------------------------------------------------------------------===//
8983
8984ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
8985 return parseReg(Operands);
8986}
8987
8988//===----------------------------------------------------------------------===//
8989// mubuf
8990//===----------------------------------------------------------------------===//
8991
8992void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
8993 const OperandVector &Operands,
8994 bool IsAtomic) {
8995 OptionalImmIndexMap OptionalIdx;
8996 unsigned FirstOperandIdx = 1;
8997 bool IsAtomicReturn = false;
8998
8999 if (IsAtomic) {
9000 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
9001                     SIInstrFlags::IsAtomicRet;
9002  }
9003
9004 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
9005 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9006
9007 // Add the register arguments
9008 if (Op.isReg()) {
9009 Op.addRegOperands(Inst, 1);
9010 // Insert a tied src for atomic return dst.
9011 // This cannot be postponed as subsequent calls to
9012 // addImmOperands rely on correct number of MC operands.
9013 if (IsAtomicReturn && i == FirstOperandIdx)
9014 Op.addRegOperands(Inst, 1);
9015 continue;
9016 }
9017
9018 // Handle the case where soffset is an immediate
9019 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9020 Op.addImmOperands(Inst, 1);
9021 continue;
9022 }
9023
9024 // Handle tokens like 'offen' which are sometimes hard-coded into the
9025 // asm string. There are no MCInst operands for these.
9026 if (Op.isToken()) {
9027 continue;
9028 }
9029 assert(Op.isImm());
9030
9031 // Handle optional arguments
9032 OptionalIdx[Op.getImmTy()] = i;
9033 }
9034
9035 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
9036 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
9037}
9038
9039//===----------------------------------------------------------------------===//
9040// smrd
9041//===----------------------------------------------------------------------===//
9042
9043bool AMDGPUOperand::isSMRDOffset8() const {
9044 return isImmLiteral() && isUInt<8>(getImm());
9045}
9046
9047bool AMDGPUOperand::isSMEMOffset() const {
9048 // Offset range is checked later by validator.
9049 return isImmLiteral();
9050}
9051
9052bool AMDGPUOperand::isSMRDLiteralOffset() const {
9053 // 32-bit literals are only supported on CI and we only want to use them
9054 // when the offset is > 8-bits.
9055 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
9056}
9057
9058//===----------------------------------------------------------------------===//
9059// vop3
9060//===----------------------------------------------------------------------===//
9061
9062static bool ConvertOmodMul(int64_t &Mul) {
9063 if (Mul != 1 && Mul != 2 && Mul != 4)
9064 return false;
9065
9066 Mul >>= 1;
9067 return true;
9068}
9069
9070static bool ConvertOmodDiv(int64_t &Div) {
9071 if (Div == 1) {
9072 Div = 0;
9073 return true;
9074 }
9075
9076 if (Div == 2) {
9077 Div = 3;
9078 return true;
9079 }
9080
9081 return false;
9082}
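// A minimal worked sketch of the output-modifier (omod) conversions above,
// assuming the usual VOP3 omod field encoding (0 = none, 1 = *2, 2 = *4,
// 3 = /2) implied by these helpers:
//   ConvertOmodMul: mul:1 -> 0, mul:2 -> 1, mul:4 -> 2  (Mul >>= 1)
//   ConvertOmodDiv: div:1 -> 0, div:2 -> 3; any other divisor is rejected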
9083
9084// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
9085// This is intentional and ensures compatibility with sp3.
9086// See bug 35397 for details.
9087bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9088 if (BoundCtrl == 0 || BoundCtrl == 1) {
9089 if (!isGFX11Plus())
9090 BoundCtrl = 1;
9091 return true;
9092 }
9093 return false;
9094}
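// Illustrative effect of the conversion above (a sketch, not exhaustive):
//   pre-gfx11: bound_ctrl:0 -> 1 and bound_ctrl:1 -> 1 (sp3-compatible)
//   gfx11+:    bound_ctrl:0 -> 0 and bound_ctrl:1 -> 1 (value kept as written)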
9095
9096void AMDGPUAsmParser::onBeginOfFile() {
9097 if (!getParser().getStreamer().getTargetStreamer() ||
9098 getSTI().getTargetTriple().getArch() == Triple::r600)
9099 return;
9100
9101 if (!getTargetStreamer().getTargetID())
9102 getTargetStreamer().initializeTargetID(getSTI(),
9103 getSTI().getFeatureString());
9104
9105 if (isHsaAbi(getSTI()))
9106 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9107}
9108
9109/// Parse AMDGPU specific expressions.
9110///
9111/// expr ::= or(expr, ...) |
9112/// max(expr, ...)
9113///
9114bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
9115 using AGVK = AMDGPUMCExpr::VariantKind;
9116
9117 if (isToken(AsmToken::Identifier)) {
9118 StringRef TokenId = getTokenStr();
9119 AGVK VK = StringSwitch<AGVK>(TokenId)
9120 .Case("max", AGVK::AGVK_Max)
9121 .Case("or", AGVK::AGVK_Or)
9122 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
9123 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9124 .Case("alignto", AGVK::AGVK_AlignTo)
9125 .Case("occupancy", AGVK::AGVK_Occupancy)
9126 .Default(AGVK::AGVK_None);
9127
9128 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
9129 SmallVector<const MCExpr *, 4> Exprs;
9130 uint64_t CommaCount = 0;
9131 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
9132 lex(); // Eat '('
9133 while (true) {
9134 if (trySkipToken(AsmToken::RParen)) {
9135 if (Exprs.empty()) {
9136 Error(getToken().getLoc(),
9137 "empty " + Twine(TokenId) + " expression");
9138 return true;
9139 }
9140 if (CommaCount + 1 != Exprs.size()) {
9141 Error(getToken().getLoc(),
9142 "mismatch of commas in " + Twine(TokenId) + " expression");
9143 return true;
9144 }
9145 Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
9146 return false;
9147 }
9148 const MCExpr *Expr;
9149 if (getParser().parseExpression(Expr, EndLoc))
9150 return true;
9151 Exprs.push_back(Expr);
9152 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
9153 if (LastTokenWasComma)
9154 CommaCount++;
9155 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
9156 Error(getToken().getLoc(),
9157 "unexpected token in " + Twine(TokenId) + " expression");
9158 return true;
9159 }
9160 }
9161 }
9162 }
9163 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
9164}
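// For illustration only (a hypothetical directive, not taken from this file):
// the variant expressions accepted above allow assembly such as
//   .set total_vgprs, max(kernel_a.num_vgpr, kernel_b.num_vgpr)
// where "max(...)" is folded into an AMDGPUMCExpr of kind AGVK_Max instead of
// being parsed as an ordinary symbol reference.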
9165
9166ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
9167 StringRef Name = getTokenStr();
9168 if (Name == "mul") {
9169 return parseIntWithPrefix("mul", Operands,
9170 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
9171 }
9172
9173 if (Name == "div") {
9174 return parseIntWithPrefix("div", Operands,
9175 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
9176 }
9177
9178 return ParseStatus::NoMatch;
9179}
9180
9181// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
9182// the number of src operands present, then copies that bit into src0_modifiers.
9183static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
9184 int Opc = Inst.getOpcode();
9185 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9186 if (OpSelIdx == -1)
9187 return;
9188
9189 int SrcNum;
9190 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9191 AMDGPU::OpName::src2};
9192 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
9193 ++SrcNum)
9194 ;
9195 assert(SrcNum > 0);
9196
9197 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9198
9199 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
9200 if (DstIdx == -1)
9201 return;
9202
9203 const MCOperand &DstOp = Inst.getOperand(DstIdx);
9204 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
9205 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9206 if (DstOp.isReg() &&
9207 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
9208 if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
9209 ModVal |= SISrcMods::DST_OP_SEL;
9210 } else {
9211 if ((OpSel & (1 << SrcNum)) != 0)
9212 ModVal |= SISrcMods::DST_OP_SEL;
9213 }
9214 Inst.getOperand(ModIdx).setImm(ModVal);
9215}
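// Sketch of the bit layout handled above (derived from the loop, not
// authoritative): op_sel holds one bit per named src operand followed by the
// destination bit, so with src0..src2 present the destination select is
// op_sel bit 3, and with only src0/src1 it moves down to bit 2. That is why
// the bit index is computed from SrcNum before being copied into
// src0_modifiers as DST_OP_SEL; true 16-bit destination registers take the
// bit from their high/low register half instead.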
9216
9217void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
9218 const OperandVector &Operands) {
9219 cvtVOP3P(Inst, Operands);
9220 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9221}
9222
9223void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
9224 OptionalImmIndexMap &OptionalIdx) {
9225 cvtVOP3P(Inst, Operands, OptionalIdx);
9226 cvtVOP3DstOpSelOnly(Inst, *getMRI());
9227}
9228
9229static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
9230 return
9231 // 1. This operand is input modifiers
9232 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
9233 // 2. This is not last operand
9234 && Desc.NumOperands > (OpNum + 1)
9235 // 3. Next operand is register class
9236 && Desc.operands()[OpNum + 1].RegClass != -1
9237 // 4. Next register is not tied to any other operand
9238 && Desc.getOperandConstraint(OpNum + 1,
9239 MCOI::TIED_TO) == -1;
9240}
9241
9242void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
9243{
9244 OptionalImmIndexMap OptionalIdx;
9245 unsigned Opc = Inst.getOpcode();
9246
9247 unsigned I = 1;
9248 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9249 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9250 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9251 }
9252
9253 for (unsigned E = Operands.size(); I != E; ++I) {
9254 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9255 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9256 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9257 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
9258 Op.isInterpAttrChan()) {
9259 Inst.addOperand(MCOperand::createImm(Op.getImm()));
9260 } else if (Op.isImmModifier()) {
9261 OptionalIdx[Op.getImmTy()] = I;
9262 } else {
9263 llvm_unreachable("unhandled operand type");
9264 }
9265 }
9266
9267 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
9268 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9269 AMDGPUOperand::ImmTyHigh);
9270
9271 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9272 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9273 AMDGPUOperand::ImmTyClamp);
9274
9275 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9276 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9277 AMDGPUOperand::ImmTyOModSI);
9278}
9279
9280void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
9281{
9282 OptionalImmIndexMap OptionalIdx;
9283 unsigned Opc = Inst.getOpcode();
9284
9285 unsigned I = 1;
9286 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9287 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9288 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9289 }
9290
9291 for (unsigned E = Operands.size(); I != E; ++I) {
9292 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9293 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9294 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9295 } else if (Op.isImmModifier()) {
9296 OptionalIdx[Op.getImmTy()] = I;
9297 } else {
9298 llvm_unreachable("unhandled operand type");
9299 }
9300 }
9301
9302 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
9303
9304 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9305 if (OpSelIdx != -1)
9306 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9307
9308 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
9309
9310 if (OpSelIdx == -1)
9311 return;
9312
9313 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9314 AMDGPU::OpName::src2};
9315 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9316 AMDGPU::OpName::src1_modifiers,
9317 AMDGPU::OpName::src2_modifiers};
9318
9319 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9320
9321 for (int J = 0; J < 3; ++J) {
9322 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9323 if (OpIdx == -1)
9324 break;
9325
9326 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9327 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9328
9329 if ((OpSel & (1 << J)) != 0)
9330 ModVal |= SISrcMods::OP_SEL_0;
9331 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
9332 (OpSel & (1 << 3)) != 0)
9333 ModVal |= SISrcMods::DST_OP_SEL;
9334
9335 Inst.getOperand(ModIdx).setImm(ModVal);
9336 }
9337}
9338void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9339 const OperandVector &Operands) {
9340 OptionalImmIndexMap OptionalIdx;
9341 unsigned Opc = Inst.getOpcode();
9342 unsigned I = 1;
9343 int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
9344
9345 const MCInstrDesc &Desc = MII.get(Opc);
9346
9347 for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
9348 static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);
9349
9350 for (unsigned E = Operands.size(); I != E; ++I) {
9351 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
9352 int NumOperands = Inst.getNumOperands();
9353 // The order of operands in MCInst and parsed operands are different.
9354 // Adding dummy cbsz and blgp operands at corresponding MCInst operand
9355 // indices for parsing scale values correctly.
9356 if (NumOperands == CbszOpIdx) {
9357 Inst.addOperand(MCOperand::createImm(0));
9358 Inst.addOperand(MCOperand::createImm(0));
9359 }
9360 if (isRegOrImmWithInputMods(Desc, NumOperands)) {
9361 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9362 } else if (Op.isImmModifier()) {
9363 OptionalIdx[Op.getImmTy()] = I;
9364 } else {
9365 Op.addRegOrImmOperands(Inst, 1);
9366 }
9367 }
9368
9369 // Insert CBSZ and BLGP operands for F8F6F4 variants
9370 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9371 if (CbszIdx != OptionalIdx.end()) {
9372 int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
9373 Inst.getOperand(CbszOpIdx).setImm(CbszVal);
9374 }
9375
9376 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
9377 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9378 if (BlgpIdx != OptionalIdx.end()) {
9379 int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
9380 Inst.getOperand(BlgpOpIdx).setImm(BlgpVal);
9381 }
9382
9383 // Add dummy src_modifiers
9384 Inst.addOperand(MCOperand::createImm(0));
9385 Inst.addOperand(MCOperand::createImm(0));
9386
9387 // Handle op_sel fields
9388
9389 unsigned OpSel = 0;
9390 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9391 if (OpselIdx != OptionalIdx.end()) {
9392 OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
9393 .getImm();
9394 }
9395
9396 unsigned OpSelHi = 0;
9397 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9398 if (OpselHiIdx != OptionalIdx.end()) {
9399 OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
9400 .getImm();
9401 }
9402 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9403 AMDGPU::OpName::src1_modifiers};
9404
9405 for (unsigned J = 0; J < 2; ++J) {
9406 unsigned ModVal = 0;
9407 if (OpSel & (1 << J))
9408 ModVal |= SISrcMods::OP_SEL_0;
9409 if (OpSelHi & (1 << J))
9410 ModVal |= SISrcMods::OP_SEL_1;
9411
9412 const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9413 Inst.getOperand(ModIdx).setImm(ModVal);
9414 }
9415}
9416
9417void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
9418 OptionalImmIndexMap &OptionalIdx) {
9419 unsigned Opc = Inst.getOpcode();
9420
9421 unsigned I = 1;
9422 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9423 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9424 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9425 }
9426
9427 for (unsigned E = Operands.size(); I != E; ++I) {
9428 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9429 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9430 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9431 } else if (Op.isImmModifier()) {
9432 OptionalIdx[Op.getImmTy()] = I;
9433 } else {
9434 Op.addRegOrImmOperands(Inst, 1);
9435 }
9436 }
9437
9438 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::scale_sel))
9439 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9440 AMDGPUOperand::ImmTyScaleSel);
9441
9442 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9443 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9444 AMDGPUOperand::ImmTyClamp);
9445
9446 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
9447 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
9448 Inst.addOperand(Inst.getOperand(0));
9449 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9450 AMDGPUOperand::ImmTyByteSel);
9451 }
9452
9453 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9454 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9455 AMDGPUOperand::ImmTyOModSI);
9456
9457 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
9458 // it has src2 register operand that is tied to dst operand
9459 // we don't allow modifiers for this operand in assembler so src2_modifiers
9460 // should be 0.
9461 if (isMAC(Opc)) {
9462 auto *it = Inst.begin();
9463 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
9464 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
9465 ++it;
9466 // Copy the operand to ensure it's not invalidated when Inst grows.
9467 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
9468 }
9469}
9470
9471void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
9472 OptionalImmIndexMap OptionalIdx;
9473 cvtVOP3(Inst, Operands, OptionalIdx);
9474}
9475
9476void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
9477 OptionalImmIndexMap &OptIdx) {
9478 const int Opc = Inst.getOpcode();
9479 const MCInstrDesc &Desc = MII.get(Opc);
9480
9481 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
9482
9483 if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
9484 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
9485 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9486 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9487 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9488 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
9489 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
9490 Inst.addOperand(Inst.getOperand(0));
9491 }
9492
9493 // Adding vdst_in operand is already covered for these DPP instructions in
9494 // cvtVOP3DPP.
9495 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
9496 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 ||
9497 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 ||
9498 Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 ||
9499 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 ||
9500 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 ||
9501 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 ||
9502 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 ||
9503 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 ||
9504 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
9505 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9506 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp_gfx1250 ||
9507 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp8_gfx1250 ||
9508 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9509 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9510 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp_gfx1250 ||
9511 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp_gfx1250 ||
9512 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp8_gfx1250 ||
9513 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp8_gfx1250 ||
9514 Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_gfx1250 ||
9515 Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_gfx1250 ||
9516 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp_gfx1250 ||
9517 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp_gfx1250 ||
9518 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp8_gfx1250 ||
9519 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp8_gfx1250 ||
9520 Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_gfx1250 ||
9521 Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_gfx1250)) {
9522 Inst.addOperand(Inst.getOperand(0));
9523 }
9524
9525 int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
9526 if (BitOp3Idx != -1) {
9527 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9528 }
9529
9530 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
9531 // instruction, and then figure out where to actually put the modifiers
9532
9533 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9534 if (OpSelIdx != -1) {
9535 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
9536 }
9537
9538 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
9539 if (OpSelHiIdx != -1) {
9540 int DefaultVal = IsPacked ? -1 : 0;
9541 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
9542 DefaultVal);
9543 }
9544
9545 int MatrixAFMTIdx =
9546 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
9547 if (MatrixAFMTIdx != -1) {
9548 addOptionalImmOperand(Inst, Operands, OptIdx,
9549 AMDGPUOperand::ImmTyMatrixAFMT, 0);
9550 }
9551
9552 int MatrixBFMTIdx =
9553 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
9554 if (MatrixBFMTIdx != -1) {
9555 addOptionalImmOperand(Inst, Operands, OptIdx,
9556 AMDGPUOperand::ImmTyMatrixBFMT, 0);
9557 }
9558
9559 int MatrixAScaleIdx =
9560 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale);
9561 if (MatrixAScaleIdx != -1) {
9562 addOptionalImmOperand(Inst, Operands, OptIdx,
9563 AMDGPUOperand::ImmTyMatrixAScale, 0);
9564 }
9565
9566 int MatrixBScaleIdx =
9567 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale);
9568 if (MatrixBScaleIdx != -1) {
9569 addOptionalImmOperand(Inst, Operands, OptIdx,
9570 AMDGPUOperand::ImmTyMatrixBScale, 0);
9571 }
9572
9573 int MatrixAScaleFmtIdx =
9574 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
9575 if (MatrixAScaleFmtIdx != -1) {
9576 addOptionalImmOperand(Inst, Operands, OptIdx,
9577 AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
9578 }
9579
9580 int MatrixBScaleFmtIdx =
9581 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
9582 if (MatrixBScaleFmtIdx != -1) {
9583 addOptionalImmOperand(Inst, Operands, OptIdx,
9584 AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
9585 }
9586
9587 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
9588 addOptionalImmOperand(Inst, Operands, OptIdx,
9589 AMDGPUOperand::ImmTyMatrixAReuse, 0);
9590
9591 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
9592 addOptionalImmOperand(Inst, Operands, OptIdx,
9593 AMDGPUOperand::ImmTyMatrixBReuse, 0);
9594
9595 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
9596 if (NegLoIdx != -1)
9597 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
9598
9599 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
9600 if (NegHiIdx != -1)
9601 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
9602
9603 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9604 AMDGPU::OpName::src2};
9605 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9606 AMDGPU::OpName::src1_modifiers,
9607 AMDGPU::OpName::src2_modifiers};
9608
9609 unsigned OpSel = 0;
9610 unsigned OpSelHi = 0;
9611 unsigned NegLo = 0;
9612 unsigned NegHi = 0;
9613
9614 if (OpSelIdx != -1)
9615 OpSel = Inst.getOperand(OpSelIdx).getImm();
9616
9617 if (OpSelHiIdx != -1)
9618 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
9619
9620 if (NegLoIdx != -1)
9621 NegLo = Inst.getOperand(NegLoIdx).getImm();
9622
9623 if (NegHiIdx != -1)
9624 NegHi = Inst.getOperand(NegHiIdx).getImm();
9625
9626 for (int J = 0; J < 3; ++J) {
9627 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9628 if (OpIdx == -1)
9629 break;
9630
9631 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9632
9633 if (ModIdx == -1)
9634 continue;
9635
9636 uint32_t ModVal = 0;
9637
9638 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
9639 if (SrcOp.isReg() && getMRI()
9640 ->getRegClass(AMDGPU::VGPR_16RegClassID)
9641 .contains(SrcOp.getReg())) {
9642 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
9643 if (VGPRSuffixIsHi)
9644 ModVal |= SISrcMods::OP_SEL_0;
9645 } else {
9646 if ((OpSel & (1 << J)) != 0)
9647 ModVal |= SISrcMods::OP_SEL_0;
9648 }
9649
9650 if ((OpSelHi & (1 << J)) != 0)
9651 ModVal |= SISrcMods::OP_SEL_1;
9652
9653 if ((NegLo & (1 << J)) != 0)
9654 ModVal |= SISrcMods::NEG;
9655
9656 if ((NegHi & (1 << J)) != 0)
9657 ModVal |= SISrcMods::NEG_HI;
9658
9659 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
9660 }
9661}
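// Summary sketch of the per-source packing performed above (derived from the
// loop, not authoritative): for source J, op_sel bit J sets OP_SEL_0,
// op_sel_hi bit J sets OP_SEL_1, neg_lo bit J sets NEG, and neg_hi bit J sets
// NEG_HI in srcJ_modifiers; true 16-bit VGPR sources take OP_SEL_0 from their
// high/low register half rather than from op_sel.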
9662
9663void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
9664 OptionalImmIndexMap OptIdx;
9665 cvtVOP3(Inst, Operands, OptIdx);
9666 cvtVOP3P(Inst, Operands, OptIdx);
9667}
9668
9669 static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
9670 unsigned i, unsigned Opc,
9671 AMDGPU::OpName OpName) {
9672 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
9673 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9674 else
9675 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
9676}
9677
9678void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
9679 unsigned Opc = Inst.getOpcode();
9680
9681 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
9682 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
9683 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
9684 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
9685 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
9686
9687 OptionalImmIndexMap OptIdx;
9688 for (unsigned i = 5; i < Operands.size(); ++i) {
9689 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9690 OptIdx[Op.getImmTy()] = i;
9691 }
9692
9693 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
9694 addOptionalImmOperand(Inst, Operands, OptIdx,
9695 AMDGPUOperand::ImmTyIndexKey8bit);
9696
9697 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
9698 addOptionalImmOperand(Inst, Operands, OptIdx,
9699 AMDGPUOperand::ImmTyIndexKey16bit);
9700
9701 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
9702 addOptionalImmOperand(Inst, Operands, OptIdx,
9703 AMDGPUOperand::ImmTyIndexKey32bit);
9704
9705 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9706 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
9707
9708 cvtVOP3P(Inst, Operands, OptIdx);
9709}
9710
9711//===----------------------------------------------------------------------===//
9712// VOPD
9713//===----------------------------------------------------------------------===//
9714
9715ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
9716 if (!hasVOPD(getSTI()))
9717 return ParseStatus::NoMatch;
9718
9719 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
9720 SMLoc S = getLoc();
9721 lex();
9722 lex();
9723 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
9724 SMLoc OpYLoc = getLoc();
9725 StringRef OpYName;
9726 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
9727 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
9728 return ParseStatus::Success;
9729 }
9730 return Error(OpYLoc, "expected a VOPDY instruction after ::");
9731 }
9732 return ParseStatus::NoMatch;
9733}
9734
9735// Create VOPD MCInst operands using parsed assembler operands.
9736void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
9737 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9738
9739 auto addOp = [&](uint16_t ParsedOprIdx) {
9740 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
9741 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9742 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9743 return;
9744 }
9745 if (Op.isReg()) {
9746 Op.addRegOperands(Inst, 1);
9747 return;
9748 }
9749 if (Op.isImm()) {
9750 Op.addImmOperands(Inst, 1);
9751 return;
9752 }
9753 llvm_unreachable("Unhandled operand type in cvtVOPD");
9754 };
9755
9756 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
9757
9758 // MCInst operands are ordered as follows:
9759 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
9760
9761 for (auto CompIdx : VOPD::COMPONENTS) {
9762 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
9763 }
9764
9765 for (auto CompIdx : VOPD::COMPONENTS) {
9766 const auto &CInfo = InstInfo[CompIdx];
9767 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
9768 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
9769 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
9770 if (CInfo.hasSrc2Acc())
9771 addOp(CInfo.getIndexOfDstInParsedOperands());
9772 }
9773
9774 int BitOp3Idx =
9775 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
9776 if (BitOp3Idx != -1) {
9777 OptionalImmIndexMap OptIdx;
9778 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
9779 if (Op.isImm())
9780 OptIdx[Op.getImmTy()] = Operands.size() - 1;
9781
9782 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9783 }
9784}
9785
9786//===----------------------------------------------------------------------===//
9787// dpp
9788//===----------------------------------------------------------------------===//
9789
9790bool AMDGPUOperand::isDPP8() const {
9791 return isImmTy(ImmTyDPP8);
9792}
9793
9794bool AMDGPUOperand::isDPPCtrl() const {
9795 using namespace AMDGPU::DPP;
9796
9797 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
9798 if (result) {
9799 int64_t Imm = getImm();
9800 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
9801 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
9802 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
9803 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
9804 (Imm == DppCtrl::WAVE_SHL1) ||
9805 (Imm == DppCtrl::WAVE_ROL1) ||
9806 (Imm == DppCtrl::WAVE_SHR1) ||
9807 (Imm == DppCtrl::WAVE_ROR1) ||
9808 (Imm == DppCtrl::ROW_MIRROR) ||
9809 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
9810 (Imm == DppCtrl::BCAST15) ||
9811 (Imm == DppCtrl::BCAST31) ||
9812 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
9813 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
9814 }
9815 return false;
9816}
9817
9818//===----------------------------------------------------------------------===//
9819// mAI
9820//===----------------------------------------------------------------------===//
9821
9822bool AMDGPUOperand::isBLGP() const {
9823 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
9824}
9825
9826bool AMDGPUOperand::isS16Imm() const {
9827 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
9828}
9829
9830bool AMDGPUOperand::isU16Imm() const {
9831 return isImmLiteral() && isUInt<16>(getImm());
9832}
9833
9834//===----------------------------------------------------------------------===//
9835// dim
9836//===----------------------------------------------------------------------===//
9837
9838bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
9839 // We want to allow "dim:1D" etc.,
9840 // but the initial 1 is tokenized as an integer.
9841 std::string Token;
9842 if (isToken(AsmToken::Integer)) {
9843 SMLoc Loc = getToken().getEndLoc();
9844 Token = std::string(getTokenStr());
9845 lex();
9846 if (getLoc() != Loc)
9847 return false;
9848 }
9849
9850 StringRef Suffix;
9851 if (!parseId(Suffix))
9852 return false;
9853 Token += Suffix;
9854
9855 StringRef DimId = Token;
9856 DimId.consume_front("SQ_RSRC_IMG_");
9857
9858 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
9859 if (!DimInfo)
9860 return false;
9861
9862 Encoding = DimInfo->Encoding;
9863 return true;
9864}
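// Example inputs accepted by the helper above (illustrative): "dim:2D" arrives
// as the integer token "2" immediately followed by the identifier "D", and the
// long form "dim:SQ_RSRC_IMG_2D" is reduced to the same "2D" suffix before the
// MIMG dim table lookup.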
9865
9866ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
9867 if (!isGFX10Plus())
9868 return ParseStatus::NoMatch;
9869
9870 SMLoc S = getLoc();
9871
9872 if (!trySkipId("dim", AsmToken::Colon))
9873 return ParseStatus::NoMatch;
9874
9875 unsigned Encoding;
9876 SMLoc Loc = getLoc();
9877 if (!parseDimId(Encoding))
9878 return Error(Loc, "invalid dim value");
9879
9880 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
9881 AMDGPUOperand::ImmTyDim));
9882 return ParseStatus::Success;
9883}
9884
9885//===----------------------------------------------------------------------===//
9886// dpp
9887//===----------------------------------------------------------------------===//
9888
9889ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9890 SMLoc S = getLoc();
9891
9892 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9893 return ParseStatus::NoMatch;
9894
9895 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
9896
9897 int64_t Sels[8];
9898
9899 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9900 return ParseStatus::Failure;
9901
9902 for (size_t i = 0; i < 8; ++i) {
9903 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9904 return ParseStatus::Failure;
9905
9906 SMLoc Loc = getLoc();
9907 if (getParser().parseAbsoluteExpression(Sels[i]))
9908 return ParseStatus::Failure;
9909 if (0 > Sels[i] || 7 < Sels[i])
9910 return Error(Loc, "expected a 3-bit value");
9911 }
9912
9913 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9914 return ParseStatus::Failure;
9915
9916 unsigned DPP8 = 0;
9917 for (size_t i = 0; i < 8; ++i)
9918 DPP8 |= (Sels[i] << (i * 3));
9919
9920 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9921 return ParseStatus::Success;
9922}
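// Worked example of the packing above (a sketch; the value follows from the
// loop, three bits per lane selector): the identity selection
//   dpp8:[0,1,2,3,4,5,6,7]
// packs to 0 | 1<<3 | 2<<6 | ... | 7<<21 = 0xFAC688.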
9923
9924bool
9925AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9926 const OperandVector &Operands) {
9927 if (Ctrl == "row_newbcast")
9928 return isGFX90A();
9929
9930 if (Ctrl == "row_share" ||
9931 Ctrl == "row_xmask")
9932 return isGFX10Plus();
9933
9934 if (Ctrl == "wave_shl" ||
9935 Ctrl == "wave_shr" ||
9936 Ctrl == "wave_rol" ||
9937 Ctrl == "wave_ror" ||
9938 Ctrl == "row_bcast")
9939 return isVI() || isGFX9();
9940
9941 return Ctrl == "row_mirror" ||
9942 Ctrl == "row_half_mirror" ||
9943 Ctrl == "quad_perm" ||
9944 Ctrl == "row_shl" ||
9945 Ctrl == "row_shr" ||
9946 Ctrl == "row_ror";
9947}
9948
9949int64_t
9950AMDGPUAsmParser::parseDPPCtrlPerm() {
9951 // quad_perm:[%d,%d,%d,%d]
9952
9953 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9954 return -1;
9955
9956 int64_t Val = 0;
9957 for (int i = 0; i < 4; ++i) {
9958 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9959 return -1;
9960
9961 int64_t Temp;
9962 SMLoc Loc = getLoc();
9963 if (getParser().parseAbsoluteExpression(Temp))
9964 return -1;
9965 if (Temp < 0 || Temp > 3) {
9966 Error(Loc, "expected a 2-bit value");
9967 return -1;
9968 }
9969
9970 Val += (Temp << i * 2);
9971 }
9972
9973 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9974 return -1;
9975
9976 return Val;
9977}
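// Worked example (illustrative): quad_perm:[0,1,2,3] packs two bits per lane
// as 0 | 1<<2 | 2<<4 | 3<<6 = 0xE4, the identity permutation that is also used
// later as the default dppCtrl value.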
9978
9979int64_t
9980AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
9981 using namespace AMDGPU::DPP;
9982
9983 // sel:%d
9984
9985 int64_t Val;
9986 SMLoc Loc = getLoc();
9987
9988 if (getParser().parseAbsoluteExpression(Val))
9989 return -1;
9990
9991 struct DppCtrlCheck {
9992 int64_t Ctrl;
9993 int Lo;
9994 int Hi;
9995 };
9996
9997 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
9998 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
9999 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
10000 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
10001 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
10002 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
10003 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
10004 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
10005 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
10006 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
10007 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
10008 .Default({-1, 0, 0});
10009
10010 bool Valid;
10011 if (Check.Ctrl == -1) {
10012 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
10013 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
10014 } else {
10015 Valid = Check.Lo <= Val && Val <= Check.Hi;
10016 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
10017 }
10018
10019 if (!Valid) {
10020 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
10021 return -1;
10022 }
10023
10024 return Val;
10025}
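// Illustrative mapping produced by the table above (a sketch): row_shl:1
// through row_shl:15 encode as ROW_SHL0 | n, wave_shl:1 encodes as the single
// value WAVE_SHL1, and row_bcast accepts only 15 or 31, mapping to BCAST15 or
// BCAST31 respectively.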
10026
10027ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
10028 using namespace AMDGPU::DPP;
10029
10030 if (!isToken(AsmToken::Identifier) ||
10031 !isSupportedDPPCtrl(getTokenStr(), Operands))
10032 return ParseStatus::NoMatch;
10033
10034 SMLoc S = getLoc();
10035 int64_t Val = -1;
10036 StringRef Ctrl;
10037
10038 parseId(Ctrl);
10039
10040 if (Ctrl == "row_mirror") {
10041 Val = DppCtrl::ROW_MIRROR;
10042 } else if (Ctrl == "row_half_mirror") {
10043 Val = DppCtrl::ROW_HALF_MIRROR;
10044 } else {
10045 if (skipToken(AsmToken::Colon, "expected a colon")) {
10046 if (Ctrl == "quad_perm") {
10047 Val = parseDPPCtrlPerm();
10048 } else {
10049 Val = parseDPPCtrlSel(Ctrl);
10050 }
10051 }
10052 }
10053
10054 if (Val == -1)
10055 return ParseStatus::Failure;
10056
10057 Operands.push_back(
10058 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
10059 return ParseStatus::Success;
10060}
10061
10062void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
10063 bool IsDPP8) {
10064 OptionalImmIndexMap OptionalIdx;
10065 unsigned Opc = Inst.getOpcode();
10066 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10067
10068 // MAC instructions are special because they have 'old'
10069 // operand which is not tied to dst (but assumed to be).
10070 // They also have dummy unused src2_modifiers.
10071 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
10072 int Src2ModIdx =
10073 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
10074 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
10075 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
10076
10077 unsigned I = 1;
10078 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10079 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10080 }
10081
10082 int Fi = 0;
10083 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
10084 bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
10085 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
10086 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
10087 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
10088
10089 for (unsigned E = Operands.size(); I != E; ++I) {
10090
10091 if (IsMAC) {
10092 int NumOperands = Inst.getNumOperands();
10093 if (OldIdx == NumOperands) {
10094 // Handle old operand
10095 constexpr int DST_IDX = 0;
10096 Inst.addOperand(Inst.getOperand(DST_IDX));
10097 } else if (Src2ModIdx == NumOperands) {
10098 // Add unused dummy src2_modifiers
10099 Inst.addOperand(MCOperand::createImm(0));
10100 }
10101 }
10102
10103 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
10104 Inst.addOperand(Inst.getOperand(0));
10105 }
10106
10107 if (IsVOP3CvtSrDpp) {
10108 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
10109 Inst.addOperand(MCOperand::createImm(0));
10110 Inst.addOperand(MCOperand::createReg(MCRegister()));
10111 }
10112 }
10113
10114 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10115 MCOI::TIED_TO);
10116 if (TiedTo != -1) {
10117 assert((unsigned)TiedTo < Inst.getNumOperands());
10118 // handle tied old or src2 for MAC instructions
10119 Inst.addOperand(Inst.getOperand(TiedTo));
10120 }
10121 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10122 // Add the register arguments
10123 if (IsDPP8 && Op.isDppFI()) {
10124 Fi = Op.getImm();
10125 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10126 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
10127 } else if (Op.isReg()) {
10128 Op.addRegOperands(Inst, 1);
10129 } else if (Op.isImm() &&
10130 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
10131 Op.addImmOperands(Inst, 1);
10132 } else if (Op.isImm()) {
10133 OptionalIdx[Op.getImmTy()] = I;
10134 } else {
10135 llvm_unreachable("unhandled operand type");
10136 }
10137 }
10138
10139 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
10140 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10141 AMDGPUOperand::ImmTyClamp);
10142
10143 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
10144 if (VdstInIdx == static_cast<int>(Inst.getNumOperands()))
10145 Inst.addOperand(Inst.getOperand(0));
10146 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10147 AMDGPUOperand::ImmTyByteSel);
10148 }
10149
10150 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10151 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
10152
10153 if (Desc.TSFlags & SIInstrFlags::VOP3P)
10154 cvtVOP3P(Inst, Operands, OptionalIdx);
10155 else if (Desc.TSFlags & SIInstrFlags::VOP3)
10156 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
10157 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
10158 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
10159 }
10160
10161 if (IsDPP8) {
10162 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
10163 using namespace llvm::AMDGPU::DPP;
10164 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10165 } else {
10166 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
10167 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10168 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10169 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10170
10171 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
10172 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10173 AMDGPUOperand::ImmTyDppFI);
10174 }
10175}
10176
10177void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
10178 OptionalImmIndexMap OptionalIdx;
10179
10180 unsigned I = 1;
10181 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10182 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10183 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10184 }
10185
10186 int Fi = 0;
10187 for (unsigned E = Operands.size(); I != E; ++I) {
10188 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
10189 MCOI::TIED_TO);
10190 if (TiedTo != -1) {
10191 assert((unsigned)TiedTo < Inst.getNumOperands());
10192 // handle tied old or src2 for MAC instructions
10193 Inst.addOperand(Inst.getOperand(TiedTo));
10194 }
10195 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10196 // Add the register arguments
10197 if (Op.isReg() && validateVccOperand(Op.getReg())) {
10198 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
10199 // Skip it.
10200 continue;
10201 }
10202
10203 if (IsDPP8) {
10204 if (Op.isDPP8()) {
10205 Op.addImmOperands(Inst, 1);
10206 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10207 Op.addRegWithFPInputModsOperands(Inst, 2);
10208 } else if (Op.isDppFI()) {
10209 Fi = Op.getImm();
10210 } else if (Op.isReg()) {
10211 Op.addRegOperands(Inst, 1);
10212 } else {
10213 llvm_unreachable("Invalid operand type");
10214 }
10215 } else {
10216 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10217 Op.addRegWithFPInputModsOperands(Inst, 2);
10218 } else if (Op.isReg()) {
10219 Op.addRegOperands(Inst, 1);
10220 } else if (Op.isDPPCtrl()) {
10221 Op.addImmOperands(Inst, 1);
10222 } else if (Op.isImm()) {
10223 // Handle optional arguments
10224 OptionalIdx[Op.getImmTy()] = I;
10225 } else {
10226 llvm_unreachable("Invalid operand type");
10227 }
10228 }
10229 }
10230
10231 if (IsDPP8) {
10232 using namespace llvm::AMDGPU::DPP;
10233 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
10234 } else {
10235 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
10236 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
10237 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
10238 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
10239 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10240 AMDGPUOperand::ImmTyDppFI);
10241 }
10242 }
10243}
10244
10245//===----------------------------------------------------------------------===//
10246// sdwa
10247//===----------------------------------------------------------------------===//
10248
10249ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
10250 StringRef Prefix,
10251 AMDGPUOperand::ImmTy Type) {
10252 return parseStringOrIntWithPrefix(
10253 Operands, Prefix,
10254 {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
10255 Type);
10256}
10257
10258ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
10259 return parseStringOrIntWithPrefix(
10260 Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
10261 AMDGPUOperand::ImmTySDWADstUnused);
10262}
10263
10264void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
10265 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
10266}
10267
10268void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
10269 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
10270}
10271
10272void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
10273 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
10274}
10275
10276void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
10277 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
10278}
10279
10280void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
10281 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
10282}
10283
10284void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
10285 uint64_t BasicInstType,
10286 bool SkipDstVcc,
10287 bool SkipSrcVcc) {
10288 using namespace llvm::AMDGPU::SDWA;
10289
10290 OptionalImmIndexMap OptionalIdx;
10291 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
10292 bool SkippedVcc = false;
10293
10294 unsigned I = 1;
10295 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
10296 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
10297 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
10298 }
10299
10300 for (unsigned E = Operands.size(); I != E; ++I) {
10301 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
10302 if (SkipVcc && !SkippedVcc && Op.isReg() &&
10303 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
10304 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
10305 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
10306 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
10307 // Skip VCC only if we didn't skip it on previous iteration.
10308 // Note that src0 and src1 occupy 2 slots each because of modifiers.
10309 if (BasicInstType == SIInstrFlags::VOP2 &&
10310 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
10311 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
10312 SkippedVcc = true;
10313 continue;
10314 }
10315 if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
10316 SkippedVcc = true;
10317 continue;
10318 }
10319 }
10320 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
10321 Op.addRegOrImmWithInputModsOperands(Inst, 2);
10322 } else if (Op.isImm()) {
10323 // Handle optional arguments
10324 OptionalIdx[Op.getImmTy()] = I;
10325 } else {
10326 llvm_unreachable("Invalid operand type");
10327 }
10328 SkippedVcc = false;
10329 }
10330
10331 const unsigned Opc = Inst.getOpcode();
10332 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10333 Opc != AMDGPU::V_NOP_sdwa_vi) {
10334 // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
10335 switch (BasicInstType) {
10336 case SIInstrFlags::VOP1:
10337 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
10338 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10339 AMDGPUOperand::ImmTyClamp, 0);
10340
10341 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
10342 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10343 AMDGPUOperand::ImmTyOModSI, 0);
10344
10345 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
10346 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10347 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10348
10349 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
10350 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10351 AMDGPUOperand::ImmTySDWADstUnused,
10352 DstUnused::UNUSED_PRESERVE);
10353
10354 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10355 break;
10356
10357 case SIInstrFlags::VOP2:
10358 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10359 AMDGPUOperand::ImmTyClamp, 0);
10360
10361 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
10362 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
10363
10364 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
10365 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
10366 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10367 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10368 break;
10369
10370 case SIInstrFlags::VOPC:
10371 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
10372 addOptionalImmOperand(Inst, Operands, OptionalIdx,
10373 AMDGPUOperand::ImmTyClamp, 0);
10374 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
10375 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
10376 break;
10377
10378 default:
10379 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
10380 }
10381 }
10382
10383 // special case v_mac_{f16, f32}:
10384 // it has src2 register operand that is tied to dst operand
10385 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
10386 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10387 auto *it = Inst.begin();
10388 std::advance(
10389 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
10390 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
10391 }
10392}
10393
10394/// Force static initialization.
10395extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
10400
10401#define GET_MATCHER_IMPLEMENTATION
10402#define GET_MNEMONIC_SPELL_CHECKER
10403#define GET_MNEMONIC_CHECKER
10404#include "AMDGPUGenAsmMatcher.inc"
10405
10406ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
10407 unsigned MCK) {
10408 switch (MCK) {
10409 case MCK_addr64:
10410 return parseTokenOp("addr64", Operands);
10411 case MCK_done:
10412 return parseTokenOp("done", Operands);
10413 case MCK_idxen:
10414 return parseTokenOp("idxen", Operands);
10415 case MCK_lds:
10416 return parseTokenOp("lds", Operands);
10417 case MCK_offen:
10418 return parseTokenOp("offen", Operands);
10419 case MCK_off:
10420 return parseTokenOp("off", Operands);
10421 case MCK_row_95_en:
10422 return parseTokenOp("row_en", Operands);
10423 case MCK_gds:
10424 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
10425 case MCK_tfe:
10426 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
10427 }
10428 return tryCustomParseOperand(Operands, MCK);
10429}
10430
10431// This function should be defined after auto-generated include so that we have
10432// MatchClassKind enum defined
10433unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
10434 unsigned Kind) {
10435 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
10436 // But MatchInstructionImpl() expects to meet token and fails to validate
10437 // operand. This method checks if we are given immediate operand but expect to
10438 // get corresponding token.
10439 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
10440 switch (Kind) {
10441 case MCK_addr64:
10442 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10443 case MCK_gds:
10444 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10445 case MCK_lds:
10446 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10447 case MCK_idxen:
10448 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10449 case MCK_offen:
10450 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10451 case MCK_tfe:
10452 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10453 case MCK_SSrc_b32:
10454 // When operands have expression values, they will return true for isToken,
10455 // because it is not possible to distinguish between a token and an
10456 // expression at parse time. MatchInstructionImpl() will always try to
10457 // match an operand as a token, when isToken returns true, and when the
10458 // name of the expression is not a valid token, the match will fail,
10459 // so we need to handle it here.
10460 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10461 case MCK_SSrc_f32:
10462 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10463 case MCK_SOPPBrTarget:
10464 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10465 case MCK_VReg32OrOff:
10466 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10467 case MCK_InterpSlot:
10468 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10469 case MCK_InterpAttr:
10470 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10471 case MCK_InterpAttrChan:
10472 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10473 case MCK_SReg_64:
10474 case MCK_SReg_64_XEXEC:
10475 // Null is defined as a 32-bit register but
10476 // it should also be enabled with 64-bit operands or larger.
10477 // The following code enables it for SReg_64 and larger operands
10478 // used as source and destination. Remaining source
10479 // operands are handled in isInlinableImm.
10480 case MCK_SReg_96:
10481 case MCK_SReg_128:
10482 case MCK_SReg_256:
10483 case MCK_SReg_512:
10484 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10485 default:
10486 return Match_InvalidOperand;
10487 }
10488}
10489
10490//===----------------------------------------------------------------------===//
10491// endpgm
10492//===----------------------------------------------------------------------===//
10493
10494ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
10495 SMLoc S = getLoc();
10496 int64_t Imm = 0;
10497
10498 if (!parseExpr(Imm)) {
10499 // The operand is optional, if not present default to 0
10500 Imm = 0;
10501 }
10502
10503 if (!isUInt<16>(Imm))
10504 return Error(S, "expected a 16-bit value");
10505
10506 Operands.push_back(
10507 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
10508 return ParseStatus::Success;
10509}
10510
10511bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
10512
10513//===----------------------------------------------------------------------===//
10514// Split Barrier
10515//===----------------------------------------------------------------------===//
10516
10517bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
#define Success
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
SmallVector< int16_t, MAX_SRC_OPERANDS_NUM > OperandIndices
static bool checkWriteLane(const MCInst &Inst)
static bool getRegNum(StringRef Str, unsigned &Num)
static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, unsigned i, unsigned Opc, AMDGPU::OpName OpName)
static constexpr RegInfo RegularRegisters[]
static const RegInfo * getRegularRegInfo(StringRef Str)
static ArrayRef< unsigned > getAllVariants()
static OperandIndices getSrcOperandIndices(unsigned Opcode, bool AddMandatoryLiterals=false)
static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name, const MCRegisterInfo *MRI)
static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
static const fltSemantics * getFltSemantics(unsigned Size)
static bool isRegularReg(RegisterKind Kind)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser()
Force static initialization.
static bool ConvertOmodMul(int64_t &Mul)
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi)
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT)
constexpr uint64_t MIMGFlags
static bool AMDGPUCheckMnemonic(StringRef Mnemonic, const FeatureBitset &AvailableFeatures, unsigned VariantID)
static void applyMnemonicAliases(StringRef &Mnemonic, const FeatureBitset &Features, unsigned VariantID)
constexpr unsigned MAX_SRC_OPERANDS_NUM
#define EXPR_RESOLVE_OR_ERROR(RESOLVED)
static bool ConvertOmodDiv(int64_t &Div)
static bool IsRevOpcode(const unsigned Opcode)
static bool encodeCnt(const AMDGPU::IsaVersion ISA, int64_t &IntVal, int64_t CntVal, bool Saturate, unsigned(*encode)(const IsaVersion &Version, unsigned, unsigned), unsigned(*decode)(const IsaVersion &Version, unsigned))
static MCRegister getSpecialRegForName(StringRef RegName)
static void addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands, AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx, AMDGPUOperand::ImmTy ImmT, int64_t Default=0, std::optional< unsigned > InsertAt=std::nullopt)
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI)
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum)
static const fltSemantics * getOpFltSemantics(uint8_t OperandType)
static bool isInvalidVOPDY(const OperandVector &Operands, uint64_t InvalidOprIdx)
static std::string AMDGPUMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID=0)
static LLVM_READNONE unsigned encodeBitmaskPerm(const unsigned AndMask, const unsigned OrMask, const unsigned XorMask)
static bool isSafeTruncation(int64_t Val, unsigned Size)
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Provides AMDGPU specific target descriptions.
AMDGPU metadata definitions and in-memory representations.
AMDHSA kernel descriptor definitions.
static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value, raw_ostream &Err)
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
#define LLVM_READNONE
Definition Compiler.h:315
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
@ Default
#define Check(C,...)
static llvm::Expected< InlineInfo > decode(DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr)
Decode an InlineInfo in Data at the specified offset.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
Loop::LoopBounds::Direction Direction
Definition LoopInfo.cpp:231
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
mir Rename Register Operands
Register Reg
Register const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(PassOpts->AAPipeline)
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Interface definition for SIInstrInfo.
unsigned unsigned DefaultVal
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
This file implements the SmallBitVector class.
static bool Enabled
Definition Statistic.cpp:46
StringSet - A set-like wrapper for the StringMap.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
BinaryOperator * Mul
static const char * getRegisterName(MCRegister Reg)
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6057
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
iterator end() const
Definition ArrayRef.h:136
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
Target independent representation for an assembler token.
Definition MCAsmMacro.h:22
StringRef getString() const
Get the string for the current token; this includes all characters (for example, the quotes on string...
Definition MCAsmMacro.h:103
bool is(TokenKind K) const
Definition MCAsmMacro.h:75
Register getReg() const
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
void printExpr(raw_ostream &, const MCExpr &) const
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:353
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:428
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
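For illustration, a minimal sketch of building the expression my_sym + 4 with the MCConstantExpr and MCBinaryExpr factories listed above (the symbol name and helper are placeholders, not anything from this file):

  #include "llvm/MC/MCContext.h"
  #include "llvm/MC/MCExpr.h"

  using namespace llvm;

  // Build "my_sym + 4" as an MCExpr tree; purely illustrative.
  static const MCExpr *buildSymPlusFour(MCContext &Ctx) {
    const MCExpr *Sym =
        MCSymbolRefExpr::create(Ctx.getOrCreateSymbol("my_sym"), Ctx);
    const MCExpr *Four = MCConstantExpr::create(4, Ctx);
    return MCBinaryExpr::createAdd(Sym, Four, Ctx);
  }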
Context object for machine code objects.
Definition MCContext.h:83
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getNumOperands() const
Definition MCInst.h:212
SMLoc getLoc() const
Definition MCInst.h:208
void setLoc(SMLoc loc)
Definition MCInst.h:207
unsigned getOpcode() const
Definition MCInst.h:202
iterator insert(iterator I, const MCOperand &Op)
Definition MCInst.h:232
void addOperand(const MCOperand Op)
Definition MCInst.h:215
iterator begin()
Definition MCInst.h:227
size_t size() const
Definition MCInst.h:226
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
void setImm(int64_t Val)
Definition MCInst.h:89
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
bool isImm() const
Definition MCInst.h:66
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
const MCExpr * getExpr() const
Definition MCInst.h:118
bool isExpr() const
Definition MCInst.h:69
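As a small illustration of the MCInst/MCOperand interface above, here is a sketch that assembles an instruction by hand (the helper name, opcode, and operand values are placeholders, not real AMDGPU encodings):

  #include "llvm/MC/MCInst.h"

  using namespace llvm;

  // Build an MCInst with one register and one immediate operand.
  static MCInst makeInst(unsigned Opcode, MCRegister Dst, int64_t Imm) {
    MCInst Inst;
    Inst.setOpcode(Opcode);
    Inst.addOperand(MCOperand::createReg(Dst)); // destination register
    Inst.addOperand(MCOperand::createImm(Imm)); // immediate source
    return Inst;
  }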
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
constexpr bool isValid() const
Definition MCRegister.h:76
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
Generic base class for all target subtargets.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
void setRedefinable(bool Value)
Mark this symbol as redefinable.
Definition MCSymbol.h:210
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
MCTargetAsmParser - Generic interface to target specific assembly parsers.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
constexpr unsigned id() const
Definition Register.h:95
Represents a location in source code.
Definition SMLoc.h:23
static SMLoc getFromPointer(const char *Ptr)
Definition SMLoc.h:36
constexpr const char * getPointer() const
Definition SMLoc.h:34
constexpr bool isValid() const
Definition SMLoc.h:29
Represents a range in source code.
Definition SMLoc.h:48
SMLoc Start
Definition SMLoc.h:50
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:854
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
Definition StringRef.h:657
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:573
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:611
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
bool consume_front(StringRef Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:637
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition StringRef.h:273
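The consume_front/consume_back helpers above are the natural tools for peeling register syntax apart; a hypothetical sketch (not the parser's actual routine):

  #include "llvm/ADT/StringRef.h"

  using namespace llvm;

  // Split a token such as "v[0:3]" into its 'v' prefix and the "0:3" range.
  static bool splitVectorRegToken(StringRef Tok, StringRef &Indices) {
    if (!Tok.consume_front("v"))
      return false;
    if (!Tok.consume_front("[") || !Tok.consume_back("]"))
      return false;
    Indices = Tok; // e.g. "0:3"
    return true;
  }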
bool contains(StringRef key) const
Check if the set contains the given key.
Definition StringSet.h:60
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition StringSet.h:39
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
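StringSwitch is commonly used for keyword-to-enum mapping during parsing; a hedged example with made-up cases (DataFmt and parseDataFmt are illustrative, not names from this file):

  #include "llvm/ADT/StringRef.h"
  #include "llvm/ADT/StringSwitch.h"

  using namespace llvm;

  enum class DataFmt { Unknown, U8, I16, F32 };

  // Map a format keyword to an enum value, falling back to Unknown.
  static DataFmt parseDataFmt(StringRef S) {
    return StringSwitch<DataFmt>(S)
        .Case("u8", DataFmt::U8)
        .Case("i16", DataFmt::I16)
        .Case("f32", DataFmt::F32)
        .Default(DataFmt::Unknown);
  }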
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:194
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo)
Get size of register operand.
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg a scalar register?
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo register, return the correct hardware register for the given STI; otherwise return Reg.
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY bool isLitExpr(const MCExpr *Expr)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool hasMAIInsts(const MCSubtargetInfo &STI)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCSubtargetInfo &ST)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
LLVM_READONLY int64_t getLitValue(const MCExpr *Expr)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isInlineValue(unsigned Reg)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:231
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:202
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:209
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:222
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:219
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:224
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:210
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:206
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:201
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:208
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:207
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:218
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:216
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:211
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:205
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:225
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:236
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:237
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:212
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:204
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:221
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:217
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:223
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:213
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:238
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:220
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:203
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:228
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
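The encode*/decode* counter helpers referenced in this index (declared in AMDGPUBaseInfo.h) combine into a single s_waitcnt immediate; a sketch under that assumption, with a hypothetical helper name:

  // Compose a waitcnt immediate from individual counters, starting from the
  // bitmask in which every counter field is at its maximum ("no wait").
  static unsigned buildWaitcnt(const llvm::AMDGPU::IsaVersion &ISA,
                               unsigned Vm, unsigned Exp, unsigned Lgkm) {
    using namespace llvm::AMDGPU;
    unsigned Wait = getWaitcntBitMask(ISA);
    Wait = encodeVmcnt(ISA, Wait, Vm);
    Wait = encodeExpcnt(ISA, Wait, Exp);
    Wait = encodeLgkmcnt(ISA, Wait, Lgkm);
    return Wait;
  }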
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable?
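The isInlinableLiteral* predicates answer whether a value can be encoded as an inline constant rather than consuming a literal slot; a minimal, hypothetical wrapper (HasInv2Pi selects the extended set that includes 1/(2*pi)):

  // Sketch around the AMDGPUBaseInfo.h predicate; the double 1.0
  // (bit pattern 0x3FF0000000000000) is expected to be inlinable, while an
  // arbitrary value such as 123456789 is not.
  static bool canUseInlineConstant64(int64_t Literal, bool HasInv2Pi) {
    return llvm::AMDGPU::isInlinableLiteral64(Literal, HasInv2Pi);
  }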
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1425
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ OPERAND_IMMEDIATE
Definition MCInstrDesc.h:62
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
Context & getContext() const
Definition BasicBlock.h:99
bool isNull(StringRef S)
Definition YAMLTraits.h:570
This is an optimization pass for GlobalISel generic memory operations.
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
Definition Error.h:1113
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
StringMapEntry< Value * > ValueName
Definition Value.h:56
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1705
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
Definition Alignment.h:206
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
static bool isMem(const MachineInstr &MI, unsigned Op)
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
void PrintError(const Twine &Msg)
Definition Error.cpp:104
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:252
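These bit-width predicates are what an assembler uses to range-check immediate fields; a sketch with a hypothetical helper (field widths are made up):

  #include "llvm/Support/MathExtras.h"

  using namespace llvm;

  // Does Offset fit in a Bits-wide field, signed or unsigned?
  static bool fitsOffsetField(int64_t Offset, unsigned Bits, bool IsSigned) {
    return IsSigned ? isIntN(Bits, Offset) : isUIntN(Bits, Offset);
  }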
FunctionAddr VTableAddr uintptr_t uintptr_t DataSize
Definition InstrProf.h:267
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:293
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:314
Op::Description Desc
Target & getTheR600Target()
The target for R600 GPUs.
SmallVectorImpl< std::unique_ptr< MCParsedAsmOperand > > OperandVector
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:302
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:159
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:164
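Hi_32/Lo_32 are handy when a 64-bit literal has to be emitted as two 32-bit words; illustrative only (splitImm64 is a made-up helper):

  #include "llvm/Support/MathExtras.h"

  using namespace llvm;

  // Split a 64-bit immediate into its low and high 32-bit halves.
  static void splitImm64(uint64_t Imm, uint32_t &Lo, uint32_t &Hi) {
    Lo = Lo_32(Imm); // bits [31:0]
    Hi = Hi_32(Imm); // bits [63:32]
  }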
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:405
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:257
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
#define N
RegisterKind Kind
StringLiteral Name
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx, bool InitMCExpr=true)
Instruction set architecture version.
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
static LLVM_ABI const fltSemantics & IEEEsingle() LLVM_READNONE
Definition APFloat.cpp:266
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:304
static LLVM_ABI const fltSemantics & IEEEdouble() LLVM_READNONE
Definition APFloat.cpp:267
static LLVM_ABI const fltSemantics & IEEEhalf() LLVM_READNONE
Definition APFloat.cpp:264
static LLVM_ABI const fltSemantics & BFloat() LLVM_READNONE
Definition APFloat.cpp:265
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:320
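The fltSemantics accessors and APFloat::convert above are the ingredients of a losslessness check in the spirit of canLosslesslyConvertToFPType(); a hedged sketch with a hypothetical helper name:

  #include "llvm/ADT/APFloat.h"

  using namespace llvm;

  // Can the double value V be represented exactly as an IEEE half?
  static bool fitsInHalfExactly(double V) {
    bool LosesInfo = false;
    APFloat F(V);
    (void)F.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven,
                    &LosesInfo);
    return !LosesInfo;
  }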
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...
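RegisterMCAsmParser is the helper that LLVMInitializeAMDGPUAsmParser (listed near the top of this index) uses to hook the parser into the target registry. The general shape is roughly the following sketch, registering the parser for both the R600 and GCN targets; treat it as an outline rather than the file's exact code:

  extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
    RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
    RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
  }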