1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
15#include "SIDefines.h"
16#include "SIInstrInfo.h"
21#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/StringSet.h"
24#include "llvm/ADT/Twine.h"
27#include "llvm/MC/MCAsmInfo.h"
28#include "llvm/MC/MCContext.h"
29#include "llvm/MC/MCExpr.h"
30#include "llvm/MC/MCInst.h"
31#include "llvm/MC/MCInstrDesc.h"
36#include "llvm/MC/MCSymbol.h"
43#include <optional>
44
45using namespace llvm;
46using namespace llvm::AMDGPU;
47using namespace llvm::amdhsa;
48
49namespace {
50
51class AMDGPUAsmParser;
52
53enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
54
55//===----------------------------------------------------------------------===//
56// Operand
57//===----------------------------------------------------------------------===//
58
59class AMDGPUOperand : public MCParsedAsmOperand {
60 enum KindTy {
61 Token,
62 Immediate,
65 } Kind;
66
67 SMLoc StartLoc, EndLoc;
68 const AMDGPUAsmParser *AsmParser;
69
70public:
71 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
72 : Kind(Kind_), AsmParser(AsmParser_) {}
73
74 using Ptr = std::unique_ptr<AMDGPUOperand>;
75
76 struct Modifiers {
77 bool Abs = false;
78 bool Neg = false;
79 bool Sext = false;
80 bool Lit = false;
81
82 bool hasFPModifiers() const { return Abs || Neg; }
83 bool hasIntModifiers() const { return Sext; }
84 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
85
86 int64_t getFPModifiersOperand() const {
87 int64_t Operand = 0;
88 Operand |= Abs ? SISrcMods::ABS : 0u;
89 Operand |= Neg ? SISrcMods::NEG : 0u;
90 return Operand;
91 }
92
93 int64_t getIntModifiersOperand() const {
94 int64_t Operand = 0;
95 Operand |= Sext ? SISrcMods::SEXT : 0u;
96 return Operand;
97 }
98
99 int64_t getModifiersOperand() const {
100 assert(!(hasFPModifiers() && hasIntModifiers())
101 && "fp and int modifiers should not be used simultaneously");
102 if (hasFPModifiers())
103 return getFPModifiersOperand();
104 if (hasIntModifiers())
105 return getIntModifiersOperand();
106 return 0;
107 }
108
109 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
110 };
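  // Illustrative sketch (not part of the original source): how source
  // modifiers written in assembly map onto a Modifiers value and the
  // src*_modifiers immediate emitted next to the operand.
  //
  //   Modifiers M;            // operand written as "-|v0|"
  //   M.Abs = true;           //   |...|  -> SISrcMods::ABS
  //   M.Neg = true;           //   -      -> SISrcMods::NEG
  //   // M.getModifiersOperand() == (SISrcMods::ABS | SISrcMods::NEG)
  //
  //   Modifiers S;            // operand written as "sext(v1)"
  //   S.Sext = true;          //   sext() -> SISrcMods::SEXT
  //   // S.getModifiersOperand() == SISrcMods::SEXT
  //
  // Mixing the FP and integer groups on one operand is rejected by the
  // assert in getModifiersOperand().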
111
112 enum ImmTy {
113 ImmTyNone,
114 ImmTyGDS,
115 ImmTyLDS,
116 ImmTyOffen,
117 ImmTyIdxen,
118 ImmTyAddr64,
119 ImmTyOffset,
120 ImmTyInstOffset,
121 ImmTyOffset0,
122 ImmTyOffset1,
123 ImmTySMEMOffsetMod,
124 ImmTyCPol,
125 ImmTyTFE,
126 ImmTyD16,
127 ImmTyClamp,
128 ImmTyOModSI,
129 ImmTySDWADstSel,
130 ImmTySDWASrc0Sel,
131 ImmTySDWASrc1Sel,
132 ImmTySDWADstUnused,
133 ImmTyDMask,
134 ImmTyDim,
135 ImmTyUNorm,
136 ImmTyDA,
137 ImmTyR128A16,
138 ImmTyA16,
139 ImmTyLWE,
140 ImmTyExpTgt,
141 ImmTyExpCompr,
142 ImmTyExpVM,
143 ImmTyFORMAT,
144 ImmTyHwreg,
145 ImmTyOff,
146 ImmTySendMsg,
147 ImmTyInterpSlot,
148 ImmTyInterpAttr,
149 ImmTyInterpAttrChan,
150 ImmTyOpSel,
151 ImmTyOpSelHi,
152 ImmTyNegLo,
153 ImmTyNegHi,
154 ImmTyIndexKey8bit,
155 ImmTyIndexKey16bit,
156 ImmTyDPP8,
157 ImmTyDppCtrl,
158 ImmTyDppRowMask,
159 ImmTyDppBankMask,
160 ImmTyDppBoundCtrl,
161 ImmTyDppFI,
162 ImmTySwizzle,
163 ImmTyGprIdxMode,
164 ImmTyHigh,
165 ImmTyBLGP,
166 ImmTyCBSZ,
167 ImmTyABID,
168 ImmTyEndpgm,
169 ImmTyWaitVDST,
170 ImmTyWaitEXP,
171 ImmTyWaitVAVDst,
172 ImmTyWaitVMVSrc,
173 ImmTyByteSel,
174 ImmTyBitOp3,
175 };
176
177 // Immediate operand kind.
178 // It helps to identify the location of an offending operand after an error.
179 // Note that regular literals and mandatory literals (KImm) must be handled
180 // differently. When looking for an offending operand, we should usually
181 // ignore mandatory literals because they are part of the instruction and
182 // cannot be changed. Report location of mandatory operands only for VOPD,
183 // when both OpX and OpY have a KImm and there are no other literals.
184 enum ImmKindTy {
185 ImmKindTyNone,
186 ImmKindTyLiteral,
187 ImmKindTyMandatoryLiteral,
188 ImmKindTyConst,
189 };
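  // Illustrative sketch (not part of the original source): the distinction
  // drawn by ImmKindTy. In "v_add_f32 v0, 0x11213141, v1" the constant is a
  // regular literal (ImmKindTyLiteral) and is a reasonable location to report
  // for a literal-related error. In "v_fmaak_f32 v0, v1, v2, 0x11213141" the
  // trailing constant is a mandatory KImm (ImmKindTyMandatoryLiteral): it is
  // part of the instruction format itself, so it is normally skipped when
  // searching for an offending literal operand.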
190
191private:
192 struct TokOp {
193 const char *Data;
194 unsigned Length;
195 };
196
197 struct ImmOp {
198 int64_t Val;
199 ImmTy Type;
200 bool IsFPImm;
201 mutable ImmKindTy Kind;
202 Modifiers Mods;
203 };
204
205 struct RegOp {
206 MCRegister RegNo;
207 Modifiers Mods;
208 };
209
210 union {
211 TokOp Tok;
212 ImmOp Imm;
213 RegOp Reg;
214 const MCExpr *Expr;
215 };
216
217public:
218 bool isToken() const override { return Kind == Token; }
219
220 bool isSymbolRefExpr() const {
221 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
222 }
223
224 bool isImm() const override {
225 return Kind == Immediate;
226 }
227
228 void setImmKindNone() const {
229 assert(isImm());
230 Imm.Kind = ImmKindTyNone;
231 }
232
233 void setImmKindLiteral() const {
234 assert(isImm());
235 Imm.Kind = ImmKindTyLiteral;
236 }
237
238 void setImmKindMandatoryLiteral() const {
239 assert(isImm());
240 Imm.Kind = ImmKindTyMandatoryLiteral;
241 }
242
243 void setImmKindConst() const {
244 assert(isImm());
245 Imm.Kind = ImmKindTyConst;
246 }
247
248 bool IsImmKindLiteral() const {
249 return isImm() && Imm.Kind == ImmKindTyLiteral;
250 }
251
252 bool IsImmKindMandatoryLiteral() const {
253 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
254 }
255
256 bool isImmKindConst() const {
257 return isImm() && Imm.Kind == ImmKindTyConst;
258 }
259
260 bool isInlinableImm(MVT type) const;
261 bool isLiteralImm(MVT type) const;
262
263 bool isRegKind() const {
264 return Kind == Register;
265 }
266
267 bool isReg() const override {
268 return isRegKind() && !hasModifiers();
269 }
270
271 bool isRegOrInline(unsigned RCID, MVT type) const {
272 return isRegClass(RCID) || isInlinableImm(type);
273 }
274
275 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
276 return isRegOrInline(RCID, type) || isLiteralImm(type);
277 }
278
279 bool isRegOrImmWithInt16InputMods() const {
280 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
281 }
282
283 template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
284 return isRegOrImmWithInputMods(
285 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
286 }
287
288 bool isRegOrImmWithInt32InputMods() const {
289 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
290 }
291
292 bool isRegOrInlineImmWithInt16InputMods() const {
293 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
294 }
295
296 template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
297 return isRegOrInline(
298 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
299 }
300
301 bool isRegOrInlineImmWithInt32InputMods() const {
302 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
303 }
304
305 bool isRegOrImmWithInt64InputMods() const {
306 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
307 }
308
309 bool isRegOrImmWithFP16InputMods() const {
310 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
311 }
312
313 template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
314 return isRegOrImmWithInputMods(
315 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
316 }
317
318 bool isRegOrImmWithFP32InputMods() const {
319 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
320 }
321
322 bool isRegOrImmWithFP64InputMods() const {
323 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
324 }
325
326 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
327 return isRegOrInline(
328 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
329 }
330
331 bool isRegOrInlineImmWithFP32InputMods() const {
332 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
333 }
334
335 bool isPackedFP16InputMods() const {
336 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
337 }
338
339 bool isPackedFP32InputMods() const {
340 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::v2f32);
341 }
342
343 bool isVReg() const {
344 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
345 isRegClass(AMDGPU::VReg_64RegClassID) ||
346 isRegClass(AMDGPU::VReg_96RegClassID) ||
347 isRegClass(AMDGPU::VReg_128RegClassID) ||
348 isRegClass(AMDGPU::VReg_160RegClassID) ||
349 isRegClass(AMDGPU::VReg_192RegClassID) ||
350 isRegClass(AMDGPU::VReg_256RegClassID) ||
351 isRegClass(AMDGPU::VReg_512RegClassID) ||
352 isRegClass(AMDGPU::VReg_1024RegClassID);
353 }
354
355 bool isVReg32() const {
356 return isRegClass(AMDGPU::VGPR_32RegClassID);
357 }
358
359 bool isVReg32OrOff() const {
360 return isOff() || isVReg32();
361 }
362
363 bool isNull() const {
364 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
365 }
366
367 bool isVRegWithInputMods() const;
368 template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
369 template <bool IsFake16> bool isT16VRegWithInputMods() const;
370
371 bool isSDWAOperand(MVT type) const;
372 bool isSDWAFP16Operand() const;
373 bool isSDWAFP32Operand() const;
374 bool isSDWAInt16Operand() const;
375 bool isSDWAInt32Operand() const;
376
377 bool isImmTy(ImmTy ImmT) const {
378 return isImm() && Imm.Type == ImmT;
379 }
380
381 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
382
383 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
384
385 bool isImmModifier() const {
386 return isImm() && Imm.Type != ImmTyNone;
387 }
388
389 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
390 bool isDim() const { return isImmTy(ImmTyDim); }
391 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
392 bool isOff() const { return isImmTy(ImmTyOff); }
393 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
394 bool isOffen() const { return isImmTy(ImmTyOffen); }
395 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
396 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
397 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
398 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
399 bool isGDS() const { return isImmTy(ImmTyGDS); }
400 bool isLDS() const { return isImmTy(ImmTyLDS); }
401 bool isCPol() const { return isImmTy(ImmTyCPol); }
402 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
403 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
404 bool isTFE() const { return isImmTy(ImmTyTFE); }
405 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
406 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
407 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
408 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
409 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
410 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
411 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
412 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
413 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
414 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
415 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
416 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
417 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
418 bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
419
420 bool isRegOrImm() const {
421 return isReg() || isImm();
422 }
423
424 bool isRegClass(unsigned RCID) const;
425
426 bool isInlineValue() const;
427
428 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
429 return isRegOrInline(RCID, type) && !hasModifiers();
430 }
431
432 bool isSCSrcB16() const {
433 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
434 }
435
436 bool isSCSrcV2B16() const {
437 return isSCSrcB16();
438 }
439
440 bool isSCSrc_b32() const {
441 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
442 }
443
444 bool isSCSrc_b64() const {
445 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
446 }
447
448 bool isBoolReg() const;
449
450 bool isSCSrcF16() const {
451 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
452 }
453
454 bool isSCSrcV2F16() const {
455 return isSCSrcF16();
456 }
457
458 bool isSCSrcF32() const {
459 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
460 }
461
462 bool isSCSrcF64() const {
463 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
464 }
465
466 bool isSSrc_b32() const {
467 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
468 }
469
470 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
471
472 bool isSSrcV2B16() const {
473 llvm_unreachable("cannot happen");
474 return isSSrc_b16();
475 }
476
477 bool isSSrc_b64() const {
478 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
479 // See isVSrc64().
480 return isSCSrc_b64() || isLiteralImm(MVT::i64);
481 }
482
483 bool isSSrc_f32() const {
484 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
485 }
486
487 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
488
489 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
490
491 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
492
493 bool isSSrcV2F16() const {
494 llvm_unreachable("cannot happen");
495 return isSSrc_f16();
496 }
497
498 bool isSSrcV2FP32() const {
499 llvm_unreachable("cannot happen");
500 return isSSrc_f32();
501 }
502
503 bool isSCSrcV2FP32() const {
504 llvm_unreachable("cannot happen");
505 return isSCSrcF32();
506 }
507
508 bool isSSrcV2INT32() const {
509 llvm_unreachable("cannot happen");
510 return isSSrc_b32();
511 }
512
513 bool isSCSrcV2INT32() const {
514 llvm_unreachable("cannot happen");
515 return isSCSrc_b32();
516 }
517
518 bool isSSrcOrLds_b32() const {
519 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
520 isLiteralImm(MVT::i32) || isExpr();
521 }
522
523 bool isVCSrc_b32() const {
524 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
525 }
526
527 bool isVCSrcB64() const {
528 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
529 }
530
531 bool isVCSrcT_b16() const {
532 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
533 }
534
535 bool isVCSrcTB16_Lo128() const {
536 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
537 }
538
539 bool isVCSrcFake16B16_Lo128() const {
540 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
541 }
542
543 bool isVCSrc_b16() const {
544 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
545 }
546
547 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
548
549 bool isVCSrc_f32() const {
550 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
551 }
552
553 bool isVCSrcF64() const {
554 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
555 }
556
557 bool isVCSrcTBF16() const {
558 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
559 }
560
561 bool isVCSrcT_f16() const {
562 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
563 }
564
565 bool isVCSrcT_bf16() const {
566 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
567 }
568
569 bool isVCSrcTBF16_Lo128() const {
570 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
571 }
572
573 bool isVCSrcTF16_Lo128() const {
574 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
575 }
576
577 bool isVCSrcFake16BF16_Lo128() const {
578 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
579 }
580
581 bool isVCSrcFake16F16_Lo128() const {
582 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
583 }
584
585 bool isVCSrc_bf16() const {
586 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
587 }
588
589 bool isVCSrc_f16() const {
590 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
591 }
592
593 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
594
595 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
596
597 bool isVSrc_b32() const {
598 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
599 }
600
601 bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
602
603 bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }
604
605 bool isVSrcT_b16_Lo128() const {
606 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
607 }
608
609 bool isVSrcFake16_b16_Lo128() const {
610 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
611 }
612
613 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
614
615 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
616
617 bool isVCSrcV2FP32() const {
618 return isVCSrcF64();
619 }
620
621 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
622
623 bool isVCSrcV2INT32() const {
624 return isVCSrcB64();
625 }
626
627 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
628
629 bool isVSrc_f32() const {
630 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
631 }
632
633 bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }
634
635 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
636
637 bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }
638
639 bool isVSrcT_bf16_Lo128() const {
640 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
641 }
642
643 bool isVSrcT_f16_Lo128() const {
644 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
645 }
646
647 bool isVSrcFake16_bf16_Lo128() const {
648 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
649 }
650
651 bool isVSrcFake16_f16_Lo128() const {
652 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
653 }
654
655 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
656
657 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
658
659 bool isVSrc_v2bf16() const {
660 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
661 }
662
663 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
664
665 bool isVISrcB32() const {
666 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
667 }
668
669 bool isVISrcB16() const {
670 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
671 }
672
673 bool isVISrcV2B16() const {
674 return isVISrcB16();
675 }
676
677 bool isVISrcF32() const {
678 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
679 }
680
681 bool isVISrcF16() const {
682 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
683 }
684
685 bool isVISrcV2F16() const {
686 return isVISrcF16() || isVISrcB32();
687 }
688
689 bool isVISrc_64_bf16() const {
690 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
691 }
692
693 bool isVISrc_64_f16() const {
694 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
695 }
696
697 bool isVISrc_64_b32() const {
698 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
699 }
700
701 bool isVISrc_64B64() const {
702 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
703 }
704
705 bool isVISrc_64_f64() const {
706 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
707 }
708
709 bool isVISrc_64V2FP32() const {
710 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
711 }
712
713 bool isVISrc_64V2INT32() const {
714 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
715 }
716
717 bool isVISrc_256_b32() const {
718 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
719 }
720
721 bool isVISrc_256_f32() const {
722 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
723 }
724
725 bool isVISrc_256B64() const {
726 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
727 }
728
729 bool isVISrc_256_f64() const {
730 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
731 }
732
733 bool isVISrc_128B16() const {
734 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
735 }
736
737 bool isVISrc_128V2B16() const {
738 return isVISrc_128B16();
739 }
740
741 bool isVISrc_128_b32() const {
742 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
743 }
744
745 bool isVISrc_128_f32() const {
746 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
747 }
748
749 bool isVISrc_256V2FP32() const {
750 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
751 }
752
753 bool isVISrc_256V2INT32() const {
754 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
755 }
756
757 bool isVISrc_512_b32() const {
758 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
759 }
760
761 bool isVISrc_512B16() const {
762 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
763 }
764
765 bool isVISrc_512V2B16() const {
766 return isVISrc_512B16();
767 }
768
769 bool isVISrc_512_f32() const {
770 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
771 }
772
773 bool isVISrc_512F16() const {
774 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
775 }
776
777 bool isVISrc_512V2F16() const {
778 return isVISrc_512F16() || isVISrc_512_b32();
779 }
780
781 bool isVISrc_1024_b32() const {
782 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
783 }
784
785 bool isVISrc_1024B16() const {
786 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
787 }
788
789 bool isVISrc_1024V2B16() const {
790 return isVISrc_1024B16();
791 }
792
793 bool isVISrc_1024_f32() const {
794 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
795 }
796
797 bool isVISrc_1024F16() const {
798 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
799 }
800
801 bool isVISrc_1024V2F16() const {
802 return isVISrc_1024F16() || isVISrc_1024_b32();
803 }
804
805 bool isAISrcB32() const {
806 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
807 }
808
809 bool isAISrcB16() const {
810 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
811 }
812
813 bool isAISrcV2B16() const {
814 return isAISrcB16();
815 }
816
817 bool isAISrcF32() const {
818 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
819 }
820
821 bool isAISrcF16() const {
822 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
823 }
824
825 bool isAISrcV2F16() const {
826 return isAISrcF16() || isAISrcB32();
827 }
828
829 bool isAISrc_64B64() const {
830 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
831 }
832
833 bool isAISrc_64_f64() const {
834 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
835 }
836
837 bool isAISrc_128_b32() const {
838 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
839 }
840
841 bool isAISrc_128B16() const {
842 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
843 }
844
845 bool isAISrc_128V2B16() const {
846 return isAISrc_128B16();
847 }
848
849 bool isAISrc_128_f32() const {
850 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
851 }
852
853 bool isAISrc_128F16() const {
854 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
855 }
856
857 bool isAISrc_128V2F16() const {
858 return isAISrc_128F16() || isAISrc_128_b32();
859 }
860
861 bool isVISrc_128_bf16() const {
862 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
863 }
864
865 bool isVISrc_128_f16() const {
866 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
867 }
868
869 bool isVISrc_128V2F16() const {
870 return isVISrc_128_f16() || isVISrc_128_b32();
871 }
872
873 bool isAISrc_256B64() const {
874 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
875 }
876
877 bool isAISrc_256_f64() const {
878 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
879 }
880
881 bool isAISrc_512_b32() const {
882 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
883 }
884
885 bool isAISrc_512B16() const {
886 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
887 }
888
889 bool isAISrc_512V2B16() const {
890 return isAISrc_512B16();
891 }
892
893 bool isAISrc_512_f32() const {
894 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
895 }
896
897 bool isAISrc_512F16() const {
898 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
899 }
900
901 bool isAISrc_512V2F16() const {
902 return isAISrc_512F16() || isAISrc_512_b32();
903 }
904
905 bool isAISrc_1024_b32() const {
906 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
907 }
908
909 bool isAISrc_1024B16() const {
910 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
911 }
912
913 bool isAISrc_1024V2B16() const {
914 return isAISrc_1024B16();
915 }
916
917 bool isAISrc_1024_f32() const {
918 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
919 }
920
921 bool isAISrc_1024F16() const {
922 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
923 }
924
925 bool isAISrc_1024V2F16() const {
926 return isAISrc_1024F16() || isAISrc_1024_b32();
927 }
928
929 bool isKImmFP32() const {
930 return isLiteralImm(MVT::f32);
931 }
932
933 bool isKImmFP16() const {
934 return isLiteralImm(MVT::f16);
935 }
936
937 bool isMem() const override {
938 return false;
939 }
940
941 bool isExpr() const {
942 return Kind == Expression;
943 }
944
945 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
946
947 bool isSWaitCnt() const;
948 bool isDepCtr() const;
949 bool isSDelayALU() const;
950 bool isHwreg() const;
951 bool isSendMsg() const;
952 bool isSplitBarrier() const;
953 bool isSwizzle() const;
954 bool isSMRDOffset8() const;
955 bool isSMEMOffset() const;
956 bool isSMRDLiteralOffset() const;
957 bool isDPP8() const;
958 bool isDPPCtrl() const;
959 bool isBLGP() const;
960 bool isGPRIdxMode() const;
961 bool isS16Imm() const;
962 bool isU16Imm() const;
963 bool isEndpgm() const;
964
965 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
966 return [=](){ return P(*this); };
967 }
968
969 StringRef getToken() const {
970 assert(isToken());
971 return StringRef(Tok.Data, Tok.Length);
972 }
973
974 int64_t getImm() const {
975 assert(isImm());
976 return Imm.Val;
977 }
978
979 void setImm(int64_t Val) {
980 assert(isImm());
981 Imm.Val = Val;
982 }
983
984 ImmTy getImmTy() const {
985 assert(isImm());
986 return Imm.Type;
987 }
988
989 MCRegister getReg() const override {
990 assert(isRegKind());
991 return Reg.RegNo;
992 }
993
994 SMLoc getStartLoc() const override {
995 return StartLoc;
996 }
997
998 SMLoc getEndLoc() const override {
999 return EndLoc;
1000 }
1001
1002 SMRange getLocRange() const {
1003 return SMRange(StartLoc, EndLoc);
1004 }
1005
1006 Modifiers getModifiers() const {
1007 assert(isRegKind() || isImmTy(ImmTyNone));
1008 return isRegKind() ? Reg.Mods : Imm.Mods;
1009 }
1010
1011 void setModifiers(Modifiers Mods) {
1012 assert(isRegKind() || isImmTy(ImmTyNone));
1013 if (isRegKind())
1014 Reg.Mods = Mods;
1015 else
1016 Imm.Mods = Mods;
1017 }
1018
1019 bool hasModifiers() const {
1020 return getModifiers().hasModifiers();
1021 }
1022
1023 bool hasFPModifiers() const {
1024 return getModifiers().hasFPModifiers();
1025 }
1026
1027 bool hasIntModifiers() const {
1028 return getModifiers().hasIntModifiers();
1029 }
1030
1031 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1032
1033 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1034
1035 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1036
1037 void addRegOperands(MCInst &Inst, unsigned N) const;
1038
1039 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1040 if (isRegKind())
1041 addRegOperands(Inst, N);
1042 else
1043 addImmOperands(Inst, N);
1044 }
1045
1046 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1047 Modifiers Mods = getModifiers();
1048 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1049 if (isRegKind()) {
1050 addRegOperands(Inst, N);
1051 } else {
1052 addImmOperands(Inst, N, false);
1053 }
1054 }
1055
1056 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1057 assert(!hasIntModifiers());
1058 addRegOrImmWithInputModsOperands(Inst, N);
1059 }
1060
1061 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1062 assert(!hasFPModifiers());
1063 addRegOrImmWithInputModsOperands(Inst, N);
1064 }
1065
1066 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1067 Modifiers Mods = getModifiers();
1068 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1069 assert(isRegKind());
1070 addRegOperands(Inst, N);
1071 }
1072
1073 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1074 assert(!hasIntModifiers());
1075 addRegWithInputModsOperands(Inst, N);
1076 }
1077
1078 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1079 assert(!hasFPModifiers());
1080 addRegWithInputModsOperands(Inst, N);
1081 }
1082
1083 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1084 // clang-format off
1085 switch (Type) {
1086 case ImmTyNone: OS << "None"; break;
1087 case ImmTyGDS: OS << "GDS"; break;
1088 case ImmTyLDS: OS << "LDS"; break;
1089 case ImmTyOffen: OS << "Offen"; break;
1090 case ImmTyIdxen: OS << "Idxen"; break;
1091 case ImmTyAddr64: OS << "Addr64"; break;
1092 case ImmTyOffset: OS << "Offset"; break;
1093 case ImmTyInstOffset: OS << "InstOffset"; break;
1094 case ImmTyOffset0: OS << "Offset0"; break;
1095 case ImmTyOffset1: OS << "Offset1"; break;
1096 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1097 case ImmTyCPol: OS << "CPol"; break;
1098 case ImmTyIndexKey8bit: OS << "index_key"; break;
1099 case ImmTyIndexKey16bit: OS << "index_key"; break;
1100 case ImmTyTFE: OS << "TFE"; break;
1101 case ImmTyD16: OS << "D16"; break;
1102 case ImmTyFORMAT: OS << "FORMAT"; break;
1103 case ImmTyClamp: OS << "Clamp"; break;
1104 case ImmTyOModSI: OS << "OModSI"; break;
1105 case ImmTyDPP8: OS << "DPP8"; break;
1106 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1107 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1108 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1109 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1110 case ImmTyDppFI: OS << "DppFI"; break;
1111 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1112 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1113 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1114 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1115 case ImmTyDMask: OS << "DMask"; break;
1116 case ImmTyDim: OS << "Dim"; break;
1117 case ImmTyUNorm: OS << "UNorm"; break;
1118 case ImmTyDA: OS << "DA"; break;
1119 case ImmTyR128A16: OS << "R128A16"; break;
1120 case ImmTyA16: OS << "A16"; break;
1121 case ImmTyLWE: OS << "LWE"; break;
1122 case ImmTyOff: OS << "Off"; break;
1123 case ImmTyExpTgt: OS << "ExpTgt"; break;
1124 case ImmTyExpCompr: OS << "ExpCompr"; break;
1125 case ImmTyExpVM: OS << "ExpVM"; break;
1126 case ImmTyHwreg: OS << "Hwreg"; break;
1127 case ImmTySendMsg: OS << "SendMsg"; break;
1128 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1129 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1130 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1131 case ImmTyOpSel: OS << "OpSel"; break;
1132 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1133 case ImmTyNegLo: OS << "NegLo"; break;
1134 case ImmTyNegHi: OS << "NegHi"; break;
1135 case ImmTySwizzle: OS << "Swizzle"; break;
1136 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1137 case ImmTyHigh: OS << "High"; break;
1138 case ImmTyBLGP: OS << "BLGP"; break;
1139 case ImmTyCBSZ: OS << "CBSZ"; break;
1140 case ImmTyABID: OS << "ABID"; break;
1141 case ImmTyEndpgm: OS << "Endpgm"; break;
1142 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1143 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1144 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1145 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1146 case ImmTyByteSel: OS << "ByteSel" ; break;
1147 case ImmTyBitOp3: OS << "BitOp3"; break;
1148 }
1149 // clang-format on
1150 }
1151
1152 void print(raw_ostream &OS) const override {
1153 switch (Kind) {
1154 case Register:
1155 OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
1156 << " mods: " << Reg.Mods << '>';
1157 break;
1158 case Immediate:
1159 OS << '<' << getImm();
1160 if (getImmTy() != ImmTyNone) {
1161 OS << " type: "; printImmTy(OS, getImmTy());
1162 }
1163 OS << " mods: " << Imm.Mods << '>';
1164 break;
1165 case Token:
1166 OS << '\'' << getToken() << '\'';
1167 break;
1168 case Expression:
1169 OS << "<expr " << *Expr << '>';
1170 break;
1171 }
1172 }
1173
1174 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1175 int64_t Val, SMLoc Loc,
1176 ImmTy Type = ImmTyNone,
1177 bool IsFPImm = false) {
1178 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1179 Op->Imm.Val = Val;
1180 Op->Imm.IsFPImm = IsFPImm;
1181 Op->Imm.Kind = ImmKindTyNone;
1182 Op->Imm.Type = Type;
1183 Op->Imm.Mods = Modifiers();
1184 Op->StartLoc = Loc;
1185 Op->EndLoc = Loc;
1186 return Op;
1187 }
1188
1189 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1190 StringRef Str, SMLoc Loc,
1191 bool HasExplicitEncodingSize = true) {
1192 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1193 Res->Tok.Data = Str.data();
1194 Res->Tok.Length = Str.size();
1195 Res->StartLoc = Loc;
1196 Res->EndLoc = Loc;
1197 return Res;
1198 }
1199
1200 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1201 MCRegister Reg, SMLoc S, SMLoc E) {
1202 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1203 Op->Reg.RegNo = Reg;
1204 Op->Reg.Mods = Modifiers();
1205 Op->StartLoc = S;
1206 Op->EndLoc = E;
1207 return Op;
1208 }
1209
1210 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1211 const class MCExpr *Expr, SMLoc S) {
1212 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1213 Op->Expr = Expr;
1214 Op->StartLoc = S;
1215 Op->EndLoc = S;
1216 return Op;
1217 }
1218};
1219
1220raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1221 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1222 return OS;
1223}
1224
1225//===----------------------------------------------------------------------===//
1226// AsmParser
1227//===----------------------------------------------------------------------===//
1228
1229// Holds info related to the current kernel, e.g. count of SGPRs used.
1230// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1231// .amdgpu_hsa_kernel or at EOF.
1232class KernelScopeInfo {
1233 int SgprIndexUnusedMin = -1;
1234 int VgprIndexUnusedMin = -1;
1235 int AgprIndexUnusedMin = -1;
1236 MCContext *Ctx = nullptr;
1237 MCSubtargetInfo const *MSTI = nullptr;
1238
1239 void usesSgprAt(int i) {
1240 if (i >= SgprIndexUnusedMin) {
1241 SgprIndexUnusedMin = ++i;
1242 if (Ctx) {
1243 MCSymbol* const Sym =
1244 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1245 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1246 }
1247 }
1248 }
1249
1250 void usesVgprAt(int i) {
1251 if (i >= VgprIndexUnusedMin) {
1252 VgprIndexUnusedMin = ++i;
1253 if (Ctx) {
1254 MCSymbol* const Sym =
1255 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1256 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1257 VgprIndexUnusedMin);
1258 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1259 }
1260 }
1261 }
1262
1263 void usesAgprAt(int i) {
1264 // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
1265 if (!hasMAIInsts(*MSTI))
1266 return;
1267
1268 if (i >= AgprIndexUnusedMin) {
1269 AgprIndexUnusedMin = ++i;
1270 if (Ctx) {
1271 MCSymbol* const Sym =
1272 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1273 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1274
1275 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1276 MCSymbol* const vSym =
1277 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1278 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1279 VgprIndexUnusedMin);
1280 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1281 }
1282 }
1283 }
1284
1285public:
1286 KernelScopeInfo() = default;
1287
1288 void initialize(MCContext &Context) {
1289 Ctx = &Context;
1290 MSTI = Ctx->getSubtargetInfo();
1291
1292 usesSgprAt(SgprIndexUnusedMin = -1);
1293 usesVgprAt(VgprIndexUnusedMin = -1);
1294 if (hasMAIInsts(*MSTI)) {
1295 usesAgprAt(AgprIndexUnusedMin = -1);
1296 }
1297 }
1298
1299 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1300 unsigned RegWidth) {
1301 switch (RegKind) {
1302 case IS_SGPR:
1303 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1304 break;
1305 case IS_AGPR:
1306 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1307 break;
1308 case IS_VGPR:
1309 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1310 break;
1311 default:
1312 break;
1313 }
1314 }
1315};
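// Illustrative sketch (not part of the original source): how KernelScopeInfo
// tracks register usage. Given a use of s[10:11] (a 64-bit SGPR pair starting
// at dword index 10), usesRegister(IS_SGPR, 10, 64) computes
// 10 + divideCeil(64, 32) - 1 == 11 as the highest index touched, so
// usesSgprAt(11) bumps SgprIndexUnusedMin to 12 and the .kernel.sgpr_count
// symbol is updated to 12. VGPR and AGPR uses are tracked the same way, with
// .kernel.vgpr_count derived from both counts on gfx908/gfx90a.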
1316
1317class AMDGPUAsmParser : public MCTargetAsmParser {
1318 MCAsmParser &Parser;
1319
1320 unsigned ForcedEncodingSize = 0;
1321 bool ForcedDPP = false;
1322 bool ForcedSDWA = false;
1323 KernelScopeInfo KernelScope;
1324
1325 /// @name Auto-generated Match Functions
1326 /// {
1327
1328#define GET_ASSEMBLER_HEADER
1329#include "AMDGPUGenAsmMatcher.inc"
1330
1331 /// }
1332
1333private:
1334 void createConstantSymbol(StringRef Id, int64_t Val);
1335
1336 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1337 bool OutOfRangeError(SMRange Range);
1338 /// Calculate VGPR/SGPR blocks required for given target, reserved
1339 /// registers, and user-specified NextFreeXGPR values.
1340 ///
1341 /// \param Features [in] Target features, used for bug corrections.
1342 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1343 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1344 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1345 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1346 /// descriptor field, if valid.
1347 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1348 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1349 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1350 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1351 /// \param VGPRBlocks [out] Result VGPR block count.
1352 /// \param SGPRBlocks [out] Result SGPR block count.
1353 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1354 const MCExpr *FlatScrUsed, bool XNACKUsed,
1355 std::optional<bool> EnableWavefrontSize32,
1356 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1357 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1358 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
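  // Illustrative sketch (not part of the original source; the exact formula is
  // an assumption): register "blocks" are the granule-sized allocation units
  // written into the kernel descriptor, conventionally computed as
  //   Blocks = alignTo(max(1, NextFreeGPR), Granule) / Granule - 1
  // where Granule is the per-target VGPR/SGPR allocation granularity, and the
  // SGPR count is first padded for VCC / FLAT_SCRATCH / XNACK when reserved.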
1359 bool ParseDirectiveAMDGCNTarget();
1360 bool ParseDirectiveAMDHSACodeObjectVersion();
1361 bool ParseDirectiveAMDHSAKernel();
1362 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1363 bool ParseDirectiveAMDKernelCodeT();
1364 // TODO: Possibly make subtargetHasRegister const.
1365 bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
1366 bool ParseDirectiveAMDGPUHsaKernel();
1367
1368 bool ParseDirectiveISAVersion();
1369 bool ParseDirectiveHSAMetadata();
1370 bool ParseDirectivePALMetadataBegin();
1371 bool ParseDirectivePALMetadata();
1372 bool ParseDirectiveAMDGPULDS();
1373
1374 /// Common code to parse out a block of text (typically YAML) between start and
1375 /// end directives.
1376 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1377 const char *AssemblerDirectiveEnd,
1378 std::string &CollectString);
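  // For example (illustrative), the HSA metadata directives use this helper:
  // everything between ".amdgpu_metadata" and ".end_amdgpu_metadata" is
  // collected verbatim into CollectString and then handed to the YAML parser.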
1379
1380 bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
1381 RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
1382 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1383 unsigned &RegNum, unsigned &RegWidth,
1384 bool RestoreOnFailure = false);
1385 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1386 unsigned &RegNum, unsigned &RegWidth,
1387 SmallVectorImpl<AsmToken> &Tokens);
1388 MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1389 unsigned &RegWidth,
1390 SmallVectorImpl<AsmToken> &Tokens);
1391 MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1392 unsigned &RegWidth,
1393 SmallVectorImpl<AsmToken> &Tokens);
1394 MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1395 unsigned &RegWidth,
1396 SmallVectorImpl<AsmToken> &Tokens);
1397 bool ParseRegRange(unsigned& Num, unsigned& Width);
1398 MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
1399 unsigned SubReg, unsigned RegWidth, SMLoc Loc);
1400
1401 bool isRegister();
1402 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1403 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1404 void initializeGprCountSymbol(RegisterKind RegKind);
1405 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1406 unsigned RegWidth);
1407 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1408 bool IsAtomic);
1409
1410public:
1411 enum OperandMode {
1412 OperandMode_Default,
1413 OperandMode_NSA,
1414 };
1415
1416 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1417
1418 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1419 const MCInstrInfo &MII,
1420 const MCTargetOptions &Options)
1421 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1423
1424 if (getFeatureBits().none()) {
1425 // Set default features.
1426 copySTI().ToggleFeature("southern-islands");
1427 }
1428
1429 FeatureBitset FB = getFeatureBits();
1430 if (!FB[AMDGPU::FeatureWavefrontSize64] &&
1431 !FB[AMDGPU::FeatureWavefrontSize32]) {
1432 // If there is no default wave size, it must be a generation before gfx10;
1433 // these have FeatureWavefrontSize64 in their definition already. For
1434 // gfx10+, set wave32 as a default.
1435 copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
1436 }
1437
1438 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1439
1440 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1441 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1442 createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1443 createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1444 createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1445 } else {
1446 createConstantSymbol(".option.machine_version_major", ISA.Major);
1447 createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1448 createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1449 }
1450 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1451 initializeGprCountSymbol(IS_VGPR);
1452 initializeGprCountSymbol(IS_SGPR);
1453 } else
1454 KernelScope.initialize(getContext());
1455
1456 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1457 createConstantSymbol(Symbol, Code);
1458
1459 createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1460 createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1461 createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1462 }
1463
1464 bool hasMIMG_R128() const {
1465 return AMDGPU::hasMIMG_R128(getSTI());
1466 }
1467
1468 bool hasPackedD16() const {
1469 return AMDGPU::hasPackedD16(getSTI());
1470 }
1471
1472 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1473
1474 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1475
1476 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1477
1478 bool isSI() const {
1479 return AMDGPU::isSI(getSTI());
1480 }
1481
1482 bool isCI() const {
1483 return AMDGPU::isCI(getSTI());
1484 }
1485
1486 bool isVI() const {
1487 return AMDGPU::isVI(getSTI());
1488 }
1489
1490 bool isGFX9() const {
1491 return AMDGPU::isGFX9(getSTI());
1492 }
1493
1494 // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1495 bool isGFX90A() const {
1496 return AMDGPU::isGFX90A(getSTI());
1497 }
1498
1499 bool isGFX940() const {
1500 return AMDGPU::isGFX940(getSTI());
1501 }
1502
1503 bool isGFX9Plus() const {
1504 return AMDGPU::isGFX9Plus(getSTI());
1505 }
1506
1507 bool isGFX10() const {
1508 return AMDGPU::isGFX10(getSTI());
1509 }
1510
1511 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1512
1513 bool isGFX11() const {
1514 return AMDGPU::isGFX11(getSTI());
1515 }
1516
1517 bool isGFX11Plus() const {
1518 return AMDGPU::isGFX11Plus(getSTI());
1519 }
1520
1521 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1522
1523 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1524
1525 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1526
1527 bool isGFX10_BEncoding() const {
1528 return AMDGPU::isGFX10_BEncoding(getSTI());
1529 }
1530
1531 bool hasInv2PiInlineImm() const {
1532 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1533 }
1534
1535 bool hasFlatOffsets() const {
1536 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1537 }
1538
1539 bool hasArchitectedFlatScratch() const {
1540 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1541 }
1542
1543 bool hasSGPR102_SGPR103() const {
1544 return !isVI() && !isGFX9();
1545 }
1546
1547 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1548
1549 bool hasIntClamp() const {
1550 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1551 }
1552
1553 bool hasPartialNSAEncoding() const {
1554 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1555 }
1556
1557 unsigned getNSAMaxSize(bool HasSampler = false) const {
1558 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1559 }
1560
1561 unsigned getMaxNumUserSGPRs() const {
1562 return AMDGPU::getMaxNumUserSGPRs(getSTI());
1563 }
1564
1565 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1566
1567 AMDGPUTargetStreamer &getTargetStreamer() {
1568 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1569 return static_cast<AMDGPUTargetStreamer &>(TS);
1570 }
1571
1572 const MCRegisterInfo *getMRI() const {
1573 // We need this const_cast because for some reason getContext() is not const
1574 // in MCAsmParser.
1575 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1576 }
1577
1578 const MCInstrInfo *getMII() const {
1579 return &MII;
1580 }
1581
1582 const FeatureBitset &getFeatureBits() const {
1583 return getSTI().getFeatureBits();
1584 }
1585
1586 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1587 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1588 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1589
1590 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1591 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1592 bool isForcedDPP() const { return ForcedDPP; }
1593 bool isForcedSDWA() const { return ForcedSDWA; }
1594 ArrayRef<unsigned> getMatchedVariants() const;
1595 StringRef getMatchedVariantName() const;
1596
1597 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1598 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1599 bool RestoreOnFailure);
1600 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1601 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1602 SMLoc &EndLoc) override;
1603 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1604 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1605 unsigned Kind) override;
1606 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1607 OperandVector &Operands, MCStreamer &Out,
1608 uint64_t &ErrorInfo,
1609 bool MatchingInlineAsm) override;
1610 bool ParseDirective(AsmToken DirectiveID) override;
1611 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1612 OperandMode Mode = OperandMode_Default);
1613 StringRef parseMnemonicSuffix(StringRef Name);
1614 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1615 SMLoc NameLoc, OperandVector &Operands) override;
1616 //bool ProcessInstruction(MCInst &Inst);
1617
1619
1620 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1621
1622 ParseStatus
1623 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1624 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1625 std::function<bool(int64_t &)> ConvertResult = nullptr);
1626
1627 ParseStatus parseOperandArrayWithPrefix(
1628 const char *Prefix, OperandVector &Operands,
1629 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1630 bool (*ConvertResult)(int64_t &) = nullptr);
1631
1632 ParseStatus
1633 parseNamedBit(StringRef Name, OperandVector &Operands,
1634 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1635 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1636 ParseStatus parseCPol(OperandVector &Operands);
1637 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1638 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1639 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1640 SMLoc &StringLoc);
1641 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1642 StringRef Name,
1643 ArrayRef<const char *> Ids,
1644 int64_t &IntVal);
1645 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1646 StringRef Name,
1647 ArrayRef<const char *> Ids,
1648 AMDGPUOperand::ImmTy Type);
1649
1650 bool isModifier();
1651 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1652 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1653 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1654 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1655 bool parseSP3NegModifier();
1656 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1657 bool HasLit = false);
1659 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1660 bool HasLit = false);
1661 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1662 bool AllowImm = true);
1663 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1664 bool AllowImm = true);
1665 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1666 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1667 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1668 ParseStatus tryParseIndexKey(OperandVector &Operands,
1669 AMDGPUOperand::ImmTy ImmTy);
1670 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1671 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1672
1673 ParseStatus parseDfmtNfmt(int64_t &Format);
1674 ParseStatus parseUfmt(int64_t &Format);
1675 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1676 int64_t &Format);
1677 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1678 int64_t &Format);
1679 ParseStatus parseFORMAT(OperandVector &Operands);
1680 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1681 ParseStatus parseNumericFormat(int64_t &Format);
1682 ParseStatus parseFlatOffset(OperandVector &Operands);
1683 ParseStatus parseR128A16(OperandVector &Operands);
1685 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1686 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1687
1688 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1689
1690 bool parseCnt(int64_t &IntVal);
1691 ParseStatus parseSWaitCnt(OperandVector &Operands);
1692
1693 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1694 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1695 ParseStatus parseDepCtr(OperandVector &Operands);
1696
1697 bool parseDelay(int64_t &Delay);
1698 ParseStatus parseSDelayALU(OperandVector &Operands);
1699
1700 ParseStatus parseHwreg(OperandVector &Operands);
1701
1702private:
1703 struct OperandInfoTy {
1704 SMLoc Loc;
1705 int64_t Val;
1706 bool IsSymbolic = false;
1707 bool IsDefined = false;
1708
1709 OperandInfoTy(int64_t Val) : Val(Val) {}
1710 };
1711
1712 struct StructuredOpField : OperandInfoTy {
1713 StringLiteral Id;
1714 StringLiteral Desc;
1715 unsigned Width;
1716 bool IsDefined = false;
1717
1718 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1719 int64_t Default)
1720 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1721 virtual ~StructuredOpField() = default;
1722
1723 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1724 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1725 return false;
1726 }
1727
1728 virtual bool validate(AMDGPUAsmParser &Parser) const {
1729 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1730 return Error(Parser, "not supported on this GPU");
1731 if (!isUIntN(Width, Val))
1732 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1733 return true;
1734 }
1735 };
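  // Illustrative sketch (not part of the original source): a field declared as
  //   StructuredOpField F("offset", "hwreg offset", /*Width=*/5, /*Default=*/0);
  // accepts values 0..31; validate() reports "invalid hwreg offset: only 5-bit
  // values are legal" for anything wider, and "not supported on this GPU" when
  // a symbolic name resolved to OPR_ID_UNSUPPORTED.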
1736
1737 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1738 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1739
1740 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1741 bool validateSendMsg(const OperandInfoTy &Msg,
1742 const OperandInfoTy &Op,
1743 const OperandInfoTy &Stream);
1744
1745 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1746 OperandInfoTy &Width);
1747
1748 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1749 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1750 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1751
1752 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1753 const OperandVector &Operands) const;
1754 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1755 SMLoc getRegLoc(MCRegister Reg, const OperandVector &Operands) const;
1756 SMLoc getLitLoc(const OperandVector &Operands,
1757 bool SearchMandatoryLiterals = false) const;
1758 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1759 SMLoc getConstLoc(const OperandVector &Operands) const;
1760 SMLoc getInstLoc(const OperandVector &Operands) const;
1761
1762 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1763 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1764 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1765 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1766 bool validateSOPLiteral(const MCInst &Inst) const;
1767 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1768 bool validateVOPDRegBankConstraints(const MCInst &Inst,
1769 const OperandVector &Operands);
1770 bool validateIntClampSupported(const MCInst &Inst);
1771 bool validateMIMGAtomicDMask(const MCInst &Inst);
1772 bool validateMIMGGatherDMask(const MCInst &Inst);
1773 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1774 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1775 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1776 bool validateMIMGD16(const MCInst &Inst);
1777 bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1778 bool validateMIMGMSAA(const MCInst &Inst);
1779 bool validateOpSel(const MCInst &Inst);
1780 bool validateNeg(const MCInst &Inst, int OpName);
1781 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1782 bool validateVccOperand(MCRegister Reg) const;
1783 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1784 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1785 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1786 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1787 bool validateAGPRLdSt(const MCInst &Inst) const;
1788 bool validateVGPRAlign(const MCInst &Inst) const;
1789 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1790 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1791 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1792 bool validateDivScale(const MCInst &Inst);
1793 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1794 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1795 const SMLoc &IDLoc);
1796 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1797 const unsigned CPol);
1798 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1799 std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1800 unsigned getConstantBusLimit(unsigned Opcode) const;
1801 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1802 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1803 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1804
1805 bool isSupportedMnemo(StringRef Mnemo,
1806 const FeatureBitset &FBS);
1807 bool isSupportedMnemo(StringRef Mnemo,
1808 const FeatureBitset &FBS,
1809 ArrayRef<unsigned> Variants);
1810 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1811
1812 bool isId(const StringRef Id) const;
1813 bool isId(const AsmToken &Token, const StringRef Id) const;
1814 bool isToken(const AsmToken::TokenKind Kind) const;
1815 StringRef getId() const;
1816 bool trySkipId(const StringRef Id);
1817 bool trySkipId(const StringRef Pref, const StringRef Id);
1818 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1819 bool trySkipToken(const AsmToken::TokenKind Kind);
1820 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1821 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1822 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1823
1824 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1825 AsmToken::TokenKind getTokenKind() const;
1826 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1828 StringRef getTokenStr() const;
1829 AsmToken peekToken(bool ShouldSkipSpace = true);
1830 AsmToken getToken() const;
1831 SMLoc getLoc() const;
1832 void lex();
1833
1834public:
1835 void onBeginOfFile() override;
1836 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1837
1838 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1839
1840 ParseStatus parseExpTgt(OperandVector &Operands);
1841 ParseStatus parseSendMsg(OperandVector &Operands);
1842 ParseStatus parseInterpSlot(OperandVector &Operands);
1843 ParseStatus parseInterpAttr(OperandVector &Operands);
1844 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1845 ParseStatus parseBoolReg(OperandVector &Operands);
1846
1847 bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1848 const unsigned MaxVal, const Twine &ErrMsg,
1849 SMLoc &Loc);
1850 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1851 const unsigned MinVal,
1852 const unsigned MaxVal,
1853 const StringRef ErrMsg);
1854 ParseStatus parseSwizzle(OperandVector &Operands);
1855 bool parseSwizzleOffset(int64_t &Imm);
1856 bool parseSwizzleMacro(int64_t &Imm);
1857 bool parseSwizzleQuadPerm(int64_t &Imm);
1858 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1859 bool parseSwizzleBroadcast(int64_t &Imm);
1860 bool parseSwizzleSwap(int64_t &Imm);
1861 bool parseSwizzleReverse(int64_t &Imm);
1862 bool parseSwizzleFFT(int64_t &Imm);
1863 bool parseSwizzleRotate(int64_t &Imm);
1864
1865 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1866 int64_t parseGPRIdxMacro();
1867
1868 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1869 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1870
1871 ParseStatus parseOModSI(OperandVector &Operands);
1872
1873 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1874 OptionalImmIndexMap &OptionalIdx);
1875 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1876 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1877 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1878 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1879
1880 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1881 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1882 OptionalImmIndexMap &OptionalIdx);
1883 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1884 OptionalImmIndexMap &OptionalIdx);
1885
1886 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1887 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1888
1889 bool parseDimId(unsigned &Encoding);
1891 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1893 ParseStatus parseDPPCtrl(OperandVector &Operands);
1894 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1895 int64_t parseDPPCtrlSel(StringRef Ctrl);
1896 int64_t parseDPPCtrlPerm();
1897 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1898 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1899 cvtDPP(Inst, Operands, true);
1900 }
1901 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1902 bool IsDPP8 = false);
1903 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1904 cvtVOP3DPP(Inst, Operands, true);
1905 }
1906
1907 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1908 AMDGPUOperand::ImmTy Type);
1909 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1910 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1911 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1912 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1913 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1914 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1915 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1916 uint64_t BasicInstType,
1917 bool SkipDstVcc = false,
1918 bool SkipSrcVcc = false);
1919
1920 ParseStatus parseEndpgm(OperandVector &Operands);
1921
1923};
1924
1925} // end anonymous namespace
1926
1927 // May be called with an integer type of equivalent bitwidth.
1928static const fltSemantics *getFltSemantics(unsigned Size) {
1929 switch (Size) {
1930 case 4:
1931 return &APFloat::IEEEsingle();
1932 case 8:
1933 return &APFloat::IEEEdouble();
1934 case 2:
1935 return &APFloat::IEEEhalf();
1936 default:
1937 llvm_unreachable("unsupported fp type");
1938 }
1939}
1940
1941 static const fltSemantics *getFltSemantics(MVT VT) {
1942 return getFltSemantics(VT.getSizeInBits() / 8);
1943}
1944
1945 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1946 switch (OperandType) {
1947 // When a floating-point immediate is used as an operand of type i16, the
1948 // 32-bit representation of the constant truncated to the 16 LSBs should be used.
1968 return &APFloat::IEEEsingle();
1974 return &APFloat::IEEEdouble();
1983 return &APFloat::IEEEhalf();
1991 return &APFloat::BFloat();
1992 default:
1993 llvm_unreachable("unsupported fp type");
1994 }
1995}
1996
1997//===----------------------------------------------------------------------===//
1998// Operand
1999//===----------------------------------------------------------------------===//
2000
2001static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
2002 bool Lost;
2003
2004 // Convert literal to single precision
2005 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
2006 APFloat::rmNearestTiesToEven,
2007 &Lost);
2008 // We allow precision loss but not overflow or underflow
2009 if (Status != APFloat::opOK &&
2010 Lost &&
2011 ((Status & APFloat::opOverflow) != 0 ||
2012 (Status & APFloat::opUnderflow) != 0)) {
2013 return false;
2014 }
2015
2016 return true;
2017}
2018
2019static bool isSafeTruncation(int64_t Val, unsigned Size) {
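// For illustration: with Size == 16, an unsigned value such as 0xFFFF and a
// signed value such as -1 both pass this check, while 0x10000 fits in neither
// 16-bit range and is rejected.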
2020 return isUIntN(Size, Val) || isIntN(Size, Val);
2021}
2022
2023static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2024 if (VT.getScalarType() == MVT::i16)
2025 return isInlinableLiteral32(Val, HasInv2Pi);
2026
2027 if (VT.getScalarType() == MVT::f16)
2028 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2029
2030 assert(VT.getScalarType() == MVT::bf16);
2031
2032 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2033}
2034
2035bool AMDGPUOperand::isInlinableImm(MVT type) const {
2036
2037 // This is a hack to enable named inline values like
2038 // shared_base with both 32-bit and 64-bit operands.
2039 // Note that these values are defined as
2040 // 32-bit operands only.
2041 if (isInlineValue()) {
2042 return true;
2043 }
2044
2045 if (!isImmTy(ImmTyNone)) {
2046 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2047 return false;
2048 }
2049 // TODO: We should avoid using host float here. It would be better to
2050 // check the float bit values which is what a few other places do.
2051 // We've had bot failures before due to weird NaN support on mips hosts.
2052
2053 APInt Literal(64, Imm.Val);
2054
2055 if (Imm.IsFPImm) { // We got fp literal token
2056 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2057 return AMDGPU::isInlinableLiteral64(Imm.Val,
2058 AsmParser->hasInv2PiInlineImm());
2059 }
2060
2061 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2062 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2063 return false;
2064
2065 if (type.getScalarSizeInBits() == 16) {
2066 bool Lost = false;
2067 switch (type.getScalarType().SimpleTy) {
2068 default:
2069 llvm_unreachable("unknown 16-bit type");
2070 case MVT::bf16:
2071 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2072 &Lost);
2073 break;
2074 case MVT::f16:
2075 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2076 &Lost);
2077 break;
2078 case MVT::i16:
2079 FPLiteral.convert(APFloatBase::IEEEsingle(),
2080 APFloat::rmNearestTiesToEven, &Lost);
2081 break;
2082 }
2083 // We need to use the 32-bit representation here because when a
2084 // floating-point inline constant is used as an i16 operand, its 32-bit
2085 // representation will be used. We will need the 32-bit value to check
2086 // whether it is an FP inline constant.
2087 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2088 return isInlineableLiteralOp16(ImmVal, type,
2089 AsmParser->hasInv2PiInlineImm());
2090 }
2091
2092 // Check if single precision literal is inlinable
2093 return AMDGPU::isInlinableLiteral32(
2094 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2095 AsmParser->hasInv2PiInlineImm());
2096 }
2097
2098 // We got int literal token.
2099 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2100 return AMDGPU::isInlinableLiteral64(Imm.Val,
2101 AsmParser->hasInv2PiInlineImm());
2102 }
2103
2104 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2105 return false;
2106 }
2107
2108 if (type.getScalarSizeInBits() == 16) {
2109 return isInlineableLiteralOp16(
2110 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2111 type, AsmParser->hasInv2PiInlineImm());
2112 }
2113
2114 return AMDGPU::isInlinableLiteral32(
2115 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2116 AsmParser->hasInv2PiInlineImm());
2117}
2118
2119bool AMDGPUOperand::isLiteralImm(MVT type) const {
2120 // Check that this immediate can be added as literal
2121 if (!isImmTy(ImmTyNone)) {
2122 return false;
2123 }
2124
2125 if (!Imm.IsFPImm) {
2126 // We got int literal token.
2127
2128 if (type == MVT::f64 && hasFPModifiers()) {
2129 // FP modifiers cannot be applied to int literals while preserving the same
2130 // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
2131 // ambiguity, these cases are disabled.
2132 return false;
2133 }
2134
2135 unsigned Size = type.getSizeInBits();
2136 if (Size == 64)
2137 Size = 32;
2138
2139 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2140 // types.
2141 return isSafeTruncation(Imm.Val, Size);
2142 }
2143
2144 // We got fp literal token
2145 if (type == MVT::f64) { // Expected 64-bit fp operand
2146 // We would set the low 64 bits of the literal to zeroes, but we accept such literals
2147 return true;
2148 }
2149
2150 if (type == MVT::i64) { // Expected 64-bit int operand
2151 // We don't allow fp literals in 64-bit integer instructions. It is
2152 // unclear how we should encode them.
2153 return false;
2154 }
2155
2156 // We allow fp literals with f16x2 operands assuming that the specified
2157 // literal goes into the lower half and the upper half is zero. We also
2158 // require that the literal may be losslessly converted to f16.
2159 //
2160 // For i16x2 operands, we assume that the specified literal is encoded as a
2161 // single-precision float. This is pretty odd, but it matches SP3 and what
2162 // happens in hardware.
2163 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2164 : (type == MVT::v2i16) ? MVT::f32
2165 : (type == MVT::v2f32) ? MVT::f32
2166 : type;
2167
2168 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2169 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2170}
2171
2172bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2173 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2174}
2175
2176bool AMDGPUOperand::isVRegWithInputMods() const {
2177 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2178 // GFX90A allows DPP on 64-bit operands.
2179 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2180 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2181}
2182
2183template <bool IsFake16>
2184bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2185 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2186 : AMDGPU::VGPR_16_Lo128RegClassID);
2187}
2188
2189template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2190 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2191 : AMDGPU::VGPR_16RegClassID);
2192}
2193
2194bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2195 if (AsmParser->isVI())
2196 return isVReg32();
2197 if (AsmParser->isGFX9Plus())
2198 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2199 return false;
2200}
2201
2202bool AMDGPUOperand::isSDWAFP16Operand() const {
2203 return isSDWAOperand(MVT::f16);
2204}
2205
2206bool AMDGPUOperand::isSDWAFP32Operand() const {
2207 return isSDWAOperand(MVT::f32);
2208}
2209
2210bool AMDGPUOperand::isSDWAInt16Operand() const {
2211 return isSDWAOperand(MVT::i16);
2212}
2213
2214bool AMDGPUOperand::isSDWAInt32Operand() const {
2215 return isSDWAOperand(MVT::i32);
2216}
2217
2218bool AMDGPUOperand::isBoolReg() const {
2219 auto FB = AsmParser->getFeatureBits();
2220 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2221 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2222}
2223
2224uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2225{
2226 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2227 assert(Size == 2 || Size == 4 || Size == 8);
2228
2229 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
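// For illustration: with Size == 4 the mask is 0x80000000, so 'abs' below
// clears the sign bit (e.g. the bits of -2.0f, 0xC0000000, become 0x40000000,
// i.e. 2.0f) and 'neg' flips it.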
2230
2231 if (Imm.Mods.Abs) {
2232 Val &= ~FpSignMask;
2233 }
2234 if (Imm.Mods.Neg) {
2235 Val ^= FpSignMask;
2236 }
2237
2238 return Val;
2239}
2240
2241void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2242 if (isExpr()) {
2243 Inst.addOperand(MCOperand::createExpr(Expr));
2244 return;
2245 }
2246
2247 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2248 Inst.getNumOperands())) {
2249 addLiteralImmOperand(Inst, Imm.Val,
2250 ApplyModifiers &
2251 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2252 } else {
2253 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2254 Inst.addOperand(MCOperand::createImm(Imm.Val));
2255 setImmKindNone();
2256 }
2257}
2258
2259void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2260 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2261 auto OpNum = Inst.getNumOperands();
2262 // Check that this operand accepts literals
2263 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2264
2265 if (ApplyModifiers) {
2266 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2267 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2268 Val = applyInputFPModifiers(Val, Size);
2269 }
2270
2271 APInt Literal(64, Val);
2272 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2273
2274 if (Imm.IsFPImm) { // We got fp literal token
2275 switch (OpTy) {
2281 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2282 AsmParser->hasInv2PiInlineImm())) {
2283 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2284 setImmKindConst();
2285 return;
2286 }
2287
2288 // Non-inlineable
2289 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2290 // For fp operands we check whether the low 32 bits are zero
2291 if (Literal.getLoBits(32) != 0) {
2292 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2293 "Can't encode literal as exact 64-bit floating-point operand. "
2294 "Low 32-bits will be set to zero");
2295 Val &= 0xffffffff00000000u;
2296 }
2297
2298 Inst.addOperand(MCOperand::createImm(Val));
2299 setImmKindLiteral();
2300 return;
2301 }
2302
2303 // We don't allow fp literals in 64-bit integer instructions. It is
2304 // unclear how we should encode them. This case should be checked earlier
2305 // in predicate methods (isLiteralImm())
2306 llvm_unreachable("fp literal in 64-bit integer instruction.");
2307
2315 if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
2316 // This is 1/(2*pi), which would be truncated to bf16 with a loss of
2317 // precision. The constant represents the idiomatic fp32 value of
2318 // 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16 bits
2319 // cleared. Prevent the rounding below.
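// Worked example of the truncation described above (bit patterns assumed for
// illustration): fp32(1/(2*pi)) is approximately 0x3E22F983; rounding to the
// nearest bf16 would give 0x3E23, while dropping the low 16 bits gives 0x3E22.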
2320 Inst.addOperand(MCOperand::createImm(0x3e22));
2321 setImmKindLiteral();
2322 return;
2323 }
2324 [[fallthrough]];
2325
2353 bool lost;
2354 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2355 // Convert literal to single precision
2356 FPLiteral.convert(*getOpFltSemantics(OpTy),
2357 APFloat::rmNearestTiesToEven, &lost);
2358 // We allow precision loss but not overflow or underflow. This should be
2359 // checked earlier in isLiteralImm()
2360
2361 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2362 Inst.addOperand(MCOperand::createImm(ImmVal));
2363 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2364 setImmKindMandatoryLiteral();
2365 } else {
2366 setImmKindLiteral();
2367 }
2368 return;
2369 }
2370 default:
2371 llvm_unreachable("invalid operand size");
2372 }
2373
2374 return;
2375 }
2376
2377 // We got int literal token.
2378 // Only sign extend inline immediates.
2379 switch (OpTy) {
2395 if (isSafeTruncation(Val, 32) &&
2396 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2397 AsmParser->hasInv2PiInlineImm())) {
2398 Inst.addOperand(MCOperand::createImm(Val));
2399 setImmKindConst();
2400 return;
2401 }
2402
2403 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2404 setImmKindLiteral();
2405 return;
2406
2412 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2414 setImmKindConst();
2415 return;
2416 }
2417
2418 Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
2419 : Lo_32(Val);
2420
2422 setImmKindLiteral();
2423 return;
2424
2428 if (isSafeTruncation(Val, 16) &&
2429 AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2431 setImmKindConst();
2432 return;
2433 }
2434
2435 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2436 setImmKindLiteral();
2437 return;
2438
2443 if (isSafeTruncation(Val, 16) &&
2444 AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2445 AsmParser->hasInv2PiInlineImm())) {
2447 setImmKindConst();
2448 return;
2449 }
2450
2451 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2452 setImmKindLiteral();
2453 return;
2454
2459 if (isSafeTruncation(Val, 16) &&
2460 AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2461 AsmParser->hasInv2PiInlineImm())) {
2463 setImmKindConst();
2464 return;
2465 }
2466
2467 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2468 setImmKindLiteral();
2469 return;
2470
2473 assert(isSafeTruncation(Val, 16));
2474 assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2476 return;
2477 }
2480 assert(isSafeTruncation(Val, 16));
2481 assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2482 AsmParser->hasInv2PiInlineImm()));
2483
2485 return;
2486 }
2487
2490 assert(isSafeTruncation(Val, 16));
2491 assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2492 AsmParser->hasInv2PiInlineImm()));
2493
2495 return;
2496 }
2497
2499 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2500 setImmKindMandatoryLiteral();
2501 return;
2503 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2504 setImmKindMandatoryLiteral();
2505 return;
2506 default:
2507 llvm_unreachable("invalid operand size");
2508 }
2509}
2510
2511void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2512 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2513}
2514
2515bool AMDGPUOperand::isInlineValue() const {
2516 return isRegKind() && ::isInlineValue(getReg());
2517}
2518
2519//===----------------------------------------------------------------------===//
2520// AsmParser
2521//===----------------------------------------------------------------------===//
2522
2523void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2524 // TODO: make these pre-defined variables read-only.
2525 // Currently there is no suitable machinery in core llvm-mc for this.
2526 // MCSymbol::isRedefinable is intended for another purpose, and
2527 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
2528 MCContext &Ctx = getContext();
2529 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2530 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2531}
2532
2533static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2534 if (Is == IS_VGPR) {
2535 switch (RegWidth) {
2536 default: return -1;
2537 case 32:
2538 return AMDGPU::VGPR_32RegClassID;
2539 case 64:
2540 return AMDGPU::VReg_64RegClassID;
2541 case 96:
2542 return AMDGPU::VReg_96RegClassID;
2543 case 128:
2544 return AMDGPU::VReg_128RegClassID;
2545 case 160:
2546 return AMDGPU::VReg_160RegClassID;
2547 case 192:
2548 return AMDGPU::VReg_192RegClassID;
2549 case 224:
2550 return AMDGPU::VReg_224RegClassID;
2551 case 256:
2552 return AMDGPU::VReg_256RegClassID;
2553 case 288:
2554 return AMDGPU::VReg_288RegClassID;
2555 case 320:
2556 return AMDGPU::VReg_320RegClassID;
2557 case 352:
2558 return AMDGPU::VReg_352RegClassID;
2559 case 384:
2560 return AMDGPU::VReg_384RegClassID;
2561 case 512:
2562 return AMDGPU::VReg_512RegClassID;
2563 case 1024:
2564 return AMDGPU::VReg_1024RegClassID;
2565 }
2566 } else if (Is == IS_TTMP) {
2567 switch (RegWidth) {
2568 default: return -1;
2569 case 32:
2570 return AMDGPU::TTMP_32RegClassID;
2571 case 64:
2572 return AMDGPU::TTMP_64RegClassID;
2573 case 128:
2574 return AMDGPU::TTMP_128RegClassID;
2575 case 256:
2576 return AMDGPU::TTMP_256RegClassID;
2577 case 512:
2578 return AMDGPU::TTMP_512RegClassID;
2579 }
2580 } else if (Is == IS_SGPR) {
2581 switch (RegWidth) {
2582 default: return -1;
2583 case 32:
2584 return AMDGPU::SGPR_32RegClassID;
2585 case 64:
2586 return AMDGPU::SGPR_64RegClassID;
2587 case 96:
2588 return AMDGPU::SGPR_96RegClassID;
2589 case 128:
2590 return AMDGPU::SGPR_128RegClassID;
2591 case 160:
2592 return AMDGPU::SGPR_160RegClassID;
2593 case 192:
2594 return AMDGPU::SGPR_192RegClassID;
2595 case 224:
2596 return AMDGPU::SGPR_224RegClassID;
2597 case 256:
2598 return AMDGPU::SGPR_256RegClassID;
2599 case 288:
2600 return AMDGPU::SGPR_288RegClassID;
2601 case 320:
2602 return AMDGPU::SGPR_320RegClassID;
2603 case 352:
2604 return AMDGPU::SGPR_352RegClassID;
2605 case 384:
2606 return AMDGPU::SGPR_384RegClassID;
2607 case 512:
2608 return AMDGPU::SGPR_512RegClassID;
2609 }
2610 } else if (Is == IS_AGPR) {
2611 switch (RegWidth) {
2612 default: return -1;
2613 case 32:
2614 return AMDGPU::AGPR_32RegClassID;
2615 case 64:
2616 return AMDGPU::AReg_64RegClassID;
2617 case 96:
2618 return AMDGPU::AReg_96RegClassID;
2619 case 128:
2620 return AMDGPU::AReg_128RegClassID;
2621 case 160:
2622 return AMDGPU::AReg_160RegClassID;
2623 case 192:
2624 return AMDGPU::AReg_192RegClassID;
2625 case 224:
2626 return AMDGPU::AReg_224RegClassID;
2627 case 256:
2628 return AMDGPU::AReg_256RegClassID;
2629 case 288:
2630 return AMDGPU::AReg_288RegClassID;
2631 case 320:
2632 return AMDGPU::AReg_320RegClassID;
2633 case 352:
2634 return AMDGPU::AReg_352RegClassID;
2635 case 384:
2636 return AMDGPU::AReg_384RegClassID;
2637 case 512:
2638 return AMDGPU::AReg_512RegClassID;
2639 case 1024:
2640 return AMDGPU::AReg_1024RegClassID;
2641 }
2642 }
2643 return -1;
2644}
2645
2646 static MCRegister getSpecialRegForName(StringRef RegName) {
2647 return StringSwitch<MCRegister>(RegName)
2648 .Case("exec", AMDGPU::EXEC)
2649 .Case("vcc", AMDGPU::VCC)
2650 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2651 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2652 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2653 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2654 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2655 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2656 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2657 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2658 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2659 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2660 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2661 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2662 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2663 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2664 .Case("m0", AMDGPU::M0)
2665 .Case("vccz", AMDGPU::SRC_VCCZ)
2666 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2667 .Case("execz", AMDGPU::SRC_EXECZ)
2668 .Case("src_execz", AMDGPU::SRC_EXECZ)
2669 .Case("scc", AMDGPU::SRC_SCC)
2670 .Case("src_scc", AMDGPU::SRC_SCC)
2671 .Case("tba", AMDGPU::TBA)
2672 .Case("tma", AMDGPU::TMA)
2673 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2674 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2675 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2676 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2677 .Case("vcc_lo", AMDGPU::VCC_LO)
2678 .Case("vcc_hi", AMDGPU::VCC_HI)
2679 .Case("exec_lo", AMDGPU::EXEC_LO)
2680 .Case("exec_hi", AMDGPU::EXEC_HI)
2681 .Case("tma_lo", AMDGPU::TMA_LO)
2682 .Case("tma_hi", AMDGPU::TMA_HI)
2683 .Case("tba_lo", AMDGPU::TBA_LO)
2684 .Case("tba_hi", AMDGPU::TBA_HI)
2685 .Case("pc", AMDGPU::PC_REG)
2686 .Case("null", AMDGPU::SGPR_NULL)
2687 .Default(AMDGPU::NoRegister);
2688}
2689
2690bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2691 SMLoc &EndLoc, bool RestoreOnFailure) {
2692 auto R = parseRegister();
2693 if (!R) return true;
2694 assert(R->isReg());
2695 RegNo = R->getReg();
2696 StartLoc = R->getStartLoc();
2697 EndLoc = R->getEndLoc();
2698 return false;
2699}
2700
2701bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2702 SMLoc &EndLoc) {
2703 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2704}
2705
2706ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2707 SMLoc &EndLoc) {
2708 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2709 bool PendingErrors = getParser().hasPendingError();
2710 getParser().clearPendingErrors();
2711 if (PendingErrors)
2712 return ParseStatus::Failure;
2713 if (Result)
2714 return ParseStatus::NoMatch;
2715 return ParseStatus::Success;
2716}
2717
2718bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2719 RegisterKind RegKind,
2720 MCRegister Reg1, SMLoc Loc) {
2721 switch (RegKind) {
2722 case IS_SPECIAL:
2723 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2724 Reg = AMDGPU::EXEC;
2725 RegWidth = 64;
2726 return true;
2727 }
2728 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2729 Reg = AMDGPU::FLAT_SCR;
2730 RegWidth = 64;
2731 return true;
2732 }
2733 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2734 Reg = AMDGPU::XNACK_MASK;
2735 RegWidth = 64;
2736 return true;
2737 }
2738 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2739 Reg = AMDGPU::VCC;
2740 RegWidth = 64;
2741 return true;
2742 }
2743 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2744 Reg = AMDGPU::TBA;
2745 RegWidth = 64;
2746 return true;
2747 }
2748 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2749 Reg = AMDGPU::TMA;
2750 RegWidth = 64;
2751 return true;
2752 }
2753 Error(Loc, "register does not fit in the list");
2754 return false;
2755 case IS_VGPR:
2756 case IS_SGPR:
2757 case IS_AGPR:
2758 case IS_TTMP:
2759 if (Reg1 != Reg + RegWidth / 32) {
2760 Error(Loc, "registers in a list must have consecutive indices");
2761 return false;
2762 }
2763 RegWidth += 32;
2764 return true;
2765 default:
2766 llvm_unreachable("unexpected register kind");
2767 }
2768}
2769
2770struct RegInfo {
2771 StringLiteral Name;
2772 RegisterKind Kind;
2773};
2774
2775static constexpr RegInfo RegularRegisters[] = {
2776 {{"v"}, IS_VGPR},
2777 {{"s"}, IS_SGPR},
2778 {{"ttmp"}, IS_TTMP},
2779 {{"acc"}, IS_AGPR},
2780 {{"a"}, IS_AGPR},
2781};
2782
2783static bool isRegularReg(RegisterKind Kind) {
2784 return Kind == IS_VGPR ||
2785 Kind == IS_SGPR ||
2786 Kind == IS_TTMP ||
2787 Kind == IS_AGPR;
2788}
2789
2790 static const RegInfo* getRegularRegInfo(StringRef Str) {
2791 for (const RegInfo &Reg : RegularRegisters)
2792 if (Str.starts_with(Reg.Name))
2793 return &Reg;
2794 return nullptr;
2795}
2796
2797static bool getRegNum(StringRef Str, unsigned& Num) {
2798 return !Str.getAsInteger(10, Num);
2799}
2800
2801bool
2802AMDGPUAsmParser::isRegister(const AsmToken &Token,
2803 const AsmToken &NextToken) const {
2804
2805 // A list of consecutive registers: [s0,s1,s2,s3]
2806 if (Token.is(AsmToken::LBrac))
2807 return true;
2808
2809 if (!Token.is(AsmToken::Identifier))
2810 return false;
2811
2812 // A single register like s0 or a range of registers like s[0:1]
2813
2814 StringRef Str = Token.getString();
2815 const RegInfo *Reg = getRegularRegInfo(Str);
2816 if (Reg) {
2817 StringRef RegName = Reg->Name;
2818 StringRef RegSuffix = Str.substr(RegName.size());
2819 if (!RegSuffix.empty()) {
2820 RegSuffix.consume_back(".l");
2821 RegSuffix.consume_back(".h");
2822 unsigned Num;
2823 // A single register with an index: rXX
2824 if (getRegNum(RegSuffix, Num))
2825 return true;
2826 } else {
2827 // A range of registers: r[XX:YY].
2828 if (NextToken.is(AsmToken::LBrac))
2829 return true;
2830 }
2831 }
2832
2833 return getSpecialRegForName(Str).isValid();
2834}
2835
2836bool
2837AMDGPUAsmParser::isRegister()
2838{
2839 return isRegister(getToken(), peekToken());
2840}
2841
2842MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2843 unsigned SubReg, unsigned RegWidth,
2844 SMLoc Loc) {
2845 assert(isRegularReg(RegKind));
2846
2847 unsigned AlignSize = 1;
2848 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2849 // SGPR and TTMP registers must be aligned.
2850 // Max required alignment is 4 dwords.
2851 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2852 }
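// For example, a 128-bit SGPR tuple (RegWidth == 128) gets AlignSize == 4,
// so s[4:7] passes the alignment check below while s[6:9] is rejected.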
2853
2854 if (RegNum % AlignSize != 0) {
2855 Error(Loc, "invalid register alignment");
2856 return MCRegister();
2857 }
2858
2859 unsigned RegIdx = RegNum / AlignSize;
2860 int RCID = getRegClass(RegKind, RegWidth);
2861 if (RCID == -1) {
2862 Error(Loc, "invalid or unsupported register size");
2863 return MCRegister();
2864 }
2865
2866 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2867 const MCRegisterClass RC = TRI->getRegClass(RCID);
2868 if (RegIdx >= RC.getNumRegs()) {
2869 Error(Loc, "register index is out of range");
2870 return MCRegister();
2871 }
2872
2873 MCRegister Reg = RC.getRegister(RegIdx);
2874
2875 if (SubReg) {
2876 Reg = TRI->getSubReg(Reg, SubReg);
2877
2878 // Currently all regular registers have their .l and .h subregisters, so
2879 // we should never need to generate an error here.
2880 assert(Reg && "Invalid subregister!");
2881 }
2882
2883 return Reg;
2884}
2885
2886bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2887 int64_t RegLo, RegHi;
2888 if (!skipToken(AsmToken::LBrac, "missing register index"))
2889 return false;
2890
2891 SMLoc FirstIdxLoc = getLoc();
2892 SMLoc SecondIdxLoc;
2893
2894 if (!parseExpr(RegLo))
2895 return false;
2896
2897 if (trySkipToken(AsmToken::Colon)) {
2898 SecondIdxLoc = getLoc();
2899 if (!parseExpr(RegHi))
2900 return false;
2901 } else {
2902 RegHi = RegLo;
2903 }
2904
2905 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2906 return false;
2907
2908 if (!isUInt<32>(RegLo)) {
2909 Error(FirstIdxLoc, "invalid register index");
2910 return false;
2911 }
2912
2913 if (!isUInt<32>(RegHi)) {
2914 Error(SecondIdxLoc, "invalid register index");
2915 return false;
2916 }
2917
2918 if (RegLo > RegHi) {
2919 Error(FirstIdxLoc, "first register index should not exceed second index");
2920 return false;
2921 }
2922
2923 Num = static_cast<unsigned>(RegLo);
2924 RegWidth = 32 * ((RegHi - RegLo) + 1);
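// For example, s[4:7] yields RegLo == 4 and RegHi == 7, giving Num == 4 and
// RegWidth == 128.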
2925 return true;
2926}
2927
2928MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2929 unsigned &RegNum,
2930 unsigned &RegWidth,
2931 SmallVectorImpl<AsmToken> &Tokens) {
2932 assert(isToken(AsmToken::Identifier));
2933 MCRegister Reg = getSpecialRegForName(getTokenStr());
2934 if (Reg) {
2935 RegNum = 0;
2936 RegWidth = 32;
2937 RegKind = IS_SPECIAL;
2938 Tokens.push_back(getToken());
2939 lex(); // skip register name
2940 }
2941 return Reg;
2942}
2943
2944MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2945 unsigned &RegNum,
2946 unsigned &RegWidth,
2947 SmallVectorImpl<AsmToken> &Tokens) {
2948 assert(isToken(AsmToken::Identifier));
2949 StringRef RegName = getTokenStr();
2950 auto Loc = getLoc();
2951
2952 const RegInfo *RI = getRegularRegInfo(RegName);
2953 if (!RI) {
2954 Error(Loc, "invalid register name");
2955 return MCRegister();
2956 }
2957
2958 Tokens.push_back(getToken());
2959 lex(); // skip register name
2960
2961 RegKind = RI->Kind;
2962 StringRef RegSuffix = RegName.substr(RI->Name.size());
2963 unsigned SubReg = NoSubRegister;
2964 if (!RegSuffix.empty()) {
2965 if (RegSuffix.consume_back(".l"))
2966 SubReg = AMDGPU::lo16;
2967 else if (RegSuffix.consume_back(".h"))
2968 SubReg = AMDGPU::hi16;
2969
2970 // Single 32-bit register: vXX.
2971 if (!getRegNum(RegSuffix, RegNum)) {
2972 Error(Loc, "invalid register index");
2973 return MCRegister();
2974 }
2975 RegWidth = 32;
2976 } else {
2977 // Range of registers: v[XX:YY]. ":YY" is optional.
2978 if (!ParseRegRange(RegNum, RegWidth))
2979 return MCRegister();
2980 }
2981
2982 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
2983}
2984
2985MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
2986 unsigned &RegNum, unsigned &RegWidth,
2987 SmallVectorImpl<AsmToken> &Tokens) {
2988 MCRegister Reg;
2989 auto ListLoc = getLoc();
2990
2991 if (!skipToken(AsmToken::LBrac,
2992 "expected a register or a list of registers")) {
2993 return MCRegister();
2994 }
2995
2996 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2997
2998 auto Loc = getLoc();
2999 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
3000 return MCRegister();
3001 if (RegWidth != 32) {
3002 Error(Loc, "expected a single 32-bit register");
3003 return MCRegister();
3004 }
3005
3006 for (; trySkipToken(AsmToken::Comma); ) {
3007 RegisterKind NextRegKind;
3008 MCRegister NextReg;
3009 unsigned NextRegNum, NextRegWidth;
3010 Loc = getLoc();
3011
3012 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3013 NextRegNum, NextRegWidth,
3014 Tokens)) {
3015 return MCRegister();
3016 }
3017 if (NextRegWidth != 32) {
3018 Error(Loc, "expected a single 32-bit register");
3019 return MCRegister();
3020 }
3021 if (NextRegKind != RegKind) {
3022 Error(Loc, "registers in a list must be of the same kind");
3023 return MCRegister();
3024 }
3025 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
3026 return MCRegister();
3027 }
3028
3029 if (!skipToken(AsmToken::RBrac,
3030 "expected a comma or a closing square bracket")) {
3031 return MCRegister();
3032 }
3033
3034 if (isRegularReg(RegKind))
3035 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
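// For example, the list [s0,s1,s2,s3] parsed above is folded here into the
// single register s[0:3] (RegNum == 0, RegWidth == 128).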
3036
3037 return Reg;
3038}
3039
3040bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3041 MCRegister &Reg, unsigned &RegNum,
3042 unsigned &RegWidth,
3043 SmallVectorImpl<AsmToken> &Tokens) {
3044 auto Loc = getLoc();
3045 Reg = MCRegister();
3046
3047 if (isToken(AsmToken::Identifier)) {
3048 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3049 if (!Reg)
3050 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3051 } else {
3052 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3053 }
3054
3055 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3056 if (!Reg) {
3057 assert(Parser.hasPendingError());
3058 return false;
3059 }
3060
3061 if (!subtargetHasRegister(*TRI, Reg)) {
3062 if (Reg == AMDGPU::SGPR_NULL) {
3063 Error(Loc, "'null' operand is not supported on this GPU");
3064 } else {
3066 " register not available on this GPU");
3067 }
3068 return false;
3069 }
3070
3071 return true;
3072}
3073
3074bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3075 MCRegister &Reg, unsigned &RegNum,
3076 unsigned &RegWidth,
3077 bool RestoreOnFailure /*=false*/) {
3078 Reg = MCRegister();
3079
3080 SmallVector<AsmToken, 1> Tokens;
3081 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3082 if (RestoreOnFailure) {
3083 while (!Tokens.empty()) {
3084 getLexer().UnLex(Tokens.pop_back_val());
3085 }
3086 }
3087 return true;
3088 }
3089 return false;
3090}
3091
3092std::optional<StringRef>
3093AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3094 switch (RegKind) {
3095 case IS_VGPR:
3096 return StringRef(".amdgcn.next_free_vgpr");
3097 case IS_SGPR:
3098 return StringRef(".amdgcn.next_free_sgpr");
3099 default:
3100 return std::nullopt;
3101 }
3102}
3103
3104void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3105 auto SymbolName = getGprCountSymbolName(RegKind);
3106 assert(SymbolName && "initializing invalid register kind");
3107 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3108 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
3109}
3110
3111bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3112 unsigned DwordRegIndex,
3113 unsigned RegWidth) {
3114 // Symbols are only defined for GCN targets
3115 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3116 return true;
3117
3118 auto SymbolName = getGprCountSymbolName(RegKind);
3119 if (!SymbolName)
3120 return true;
3121 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3122
3123 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
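// For example, a use of v[8:11] gives DwordRegIndex == 8 and RegWidth == 128,
// so NewMax == 11 and .amdgcn.next_free_vgpr is raised to 12 below if its
// current value is smaller.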
3124 int64_t OldCount;
3125
3126 if (!Sym->isVariable())
3127 return !Error(getLoc(),
3128 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3129 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
3130 return !Error(
3131 getLoc(),
3132 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3133
3134 if (OldCount <= NewMax)
3135 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
3136
3137 return true;
3138}
3139
3140std::unique_ptr<AMDGPUOperand>
3141AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3142 const auto &Tok = getToken();
3143 SMLoc StartLoc = Tok.getLoc();
3144 SMLoc EndLoc = Tok.getEndLoc();
3145 RegisterKind RegKind;
3146 MCRegister Reg;
3147 unsigned RegNum, RegWidth;
3148
3149 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3150 return nullptr;
3151 }
3152 if (isHsaAbi(getSTI())) {
3153 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3154 return nullptr;
3155 } else
3156 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3157 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3158}
3159
3160ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3161 bool HasSP3AbsModifier, bool HasLit) {
3162 // TODO: add syntactic sugar for 1/(2*PI)
3163
3164 if (isRegister())
3165 return ParseStatus::NoMatch;
3166 assert(!isModifier());
3167
3168 if (!HasLit) {
3169 HasLit = trySkipId("lit");
3170 if (HasLit) {
3171 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3172 return ParseStatus::Failure;
3173 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
3174 if (S.isSuccess() &&
3175 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3176 return ParseStatus::Failure;
3177 return S;
3178 }
3179 }
3180
3181 const auto& Tok = getToken();
3182 const auto& NextTok = peekToken();
3183 bool IsReal = Tok.is(AsmToken::Real);
3184 SMLoc S = getLoc();
3185 bool Negate = false;
3186
3187 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3188 lex();
3189 IsReal = true;
3190 Negate = true;
3191 }
3192
3193 AMDGPUOperand::Modifiers Mods;
3194 Mods.Lit = HasLit;
3195
3196 if (IsReal) {
3197 // Floating-point expressions are not supported.
3198 // Only floating-point literals with an
3199 // optional sign are accepted.
3200
3201 StringRef Num = getTokenStr();
3202 lex();
3203
3204 APFloat RealVal(APFloat::IEEEdouble());
3205 auto roundMode = APFloat::rmNearestTiesToEven;
3206 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3207 return ParseStatus::Failure;
3208 if (Negate)
3209 RealVal.changeSign();
3210
3211 Operands.push_back(
3212 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3213 AMDGPUOperand::ImmTyNone, true));
3214 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3215 Op.setModifiers(Mods);
3216
3217 return ParseStatus::Success;
3218
3219 } else {
3220 int64_t IntVal;
3221 const MCExpr *Expr;
3222 SMLoc S = getLoc();
3223
3224 if (HasSP3AbsModifier) {
3225 // This is a workaround for handling expressions
3226 // as arguments of SP3 'abs' modifier, for example:
3227 // |1.0|
3228 // |-1|
3229 // |1+x|
3230 // This syntax is not compatible with syntax of standard
3231 // MC expressions (due to the trailing '|').
3232 SMLoc EndLoc;
3233 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3234 return ParseStatus::Failure;
3235 } else {
3236 if (Parser.parseExpression(Expr))
3237 return ParseStatus::Failure;
3238 }
3239
3240 if (Expr->evaluateAsAbsolute(IntVal)) {
3241 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3242 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3243 Op.setModifiers(Mods);
3244 } else {
3245 if (HasLit)
3246 return ParseStatus::NoMatch;
3247 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3248 }
3249
3250 return ParseStatus::Success;
3251 }
3252
3253 return ParseStatus::NoMatch;
3254}
3255
3256ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3257 if (!isRegister())
3258 return ParseStatus::NoMatch;
3259
3260 if (auto R = parseRegister()) {
3261 assert(R->isReg());
3262 Operands.push_back(std::move(R));
3263 return ParseStatus::Success;
3264 }
3265 return ParseStatus::Failure;
3266}
3267
3268ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3269 bool HasSP3AbsMod, bool HasLit) {
3270 ParseStatus Res = parseReg(Operands);
3271 if (!Res.isNoMatch())
3272 return Res;
3273 if (isModifier())
3274 return ParseStatus::NoMatch;
3275 return parseImm(Operands, HasSP3AbsMod, HasLit);
3276}
3277
3278bool
3279AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3280 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3281 const auto &str = Token.getString();
3282 return str == "abs" || str == "neg" || str == "sext";
3283 }
3284 return false;
3285}
3286
3287bool
3288AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3289 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3290}
3291
3292bool
3293AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3294 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3295}
3296
3297bool
3298AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3299 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3300}
3301
3302// Check if this is an operand modifier or an opcode modifier
3303 // which may look like an expression but is not. We should
3304// avoid parsing these modifiers as expressions. Currently
3305// recognized sequences are:
3306// |...|
3307// abs(...)
3308// neg(...)
3309// sext(...)
3310// -reg
3311// -|...|
3312// -abs(...)
3313// name:...
3314//
3315bool
3316AMDGPUAsmParser::isModifier() {
3317
3318 AsmToken Tok = getToken();
3319 AsmToken NextToken[2];
3320 peekTokens(NextToken);
3321
3322 return isOperandModifier(Tok, NextToken[0]) ||
3323 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3324 isOpcodeModifierWithVal(Tok, NextToken[0]);
3325}
3326
3327// Check if the current token is an SP3 'neg' modifier.
3328 // Currently this modifier is allowed in the following contexts:
3329//
3330// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3331// 2. Before an 'abs' modifier: -abs(...)
3332// 3. Before an SP3 'abs' modifier: -|...|
3333//
3334// In all other cases "-" is handled as a part
3335// of an expression that follows the sign.
3336//
3337// Note: When "-" is followed by an integer literal,
3338// this is interpreted as integer negation rather
3339// than a floating-point NEG modifier applied to N.
3340 // Besides being counter-intuitive, such use of the floating-point
3341 // NEG modifier would result in a different meaning for
3342 // integer literals used with VOP1/2/C and VOP3,
3343// for example:
3344// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3345// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3346// Negative fp literals with preceding "-" are
3347// handled likewise for uniformity
3348//
3349bool
3350AMDGPUAsmParser::parseSP3NegModifier() {
3351
3352 AsmToken NextToken[2];
3353 peekTokens(NextToken);
3354
3355 if (isToken(AsmToken::Minus) &&
3356 (isRegister(NextToken[0], NextToken[1]) ||
3357 NextToken[0].is(AsmToken::Pipe) ||
3358 isId(NextToken[0], "abs"))) {
3359 lex();
3360 return true;
3361 }
3362
3363 return false;
3364}
3365
3366 ParseStatus
3367 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3368 bool AllowImm) {
3369 bool Neg, SP3Neg;
3370 bool Abs, SP3Abs;
3371 bool Lit;
3372 SMLoc Loc;
3373
3374 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3375 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3376 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3377
3378 SP3Neg = parseSP3NegModifier();
3379
3380 Loc = getLoc();
3381 Neg = trySkipId("neg");
3382 if (Neg && SP3Neg)
3383 return Error(Loc, "expected register or immediate");
3384 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3385 return ParseStatus::Failure;
3386
3387 Abs = trySkipId("abs");
3388 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3389 return ParseStatus::Failure;
3390
3391 Lit = trySkipId("lit");
3392 if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3393 return ParseStatus::Failure;
3394
3395 Loc = getLoc();
3396 SP3Abs = trySkipToken(AsmToken::Pipe);
3397 if (Abs && SP3Abs)
3398 return Error(Loc, "expected register or immediate");
3399
3400 ParseStatus Res;
3401 if (AllowImm) {
3402 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3403 } else {
3404 Res = parseReg(Operands);
3405 }
3406 if (!Res.isSuccess())
3407 return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
3408
3409 if (Lit && !Operands.back()->isImm())
3410 Error(Loc, "expected immediate with lit modifier");
3411
3412 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3413 return ParseStatus::Failure;
3414 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3415 return ParseStatus::Failure;
3416 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3417 return ParseStatus::Failure;
3418 if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3419 return ParseStatus::Failure;
3420
3421 AMDGPUOperand::Modifiers Mods;
3422 Mods.Abs = Abs || SP3Abs;
3423 Mods.Neg = Neg || SP3Neg;
3424 Mods.Lit = Lit;
3425
3426 if (Mods.hasFPModifiers() || Lit) {
3427 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3428 if (Op.isExpr())
3429 return Error(Op.getStartLoc(), "expected an absolute expression");
3430 Op.setModifiers(Mods);
3431 }
3432 return ParseStatus::Success;
3433}
3434
3435 ParseStatus
3436 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3437 bool AllowImm) {
3438 bool Sext = trySkipId("sext");
3439 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3440 return ParseStatus::Failure;
3441
3442 ParseStatus Res;
3443 if (AllowImm) {
3444 Res = parseRegOrImm(Operands);
3445 } else {
3446 Res = parseReg(Operands);
3447 }
3448 if (!Res.isSuccess())
3449 return Sext ? ParseStatus::Failure : Res;
3450
3451 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3452 return ParseStatus::Failure;
3453
3454 AMDGPUOperand::Modifiers Mods;
3455 Mods.Sext = Sext;
3456
3457 if (Mods.hasIntModifiers()) {
3458 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3459 if (Op.isExpr())
3460 return Error(Op.getStartLoc(), "expected an absolute expression");
3461 Op.setModifiers(Mods);
3462 }
3463
3464 return ParseStatus::Success;
3465}
3466
3467ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3468 return parseRegOrImmWithFPInputMods(Operands, false);
3469}
3470
3471ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3472 return parseRegOrImmWithIntInputMods(Operands, false);
3473}
3474
3475ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3476 auto Loc = getLoc();
3477 if (trySkipId("off")) {
3478 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3479 AMDGPUOperand::ImmTyOff, false));
3480 return ParseStatus::Success;
3481 }
3482
3483 if (!isRegister())
3484 return ParseStatus::NoMatch;
3485
3486 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3487 if (Reg) {
3488 Operands.push_back(std::move(Reg));
3489 return ParseStatus::Success;
3490 }
3491
3492 return ParseStatus::Failure;
3493}
3494
3495unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3496 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3497
3498 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3499 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3500 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3501 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3502 return Match_InvalidOperand;
3503
3504 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3505 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3506 // v_mac_f32/16 allow only dst_sel == DWORD;
3507 auto OpNum =
3508 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3509 const auto &Op = Inst.getOperand(OpNum);
3510 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3511 return Match_InvalidOperand;
3512 }
3513 }
3514
3515 return Match_Success;
3516}
3517
3518 static ArrayRef<unsigned> getAllVariants() {
3519 static const unsigned Variants[] = {
3520 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3521 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3522 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3523 };
3524
3525 return ArrayRef(Variants);
3526}
3527
3528// What asm variants we should check
3529ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3530 if (isForcedDPP() && isForcedVOP3()) {
3531 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3532 return ArrayRef(Variants);
3533 }
3534 if (getForcedEncodingSize() == 32) {
3535 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3536 return ArrayRef(Variants);
3537 }
3538
3539 if (isForcedVOP3()) {
3540 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3541 return ArrayRef(Variants);
3542 }
3543
3544 if (isForcedSDWA()) {
3545 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3546 AMDGPUAsmVariants::SDWA9};
3547 return ArrayRef(Variants);
3548 }
3549
3550 if (isForcedDPP()) {
3551 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3552 return ArrayRef(Variants);
3553 }
3554
3555 return getAllVariants();
3556}
3557
3558StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3559 if (isForcedDPP() && isForcedVOP3())
3560 return "e64_dpp";
3561
3562 if (getForcedEncodingSize() == 32)
3563 return "e32";
3564
3565 if (isForcedVOP3())
3566 return "e64";
3567
3568 if (isForcedSDWA())
3569 return "sdwa";
3570
3571 if (isForcedDPP())
3572 return "dpp";
3573
3574 return "";
3575}
3576
3577unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3578 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3579 for (MCPhysReg Reg : Desc.implicit_uses()) {
3580 switch (Reg) {
3581 case AMDGPU::FLAT_SCR:
3582 case AMDGPU::VCC:
3583 case AMDGPU::VCC_LO:
3584 case AMDGPU::VCC_HI:
3585 case AMDGPU::M0:
3586 return Reg;
3587 default:
3588 break;
3589 }
3590 }
3591 return AMDGPU::NoRegister;
3592}
3593
3594// NB: This code is correct only when used to check constant
3595 // bus limitations because GFX7 supports no f16 inline constants.
3596// Note that there are no cases when a GFX7 opcode violates
3597// constant bus limitations due to the use of an f16 constant.
3598bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3599 unsigned OpIdx) const {
3600 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3601
3602 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3603 AMDGPU::isKImmOperand(Desc, OpIdx)) {
3604 return false;
3605 }
3606
3607 const MCOperand &MO = Inst.getOperand(OpIdx);
3608
3609 int64_t Val = MO.getImm();
3610 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3611
3612 switch (OpSize) { // expected operand size
3613 case 8:
3614 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3615 case 4:
3616 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3617 case 2: {
3618 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3622 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3623
3628
3633
3638
3643 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3644
3649 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3650
3651 llvm_unreachable("invalid operand type");
3652 }
3653 default:
3654 llvm_unreachable("invalid operand size");
3655 }
3656}
3657
3658unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3659 if (!isGFX10Plus())
3660 return 1;
3661
3662 switch (Opcode) {
3663 // 64-bit shift instructions can use only one scalar value input
3664 case AMDGPU::V_LSHLREV_B64_e64:
3665 case AMDGPU::V_LSHLREV_B64_gfx10:
3666 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3667 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3668 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3669 case AMDGPU::V_LSHRREV_B64_e64:
3670 case AMDGPU::V_LSHRREV_B64_gfx10:
3671 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3672 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3673 case AMDGPU::V_ASHRREV_I64_e64:
3674 case AMDGPU::V_ASHRREV_I64_gfx10:
3675 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3676 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3677 case AMDGPU::V_LSHL_B64_e64:
3678 case AMDGPU::V_LSHR_B64_e64:
3679 case AMDGPU::V_ASHR_I64_e64:
3680 return 1;
3681 default:
3682 return 2;
3683 }
3684}
3685
3686constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3687 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3688
3689// Get regular operand indices in the same order as specified
3690// in the instruction (but append mandatory literals to the end).
3691 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3692 bool AddMandatoryLiterals = false) {
3693
3694 int16_t ImmIdx =
3695 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3696
3697 if (isVOPD(Opcode)) {
3698 int16_t ImmDeferredIdx =
3699 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3700 : -1;
3701
3702 return {getNamedOperandIdx(Opcode, OpName::src0X),
3703 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3704 getNamedOperandIdx(Opcode, OpName::src0Y),
3705 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3706 ImmDeferredIdx,
3707 ImmIdx};
3708 }
3709
3710 return {getNamedOperandIdx(Opcode, OpName::src0),
3711 getNamedOperandIdx(Opcode, OpName::src1),
3712 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3713}
3714
3715bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3716 const MCOperand &MO = Inst.getOperand(OpIdx);
3717 if (MO.isImm())
3718 return !isInlineConstant(Inst, OpIdx);
3719 if (MO.isReg()) {
3720 auto Reg = MO.getReg();
3721 if (!Reg)
3722 return false;
3723 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3724 auto PReg = mc2PseudoReg(Reg);
3725 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3726 }
3727 return true;
3728}
3729
3730// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3731// Writelane is special in that it can use SGPR and M0 (which would normally
3732// count as using the constant bus twice - but in this case it is allowed since
3733// the lane selector doesn't count as a use of the constant bus). However, it is
3734// still required to abide by the 1 SGPR rule.
3735static bool checkWriteLane(const MCInst &Inst) {
3736 const unsigned Opcode = Inst.getOpcode();
3737 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3738 return false;
3739 const MCOperand &LaneSelOp = Inst.getOperand(2);
3740 if (!LaneSelOp.isReg())
3741 return false;
3742 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3743 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3744}
3745
3746bool AMDGPUAsmParser::validateConstantBusLimitations(
3747 const MCInst &Inst, const OperandVector &Operands) {
3748 const unsigned Opcode = Inst.getOpcode();
3749 const MCInstrDesc &Desc = MII.get(Opcode);
3750 MCRegister LastSGPR;
3751 unsigned ConstantBusUseCount = 0;
3752 unsigned NumLiterals = 0;
3753 unsigned LiteralSize;
3754
3755 if (!(Desc.TSFlags &
3758 !isVOPD(Opcode))
3759 return true;
3760
3761 if (checkWriteLane(Inst))
3762 return true;
3763
3764 // Check special imm operands (used by madmk, etc)
3765 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3766 ++NumLiterals;
3767 LiteralSize = 4;
3768 }
3769
3770 SmallDenseSet<unsigned> SGPRsUsed;
3771 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3772 if (SGPRUsed != AMDGPU::NoRegister) {
3773 SGPRsUsed.insert(SGPRUsed);
3774 ++ConstantBusUseCount;
3775 }
3776
3777 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3778
3779 for (int OpIdx : OpIndices) {
3780 if (OpIdx == -1)
3781 continue;
3782
3783 const MCOperand &MO = Inst.getOperand(OpIdx);
3784 if (usesConstantBus(Inst, OpIdx)) {
3785 if (MO.isReg()) {
3786 LastSGPR = mc2PseudoReg(MO.getReg());
3787        // Pairs of registers with a partial intersection like these
3788 // s0, s[0:1]
3789 // flat_scratch_lo, flat_scratch
3790 // flat_scratch_lo, flat_scratch_hi
3791 // are theoretically valid but they are disabled anyway.
3792 // Note that this code mimics SIInstrInfo::verifyInstruction
3793 if (SGPRsUsed.insert(LastSGPR).second) {
3794 ++ConstantBusUseCount;
3795 }
3796 } else { // Expression or a literal
3797
3798 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3799 continue; // special operand like VINTERP attr_chan
3800
3801 // An instruction may use only one literal.
3802 // This has been validated on the previous step.
3803 // See validateVOPLiteral.
3804 // This literal may be used as more than one operand.
3805 // If all these operands are of the same size,
3806 // this literal counts as one scalar value.
3807 // Otherwise it counts as 2 scalar values.
3808 // See "GFX10 Shader Programming", section 3.6.2.3.
3809
3810 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3811 if (Size < 4)
3812 Size = 4;
3813
3814 if (NumLiterals == 0) {
3815 NumLiterals = 1;
3816 LiteralSize = Size;
3817 } else if (LiteralSize != Size) {
3818 NumLiterals = 2;
3819 }
3820 }
3821 }
3822 }
3823 ConstantBusUseCount += NumLiterals;
3824
3825 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3826 return true;
3827
3828 SMLoc LitLoc = getLitLoc(Operands);
3829 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3830 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3831 Error(Loc, "invalid operand (violates constant bus restrictions)");
3832 return false;
3833}
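// For illustration: on a target with a single constant-bus slot (pre-GFX10),
//   v_add_f32 v0, s1, s2   ; rejected - two distinct SGPRs need two reads
//   v_add_f32 v0, s1, s1   ; accepted - the same SGPR is counted only once
//   v_add_f32 v0, s1, v2   ; accepted - only one scalar operand
// On GFX10+ several VOP3 encodings allow two scalar values (see the
// getConstantBusLimit switch above), and a literal counts toward the same limit.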
3834
3835bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3836 const MCInst &Inst, const OperandVector &Operands) {
3837
3838 const unsigned Opcode = Inst.getOpcode();
3839 if (!isVOPD(Opcode))
3840 return true;
3841
3842 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3843
3844 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3845 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3846 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3847 ? Opr.getReg()
3848 : MCRegister();
3849 };
3850
3851  // On GFX12, if both OpX and OpY are V_MOV_B32, then OpY uses the SRC2 source cache.
3852 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3853
3854 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3855 auto InvalidCompOprIdx =
3856 InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3857 if (!InvalidCompOprIdx)
3858 return true;
3859
3860 auto CompOprIdx = *InvalidCompOprIdx;
3861 auto ParsedIdx =
3862 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3863 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3864 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3865
3866 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3867 if (CompOprIdx == VOPD::Component::DST) {
3868 Error(Loc, "one dst register must be even and the other odd");
3869 } else {
3870 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3871 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3872 " operands must use different VGPR banks");
3873 }
3874
3875 return false;
3876}
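// For illustration, on GFX11 the pair
//   v_dual_add_f32 v0, v1, v2 :: v_dual_mul_f32 v3, v5, v7
// is rejected because v1 and v5 map to the same src0 VGPR bank, whereas
// replacing v5 with v6 satisfies both the bank constraint and the
// even/odd dst constraint (v0 is even, v3 is odd).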
3877
3878bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3879
3880 const unsigned Opc = Inst.getOpcode();
3881 const MCInstrDesc &Desc = MII.get(Opc);
3882
3883 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3884 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3885 assert(ClampIdx != -1);
3886 return Inst.getOperand(ClampIdx).getImm() == 0;
3887 }
3888
3889 return true;
3890}
3891
3892constexpr uint64_t MIMGFlags =
3893    SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
3894
3895bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3896 const SMLoc &IDLoc) {
3897
3898 const unsigned Opc = Inst.getOpcode();
3899 const MCInstrDesc &Desc = MII.get(Opc);
3900
3901 if ((Desc.TSFlags & MIMGFlags) == 0)
3902 return true;
3903
3904 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3905 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3906 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3907
3908 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
3909 return true;
3910
3911 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3912 return true;
3913
3914 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3915 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3916 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3917 if (DMask == 0)
3918 DMask = 1;
3919
3920 bool IsPackedD16 = false;
3921 unsigned DataSize =
3922 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3923 if (hasPackedD16()) {
3924 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3925 IsPackedD16 = D16Idx >= 0;
3926 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3927 DataSize = (DataSize + 1) / 2;
3928 }
3929
3930 if ((VDataSize / 4) == DataSize + TFESize)
3931 return true;
3932
3933 StringRef Modifiers;
3934 if (isGFX90A())
3935 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3936 else
3937 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3938
3939 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3940 return false;
3941}
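// For illustration: dmask:0x7 has three bits set, so an image load must supply
// a 3-register vdata tuple (e.g. v[0:2]); tfe adds one more register, and
// packed d16 halves the count, rounded up.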
3942
3943bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3944 const SMLoc &IDLoc) {
3945 const unsigned Opc = Inst.getOpcode();
3946 const MCInstrDesc &Desc = MII.get(Opc);
3947
3948 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
3949 return true;
3950
3951  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3952
3953  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3954      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3955  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3956 int RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc
3957 : AMDGPU::OpName::rsrc;
3958 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
3959 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3960 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3961
3962 assert(VAddr0Idx != -1);
3963 assert(SrsrcIdx != -1);
3964 assert(SrsrcIdx > VAddr0Idx);
3965
3966 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3967 if (BaseOpcode->BVH) {
3968 if (IsA16 == BaseOpcode->A16)
3969 return true;
3970 Error(IDLoc, "image address size does not match a16");
3971 return false;
3972 }
3973
3974  unsigned Dim = Inst.getOperand(DimIdx).getImm();
3975  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3976  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3977 unsigned ActualAddrSize =
3978 IsNSA ? SrsrcIdx - VAddr0Idx
3979 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3980
3981 unsigned ExpectedAddrSize =
3982 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3983
3984 if (IsNSA) {
3985    if (hasPartialNSAEncoding() &&
3986        ExpectedAddrSize >
3987            getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
3988      int VAddrLastIdx = SrsrcIdx - 1;
3989 unsigned VAddrLastSize =
3990 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3991
3992 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3993 }
3994 } else {
3995 if (ExpectedAddrSize > 12)
3996 ExpectedAddrSize = 16;
3997
3998 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3999 // This provides backward compatibility for assembly created
4000 // before 160b/192b/224b types were directly supported.
4001 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4002 return true;
4003 }
4004
4005 if (ActualAddrSize == ExpectedAddrSize)
4006 return true;
4007
4008 Error(IDLoc, "image address size does not match dim and a16");
4009 return false;
4010}
4011
4012bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4013
4014 const unsigned Opc = Inst.getOpcode();
4015 const MCInstrDesc &Desc = MII.get(Opc);
4016
4017 if ((Desc.TSFlags & MIMGFlags) == 0)
4018 return true;
4019 if (!Desc.mayLoad() || !Desc.mayStore())
4020 return true; // Not atomic
4021
4022 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4023 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4024
4025 // This is an incomplete check because image_atomic_cmpswap
4026 // may only use 0x3 and 0xf while other atomic operations
4027 // may use 0x1 and 0x3. However these limitations are
4028 // verified when we check that dmask matches dst size.
4029 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4030}
4031
4032bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4033
4034 const unsigned Opc = Inst.getOpcode();
4035 const MCInstrDesc &Desc = MII.get(Opc);
4036
4037 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4038 return true;
4039
4040 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4041 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4042
4043 // GATHER4 instructions use dmask in a different fashion compared to
4044 // other MIMG instructions. The only useful DMASK values are
4045 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4046 // (red,red,red,red) etc.) The ISA document doesn't mention
4047 // this.
4048 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4049}
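// For illustration, image_gather4 with dmask:0x2 gathers the green component
// from four texels, while a value such as dmask:0x3 is rejected by the check
// above.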
4050
4051bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4052 const OperandVector &Operands) {
4053 if (!isGFX10Plus())
4054 return true;
4055
4056 const unsigned Opc = Inst.getOpcode();
4057 const MCInstrDesc &Desc = MII.get(Opc);
4058
4059 if ((Desc.TSFlags & MIMGFlags) == 0)
4060 return true;
4061
4062 // image_bvh_intersect_ray instructions do not have dim
4063 if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH)
4064 return true;
4065
4066 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4067 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4068 if (Op.isDim())
4069 return true;
4070 }
4071 return false;
4072}
4073
4074bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4075 const unsigned Opc = Inst.getOpcode();
4076 const MCInstrDesc &Desc = MII.get(Opc);
4077
4078 if ((Desc.TSFlags & MIMGFlags) == 0)
4079 return true;
4080
4081  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4082  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4083      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4084
4085 if (!BaseOpcode->MSAA)
4086 return true;
4087
4088 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4089 assert(DimIdx != -1);
4090
4091  unsigned Dim = Inst.getOperand(DimIdx).getImm();
4092  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4093
4094 return DimInfo->MSAA;
4095}
4096
4097static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4098{
4099 switch (Opcode) {
4100 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4101 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4102 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4103 return true;
4104 default:
4105 return false;
4106 }
4107}
4108
4109// movrels* opcodes should only allow VGPRs as src0.
4110// This is specified in the .td description for vop1/vop3,
4111// but sdwa is handled differently. See isSDWAOperand.
4112bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4113 const OperandVector &Operands) {
4114
4115 const unsigned Opc = Inst.getOpcode();
4116 const MCInstrDesc &Desc = MII.get(Opc);
4117
4118 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4119 return true;
4120
4121 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4122 assert(Src0Idx != -1);
4123
4124 SMLoc ErrLoc;
4125 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4126 if (Src0.isReg()) {
4127 auto Reg = mc2PseudoReg(Src0.getReg());
4128 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4129 if (!isSGPR(Reg, TRI))
4130 return true;
4131 ErrLoc = getRegLoc(Reg, Operands);
4132 } else {
4133 ErrLoc = getConstLoc(Operands);
4134 }
4135
4136 Error(ErrLoc, "source operand must be a VGPR");
4137 return false;
4138}
4139
4140bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4141 const OperandVector &Operands) {
4142
4143 const unsigned Opc = Inst.getOpcode();
4144
4145 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4146 return true;
4147
4148 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4149 assert(Src0Idx != -1);
4150
4151 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4152 if (!Src0.isReg())
4153 return true;
4154
4155 auto Reg = mc2PseudoReg(Src0.getReg());
4156 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4157 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4158 Error(getRegLoc(Reg, Operands),
4159 "source operand must be either a VGPR or an inline constant");
4160 return false;
4161 }
4162
4163 return true;
4164}
4165
4166bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4167 const OperandVector &Operands) {
4168 unsigned Opcode = Inst.getOpcode();
4169 const MCInstrDesc &Desc = MII.get(Opcode);
4170
4171 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4172 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4173 return true;
4174
4175 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4176 if (Src2Idx == -1)
4177 return true;
4178
4179 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4180 Error(getConstLoc(Operands),
4181 "inline constants are not allowed for this operand");
4182 return false;
4183 }
4184
4185 return true;
4186}
4187
4188bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4189 const OperandVector &Operands) {
4190 const unsigned Opc = Inst.getOpcode();
4191 const MCInstrDesc &Desc = MII.get(Opc);
4192
4193 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4194 return true;
4195
4196 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4197 if (BlgpIdx != -1) {
4198 if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
4199 int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
4200
4201 unsigned CBSZ = Inst.getOperand(CbszIdx).getImm();
4202 unsigned BLGP = Inst.getOperand(BlgpIdx).getImm();
4203
4204 // Validate the correct register size was used for the floating point
4205 // format operands
4206
4207 bool Success = true;
4208 if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) {
4209 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4210 Error(getRegLoc(mc2PseudoReg(Inst.getOperand(Src0Idx).getReg()),
4211 Operands),
4212 "wrong register tuple size for cbsz value " + Twine(CBSZ));
4213 Success = false;
4214 }
4215
4216 if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(BLGP)) {
4217 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4218 Error(getRegLoc(mc2PseudoReg(Inst.getOperand(Src1Idx).getReg()),
4219 Operands),
4220 "wrong register tuple size for blgp value " + Twine(BLGP));
4221 Success = false;
4222 }
4223
4224 return Success;
4225 }
4226 }
4227
4228 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4229 if (Src2Idx == -1)
4230 return true;
4231
4232 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4233 if (!Src2.isReg())
4234 return true;
4235
4236 MCRegister Src2Reg = Src2.getReg();
4237 MCRegister DstReg = Inst.getOperand(0).getReg();
4238 if (Src2Reg == DstReg)
4239 return true;
4240
4241 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4242 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4243 return true;
4244
4245 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4246 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
4247 "source 2 operand must not partially overlap with dst");
4248 return false;
4249 }
4250
4251 return true;
4252}
4253
4254bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4255 switch (Inst.getOpcode()) {
4256 default:
4257 return true;
4258 case V_DIV_SCALE_F32_gfx6_gfx7:
4259 case V_DIV_SCALE_F32_vi:
4260 case V_DIV_SCALE_F32_gfx10:
4261 case V_DIV_SCALE_F64_gfx6_gfx7:
4262 case V_DIV_SCALE_F64_vi:
4263 case V_DIV_SCALE_F64_gfx10:
4264 break;
4265 }
4266
4267 // TODO: Check that src0 = src1 or src2.
4268
4269 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4270 AMDGPU::OpName::src2_modifiers,
4271                    AMDGPU::OpName::src2_modifiers}) {
4272    if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4273            .getImm() &
4274        SISrcMods::ABS) {
4275      return false;
4276 }
4277 }
4278
4279 return true;
4280}
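// For illustration,  v_div_scale_f32 v0, vcc, |v1|, v2, v3  is rejected with
// "ABS not allowed in VOP3B instructions", while the neg modifier is not
// affected by this check.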
4281
4282bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4283
4284 const unsigned Opc = Inst.getOpcode();
4285 const MCInstrDesc &Desc = MII.get(Opc);
4286
4287 if ((Desc.TSFlags & MIMGFlags) == 0)
4288 return true;
4289
4290 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4291 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4292 if (isCI() || isSI())
4293 return false;
4294 }
4295
4296 return true;
4297}
4298
4299static bool IsRevOpcode(const unsigned Opcode)
4300{
4301 switch (Opcode) {
4302 case AMDGPU::V_SUBREV_F32_e32:
4303 case AMDGPU::V_SUBREV_F32_e64:
4304 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4305 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4306 case AMDGPU::V_SUBREV_F32_e32_vi:
4307 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4308 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4309 case AMDGPU::V_SUBREV_F32_e64_vi:
4310
4311 case AMDGPU::V_SUBREV_CO_U32_e32:
4312 case AMDGPU::V_SUBREV_CO_U32_e64:
4313 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4314 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4315
4316 case AMDGPU::V_SUBBREV_U32_e32:
4317 case AMDGPU::V_SUBBREV_U32_e64:
4318 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4319 case AMDGPU::V_SUBBREV_U32_e32_vi:
4320 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4321 case AMDGPU::V_SUBBREV_U32_e64_vi:
4322
4323 case AMDGPU::V_SUBREV_U32_e32:
4324 case AMDGPU::V_SUBREV_U32_e64:
4325 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4326 case AMDGPU::V_SUBREV_U32_e32_vi:
4327 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4328 case AMDGPU::V_SUBREV_U32_e64_vi:
4329
4330 case AMDGPU::V_SUBREV_F16_e32:
4331 case AMDGPU::V_SUBREV_F16_e64:
4332 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4333 case AMDGPU::V_SUBREV_F16_e32_vi:
4334 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4335 case AMDGPU::V_SUBREV_F16_e64_vi:
4336
4337 case AMDGPU::V_SUBREV_U16_e32:
4338 case AMDGPU::V_SUBREV_U16_e64:
4339 case AMDGPU::V_SUBREV_U16_e32_vi:
4340 case AMDGPU::V_SUBREV_U16_e64_vi:
4341
4342 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4343 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4344 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4345
4346 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4347 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4348
4349 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4350 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4351
4352 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4353 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4354
4355 case AMDGPU::V_LSHRREV_B32_e32:
4356 case AMDGPU::V_LSHRREV_B32_e64:
4357 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4358 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4359 case AMDGPU::V_LSHRREV_B32_e32_vi:
4360 case AMDGPU::V_LSHRREV_B32_e64_vi:
4361 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4362 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4363
4364 case AMDGPU::V_ASHRREV_I32_e32:
4365 case AMDGPU::V_ASHRREV_I32_e64:
4366 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4367 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4368 case AMDGPU::V_ASHRREV_I32_e32_vi:
4369 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4370 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4371 case AMDGPU::V_ASHRREV_I32_e64_vi:
4372
4373 case AMDGPU::V_LSHLREV_B32_e32:
4374 case AMDGPU::V_LSHLREV_B32_e64:
4375 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4376 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4377 case AMDGPU::V_LSHLREV_B32_e32_vi:
4378 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4379 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4380 case AMDGPU::V_LSHLREV_B32_e64_vi:
4381
4382 case AMDGPU::V_LSHLREV_B16_e32:
4383 case AMDGPU::V_LSHLREV_B16_e64:
4384 case AMDGPU::V_LSHLREV_B16_e32_vi:
4385 case AMDGPU::V_LSHLREV_B16_e64_vi:
4386 case AMDGPU::V_LSHLREV_B16_gfx10:
4387
4388 case AMDGPU::V_LSHRREV_B16_e32:
4389 case AMDGPU::V_LSHRREV_B16_e64:
4390 case AMDGPU::V_LSHRREV_B16_e32_vi:
4391 case AMDGPU::V_LSHRREV_B16_e64_vi:
4392 case AMDGPU::V_LSHRREV_B16_gfx10:
4393
4394 case AMDGPU::V_ASHRREV_I16_e32:
4395 case AMDGPU::V_ASHRREV_I16_e64:
4396 case AMDGPU::V_ASHRREV_I16_e32_vi:
4397 case AMDGPU::V_ASHRREV_I16_e64_vi:
4398 case AMDGPU::V_ASHRREV_I16_gfx10:
4399
4400 case AMDGPU::V_LSHLREV_B64_e64:
4401 case AMDGPU::V_LSHLREV_B64_gfx10:
4402 case AMDGPU::V_LSHLREV_B64_vi:
4403
4404 case AMDGPU::V_LSHRREV_B64_e64:
4405 case AMDGPU::V_LSHRREV_B64_gfx10:
4406 case AMDGPU::V_LSHRREV_B64_vi:
4407
4408 case AMDGPU::V_ASHRREV_I64_e64:
4409 case AMDGPU::V_ASHRREV_I64_gfx10:
4410 case AMDGPU::V_ASHRREV_I64_vi:
4411
4412 case AMDGPU::V_PK_LSHLREV_B16:
4413 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4414 case AMDGPU::V_PK_LSHLREV_B16_vi:
4415
4416 case AMDGPU::V_PK_LSHRREV_B16:
4417 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4418 case AMDGPU::V_PK_LSHRREV_B16_vi:
4419 case AMDGPU::V_PK_ASHRREV_I16:
4420 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4421 case AMDGPU::V_PK_ASHRREV_I16_vi:
4422 return true;
4423 default:
4424 return false;
4425 }
4426}
4427
4428std::optional<StringRef>
4429AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4430
4431 using namespace SIInstrFlags;
4432 const unsigned Opcode = Inst.getOpcode();
4433 const MCInstrDesc &Desc = MII.get(Opcode);
4434
4435 // lds_direct register is defined so that it can be used
4436 // with 9-bit operands only. Ignore encodings which do not accept these.
4437 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4438 if ((Desc.TSFlags & Enc) == 0)
4439 return std::nullopt;
4440
4441 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4442 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4443 if (SrcIdx == -1)
4444 break;
4445 const auto &Src = Inst.getOperand(SrcIdx);
4446 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4447
4448 if (isGFX90A() || isGFX11Plus())
4449 return StringRef("lds_direct is not supported on this GPU");
4450
4451 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4452 return StringRef("lds_direct cannot be used with this instruction");
4453
4454 if (SrcName != OpName::src0)
4455 return StringRef("lds_direct may be used as src0 only");
4456 }
4457 }
4458
4459 return std::nullopt;
4460}
4461
4462SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4463 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4464 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4465 if (Op.isFlatOffset())
4466 return Op.getStartLoc();
4467 }
4468 return getLoc();
4469}
4470
4471bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4472 const OperandVector &Operands) {
4473 auto Opcode = Inst.getOpcode();
4474 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4475 if (OpNum == -1)
4476 return true;
4477
4478 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4479 if ((TSFlags & SIInstrFlags::FLAT))
4480 return validateFlatOffset(Inst, Operands);
4481
4482 if ((TSFlags & SIInstrFlags::SMRD))
4483 return validateSMEMOffset(Inst, Operands);
4484
4485 const auto &Op = Inst.getOperand(OpNum);
4486 if (isGFX12Plus() &&
4487 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4488 const unsigned OffsetSize = 24;
4489 if (!isIntN(OffsetSize, Op.getImm())) {
4490 Error(getFlatOffsetLoc(Operands),
4491 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4492 return false;
4493 }
4494 } else {
4495 const unsigned OffsetSize = 16;
4496 if (!isUIntN(OffsetSize, Op.getImm())) {
4497 Error(getFlatOffsetLoc(Operands),
4498 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4499 return false;
4500 }
4501 }
4502 return true;
4503}
4504
4505bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4506 const OperandVector &Operands) {
4507 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4508 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4509 return true;
4510
4511 auto Opcode = Inst.getOpcode();
4512 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4513 assert(OpNum != -1);
4514
4515 const auto &Op = Inst.getOperand(OpNum);
4516 if (!hasFlatOffsets() && Op.getImm() != 0) {
4517 Error(getFlatOffsetLoc(Operands),
4518 "flat offset modifier is not supported on this GPU");
4519 return false;
4520 }
4521
4522 // For pre-GFX12 FLAT instructions the offset must be positive;
4523 // MSB is ignored and forced to zero.
4524 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4525  bool AllowNegative =
4526      (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4527      isGFX12Plus();
4528 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4529 Error(getFlatOffsetLoc(Operands),
4530 Twine("expected a ") +
4531 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4532 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4533 return false;
4534 }
4535
4536 return true;
4537}
4538
4539SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4540 // Start with second operand because SMEM Offset cannot be dst or src0.
4541 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4542 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4543 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4544 return Op.getStartLoc();
4545 }
4546 return getLoc();
4547}
4548
4549bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4550 const OperandVector &Operands) {
4551 if (isCI() || isSI())
4552 return true;
4553
4554 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4555 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4556 return true;
4557
4558 auto Opcode = Inst.getOpcode();
4559 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4560 if (OpNum == -1)
4561 return true;
4562
4563 const auto &Op = Inst.getOperand(OpNum);
4564 if (!Op.isImm())
4565 return true;
4566
4567 uint64_t Offset = Op.getImm();
4568  bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4569  if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4570      AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4571    return true;
4572
4573 Error(getSMEMOffsetLoc(Operands),
4574 isGFX12Plus() ? "expected a 24-bit signed offset"
4575 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4576 : "expected a 21-bit signed offset");
4577
4578 return false;
4579}
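// For illustration, on gfx900  s_load_dword s0, s[2:3], 0xFFFFF  is accepted
// (the offset fits in 21 signed bits), while an offset of 0x100000 is rejected
// with "expected a 21-bit signed offset".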
4580
4581bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4582 unsigned Opcode = Inst.getOpcode();
4583 const MCInstrDesc &Desc = MII.get(Opcode);
4584 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4585 return true;
4586
4587 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4588 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4589
4590 const int OpIndices[] = { Src0Idx, Src1Idx };
4591
4592 unsigned NumExprs = 0;
4593  unsigned NumLiterals = 0;
4594  uint32_t LiteralValue;
4595
4596 for (int OpIdx : OpIndices) {
4597 if (OpIdx == -1) break;
4598
4599 const MCOperand &MO = Inst.getOperand(OpIdx);
4600    // Exclude special imm operands (like the one used by s_set_gpr_idx_on)
4601 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4602 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4603 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4604        if (NumLiterals == 0 || LiteralValue != Value) {
4605          LiteralValue = Value;
4606          ++NumLiterals;
4607 }
4608 } else if (MO.isExpr()) {
4609 ++NumExprs;
4610 }
4611 }
4612 }
4613
4614 return NumLiterals + NumExprs <= 1;
4615}
4616
4617bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4618 const unsigned Opc = Inst.getOpcode();
4619 if (isPermlane16(Opc)) {
4620 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4621 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4622
4623 if (OpSel & ~3)
4624 return false;
4625 }
4626
4627 uint64_t TSFlags = MII.get(Opc).TSFlags;
4628
4629 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4630 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4631 if (OpSelIdx != -1) {
4632 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4633 return false;
4634 }
4635 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4636 if (OpSelHiIdx != -1) {
4637 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4638 return false;
4639 }
4640 }
4641
4642 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4643 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4644 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4645 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4646 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4647 if (OpSel & 3)
4648 return false;
4649 }
4650
4651 return true;
4652}
4653
4654bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) {
4655 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4656
4657 const unsigned Opc = Inst.getOpcode();
4658 uint64_t TSFlags = MII.get(Opc).TSFlags;
4659
4660 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4661 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4662 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4663 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4664 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4665 !(TSFlags & SIInstrFlags::IsSWMMAC))
4666 return true;
4667
4668 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4669 if (NegIdx == -1)
4670 return true;
4671
4672 unsigned Neg = Inst.getOperand(NegIdx).getImm();
4673
4674  // Some instructions have a neg_lo or neg_hi operand, but the neg modifier is
4675  // allowed only on some of their src operands.
4676  // Conveniently, such instructions have no src_modifiers operand for the src
4677  // operands that don't allow neg, because those operands don't allow opsel either.
4678
4679 int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4680 AMDGPU::OpName::src1_modifiers,
4681 AMDGPU::OpName::src2_modifiers};
4682
4683 for (unsigned i = 0; i < 3; ++i) {
4684 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4685 if (Neg & (1 << i))
4686 return false;
4687 }
4688 }
4689
4690 return true;
4691}
4692
4693bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4694 const OperandVector &Operands) {
4695 const unsigned Opc = Inst.getOpcode();
4696 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4697 if (DppCtrlIdx >= 0) {
4698 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4699
4700 if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
4701 AMDGPU::isDPALU_DPP(MII.get(Opc))) {
4702 // DP ALU DPP is supported for row_newbcast only on GFX9*
4703 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4704 Error(S, "DP ALU dpp only supports row_newbcast");
4705 return false;
4706 }
4707 }
4708
4709 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
4710 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4711
4712 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
4713 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4714 if (Src1Idx >= 0) {
4715 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4716 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4717 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
4718 auto Reg = mc2PseudoReg(Inst.getOperand(Src1Idx).getReg());
4719 SMLoc S = getRegLoc(Reg, Operands);
4720 Error(S, "invalid operand for instruction");
4721 return false;
4722 }
4723 if (Src1.isImm()) {
4724 Error(getInstLoc(Operands),
4725 "src1 immediate operand invalid for instruction");
4726 return false;
4727 }
4728 }
4729 }
4730
4731 return true;
4732}
4733
4734// Check if VCC register matches wavefront size
4735bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
4736 auto FB = getFeatureBits();
4737 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4738 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4739}
4740
4741// Only one unique literal may be used. A VOP3 literal is only allowed on GFX10+.
4742bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4743 const OperandVector &Operands) {
4744 unsigned Opcode = Inst.getOpcode();
4745 const MCInstrDesc &Desc = MII.get(Opcode);
4746 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4747 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4748 !HasMandatoryLiteral && !isVOPD(Opcode))
4749 return true;
4750
4751 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4752
4753 unsigned NumExprs = 0;
4754  unsigned NumLiterals = 0;
4755  uint64_t LiteralValue;
4756
4757 for (int OpIdx : OpIndices) {
4758 if (OpIdx == -1)
4759 continue;
4760
4761 const MCOperand &MO = Inst.getOperand(OpIdx);
4762 if (!MO.isImm() && !MO.isExpr())
4763 continue;
4764 if (!isSISrcOperand(Desc, OpIdx))
4765 continue;
4766
4767 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4768 uint64_t Value = static_cast<uint64_t>(MO.getImm());
4769 bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
4770 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
4771 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
4772
4773 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
4774 Error(getLitLoc(Operands), "invalid operand for instruction");
4775 return false;
4776 }
4777
4778 if (IsFP64 && IsValid32Op)
4779 Value = Hi_32(Value);
4780
4781      if (NumLiterals == 0 || LiteralValue != Value) {
4782        LiteralValue = Value;
4783        ++NumLiterals;
4784 }
4785 } else if (MO.isExpr()) {
4786 ++NumExprs;
4787 }
4788 }
4789 NumLiterals += NumExprs;
4790
4791 if (!NumLiterals)
4792 return true;
4793
4794 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4795 Error(getLitLoc(Operands), "literal operands are not supported");
4796 return false;
4797 }
4798
4799 if (NumLiterals > 1) {
4800 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4801 return false;
4802 }
4803
4804 return true;
4805}
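// For illustration, where VOP3 literals are available (GFX10+):
//   v_fma_f32 v0, 0x40490fdb, v1, 0x40490fdb   ; accepted - one unique literal
//   v_fma_f32 v0, 0x40490fdb, v1, 0x41200000   ; rejected - two unique literals
// On targets without FeatureVOP3Literal any VOP3 literal is rejected outright.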
4806
4807// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4808static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4809 const MCRegisterInfo *MRI) {
4810 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4811 if (OpIdx < 0)
4812 return -1;
4813
4814 const MCOperand &Op = Inst.getOperand(OpIdx);
4815 if (!Op.isReg())
4816 return -1;
4817
4818 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4819 auto Reg = Sub ? Sub : Op.getReg();
4820 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4821 return AGPR32.contains(Reg) ? 1 : 0;
4822}
4823
4824bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4825 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4826  if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4827                  SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4828                  SIInstrFlags::DS)) == 0)
4829 return true;
4830
4831 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4832 : AMDGPU::OpName::vdata;
4833
4834 const MCRegisterInfo *MRI = getMRI();
4835 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4836 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4837
4838 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4839 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4840 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4841 return false;
4842 }
4843
4844 auto FB = getFeatureBits();
4845 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4846 if (DataAreg < 0 || DstAreg < 0)
4847 return true;
4848 return DstAreg == DataAreg;
4849 }
4850
4851 return DstAreg < 1 && DataAreg < 1;
4852}
4853
4854bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4855 auto FB = getFeatureBits();
4856 if (!FB[AMDGPU::FeatureGFX90AInsts])
4857 return true;
4858
4859 const MCRegisterInfo *MRI = getMRI();
4860 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4861 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4862 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4863 const MCOperand &Op = Inst.getOperand(I);
4864 if (!Op.isReg())
4865 continue;
4866
4867 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4868 if (!Sub)
4869 continue;
4870
4871 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4872 return false;
4873 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4874 return false;
4875 }
4876
4877 return true;
4878}
4879
4880SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4881 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4882 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4883 if (Op.isBLGP())
4884 return Op.getStartLoc();
4885 }
4886 return SMLoc();
4887}
4888
4889bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4890 const OperandVector &Operands) {
4891 unsigned Opc = Inst.getOpcode();
4892 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4893 if (BlgpIdx == -1)
4894 return true;
4895 SMLoc BLGPLoc = getBLGPLoc(Operands);
4896 if (!BLGPLoc.isValid())
4897 return true;
4898 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
4899 auto FB = getFeatureBits();
4900 bool UsesNeg = false;
4901 if (FB[AMDGPU::FeatureGFX940Insts]) {
4902 switch (Opc) {
4903 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4904 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4905 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4906 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4907 UsesNeg = true;
4908 }
4909 }
4910
4911 if (IsNeg == UsesNeg)
4912 return true;
4913
4914 Error(BLGPLoc,
4915 UsesNeg ? "invalid modifier: blgp is not supported"
4916 : "invalid modifier: neg is not supported");
4917
4918 return false;
4919}
4920
4921bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4922 const OperandVector &Operands) {
4923 if (!isGFX11Plus())
4924 return true;
4925
4926 unsigned Opc = Inst.getOpcode();
4927 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4928 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4929 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4930 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4931 return true;
4932
4933 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4934 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4935 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4936 if (Reg == AMDGPU::SGPR_NULL)
4937 return true;
4938
4939 SMLoc RegLoc = getRegLoc(Reg, Operands);
4940 Error(RegLoc, "src0 must be null");
4941 return false;
4942}
4943
4944bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
4945 const OperandVector &Operands) {
4946 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4947 if ((TSFlags & SIInstrFlags::DS) == 0)
4948 return true;
4949 if (TSFlags & SIInstrFlags::GWS)
4950 return validateGWS(Inst, Operands);
4951 // Only validate GDS for non-GWS instructions.
4952 if (hasGDS())
4953 return true;
4954 int GDSIdx =
4955 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
4956 if (GDSIdx < 0)
4957 return true;
4958 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
4959 if (GDS) {
4960 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
4961 Error(S, "gds modifier is not supported on this GPU");
4962 return false;
4963 }
4964 return true;
4965}
4966
4967// gfx90a has an undocumented limitation:
4968// DS_GWS opcodes must use even aligned registers.
4969bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4970 const OperandVector &Operands) {
4971 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4972 return true;
4973
4974 int Opc = Inst.getOpcode();
4975 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4976 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4977 return true;
4978
4979 const MCRegisterInfo *MRI = getMRI();
4980 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4981 int Data0Pos =
4982 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4983 assert(Data0Pos != -1);
4984 auto Reg = Inst.getOperand(Data0Pos).getReg();
4985 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4986 if (RegIdx & 1) {
4987 SMLoc RegLoc = getRegLoc(Reg, Operands);
4988 Error(RegLoc, "vgpr must be even aligned");
4989 return false;
4990 }
4991
4992 return true;
4993}
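// For illustration, on gfx90a a ds_gws_init whose data operand is v3 (odd) is
// rejected with "vgpr must be even aligned", while v2 is accepted.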
4994
4995bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4996 const OperandVector &Operands,
4997 const SMLoc &IDLoc) {
4998 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4999 AMDGPU::OpName::cpol);
5000 if (CPolPos == -1)
5001 return true;
5002
5003 unsigned CPol = Inst.getOperand(CPolPos).getImm();
5004
5005 if (isGFX12Plus())
5006 return validateTHAndScopeBits(Inst, Operands, CPol);
5007
5008 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5009 if (TSFlags & SIInstrFlags::SMRD) {
5010 if (CPol && (isSI() || isCI())) {
5011 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5012 Error(S, "cache policy is not supported for SMRD instructions");
5013 return false;
5014 }
5015 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
5016 Error(IDLoc, "invalid cache policy for SMEM instruction");
5017 return false;
5018 }
5019 }
5020
5021 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
5022    const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
5023                                      SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5024                                      SIInstrFlags::FLAT;
5025    if (!(TSFlags & AllowSCCModifier)) {
5026 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5027 StringRef CStr(S.getPointer());
5028 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
5029 Error(S,
5030 "scc modifier is not supported for this instruction on this GPU");
5031 return false;
5032 }
5033 }
5034
5035  if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
5036    return true;
5037
5038 if (TSFlags & SIInstrFlags::IsAtomicRet) {
5039 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
5040 Error(IDLoc, isGFX940() ? "instruction must use sc0"
5041 : "instruction must use glc");
5042 return false;
5043 }
5044 } else {
5045 if (CPol & CPol::GLC) {
5046 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5047      StringRef CStr(S.getPointer());
5048      S = SMLoc::getFromPointer(
5049          &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
5050 Error(S, isGFX940() ? "instruction must not use sc0"
5051 : "instruction must not use glc");
5052 return false;
5053 }
5054 }
5055
5056 return true;
5057}
5058
5059bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
5060 const OperandVector &Operands,
5061 const unsigned CPol) {
5062 const unsigned TH = CPol & AMDGPU::CPol::TH;
5063 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
5064
5065 const unsigned Opcode = Inst.getOpcode();
5066 const MCInstrDesc &TID = MII.get(Opcode);
5067
5068 auto PrintError = [&](StringRef Msg) {
5069 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5070 Error(S, Msg);
5071 return false;
5072 };
5073
5074  if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5075      (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) &&
5076      !(TH & AMDGPU::CPol::TH_ATOMIC_RETURN))
5077    return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5078
5079 if (TH == 0)
5080 return true;
5081
5082 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5083 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5084 (TH == AMDGPU::CPol::TH_NT_HT)))
5085 return PrintError("invalid th value for SMEM instruction");
5086
5087 if (TH == AMDGPU::CPol::TH_BYPASS) {
5088    if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5089         CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
5090        (Scope == AMDGPU::CPol::SCOPE_SYS &&
5091         !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
5092      return PrintError("scope and th combination is not valid");
5093 }
5094
5095 bool IsStore = TID.mayStore();
5096  bool IsAtomic =
5097      TID.TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet);
5098
5099 if (IsAtomic) {
5100 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5101 return PrintError("invalid th value for atomic instructions");
5102 } else if (IsStore) {
5103 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5104 return PrintError("invalid th value for store instructions");
5105 } else {
5106 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5107 return PrintError("invalid th value for load instructions");
5108 }
5109
5110 return true;
5111}
5112
5113bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5114 const OperandVector &Operands) {
5115 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5116  if (Desc.mayStore() &&
5117      (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5118    SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5119 if (Loc != getInstLoc(Operands)) {
5120 Error(Loc, "TFE modifier has no meaning for store instructions");
5121 return false;
5122 }
5123 }
5124
5125 return true;
5126}
5127
5128bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
5129 const SMLoc &IDLoc,
5130 const OperandVector &Operands) {
5131 if (auto ErrMsg = validateLdsDirect(Inst)) {
5132 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
5133 return false;
5134 }
5135 if (!validateSOPLiteral(Inst)) {
5136 Error(getLitLoc(Operands),
5137 "only one unique literal operand is allowed");
5138 return false;
5139 }
5140 if (!validateVOPLiteral(Inst, Operands)) {
5141 return false;
5142 }
5143 if (!validateConstantBusLimitations(Inst, Operands)) {
5144 return false;
5145 }
5146 if (!validateVOPDRegBankConstraints(Inst, Operands)) {
5147 return false;
5148 }
5149 if (!validateIntClampSupported(Inst)) {
5150 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5151 "integer clamping is not supported on this GPU");
5152 return false;
5153 }
5154 if (!validateOpSel(Inst)) {
5155 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5156 "invalid op_sel operand");
5157 return false;
5158 }
5159 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5160 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5161 "invalid neg_lo operand");
5162 return false;
5163 }
5164 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5165 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5166 "invalid neg_hi operand");
5167 return false;
5168 }
5169 if (!validateDPP(Inst, Operands)) {
5170 return false;
5171 }
5172  // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
5173 if (!validateMIMGD16(Inst)) {
5174 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5175 "d16 modifier is not supported on this GPU");
5176 return false;
5177 }
5178 if (!validateMIMGDim(Inst, Operands)) {
5179 Error(IDLoc, "missing dim operand");
5180 return false;
5181 }
5182 if (!validateMIMGMSAA(Inst)) {
5183 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5184 "invalid dim; must be MSAA type");
5185 return false;
5186 }
5187 if (!validateMIMGDataSize(Inst, IDLoc)) {
5188 return false;
5189 }
5190 if (!validateMIMGAddrSize(Inst, IDLoc))
5191 return false;
5192 if (!validateMIMGAtomicDMask(Inst)) {
5193 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5194 "invalid atomic image dmask");
5195 return false;
5196 }
5197 if (!validateMIMGGatherDMask(Inst)) {
5198 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5199 "invalid image_gather dmask: only one bit must be set");
5200 return false;
5201 }
5202 if (!validateMovrels(Inst, Operands)) {
5203 return false;
5204 }
5205 if (!validateOffset(Inst, Operands)) {
5206 return false;
5207 }
5208 if (!validateMAIAccWrite(Inst, Operands)) {
5209 return false;
5210 }
5211 if (!validateMAISrc2(Inst, Operands)) {
5212 return false;
5213 }
5214 if (!validateMFMA(Inst, Operands)) {
5215 return false;
5216 }
5217 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5218 return false;
5219 }
5220
5221 if (!validateAGPRLdSt(Inst)) {
5222 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5223 ? "invalid register class: data and dst should be all VGPR or AGPR"
5224 : "invalid register class: agpr loads and stores not supported on this GPU"
5225 );
5226 return false;
5227 }
5228 if (!validateVGPRAlign(Inst)) {
5229 Error(IDLoc,
5230 "invalid register class: vgpr tuples must be 64 bit aligned");
5231 return false;
5232 }
5233 if (!validateDS(Inst, Operands)) {
5234 return false;
5235 }
5236
5237 if (!validateBLGP(Inst, Operands)) {
5238 return false;
5239 }
5240
5241 if (!validateDivScale(Inst)) {
5242 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5243 return false;
5244 }
5245 if (!validateWaitCnt(Inst, Operands)) {
5246 return false;
5247 }
5248 if (!validateTFE(Inst, Operands)) {
5249 return false;
5250 }
5251
5252 return true;
5253}
5254
5255static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5256                                            const FeatureBitset &FBS,
5257 unsigned VariantID = 0);
5258
5259static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5260 const FeatureBitset &AvailableFeatures,
5261 unsigned VariantID);
5262
5263bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5264 const FeatureBitset &FBS) {
5265 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5266}
5267
5268bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5269 const FeatureBitset &FBS,
5270 ArrayRef<unsigned> Variants) {
5271 for (auto Variant : Variants) {
5272 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5273 return true;
5274 }
5275
5276 return false;
5277}
5278
5279bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5280 const SMLoc &IDLoc) {
5281 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5282
5283 // Check if requested instruction variant is supported.
5284 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5285 return false;
5286
5287 // This instruction is not supported.
5288 // Clear any other pending errors because they are no longer relevant.
5289 getParser().clearPendingErrors();
5290
5291 // Requested instruction variant is not supported.
5292 // Check if any other variants are supported.
5293 StringRef VariantName = getMatchedVariantName();
5294 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5295 return Error(IDLoc,
5296 Twine(VariantName,
5297 " variant of this instruction is not supported"));
5298 }
5299
5300 // Check if this instruction may be used with a different wavesize.
5301 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5302 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5303
5304 FeatureBitset FeaturesWS32 = getFeatureBits();
5305 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5306 .flip(AMDGPU::FeatureWavefrontSize32);
5307 FeatureBitset AvailableFeaturesWS32 =
5308 ComputeAvailableFeatures(FeaturesWS32);
5309
5310 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5311 return Error(IDLoc, "instruction requires wavesize=32");
5312 }
5313
5314 // Finally check if this instruction is supported on any other GPU.
5315 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5316 return Error(IDLoc, "instruction not supported on this GPU");
5317 }
5318
5319 // Instruction not supported on any GPU. Probably a typo.
5320 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5321 return Error(IDLoc, "invalid instruction" + Suggestion);
5322}
5323
5324static bool isInvalidVOPDY(const OperandVector &Operands,
5325                           uint64_t InvalidOprIdx) {
5326 assert(InvalidOprIdx < Operands.size());
5327 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5328 if (Op.isToken() && InvalidOprIdx > 1) {
5329 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5330 return PrevOp.isToken() && PrevOp.getToken() == "::";
5331 }
5332 return false;
5333}
5334
5335bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5336                                              OperandVector &Operands,
5337                                              MCStreamer &Out,
5338                                              uint64_t &ErrorInfo,
5339                                              bool MatchingInlineAsm) {
5340 MCInst Inst;
5341 unsigned Result = Match_Success;
5342 for (auto Variant : getMatchedVariants()) {
5343 uint64_t EI;
5344 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5345 Variant);
5346    // We order match statuses from least to most specific. We use the most
5347    // specific status as the result:
5348    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5349 if (R == Match_Success || R == Match_MissingFeature ||
5350 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5351 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5352 Result != Match_MissingFeature)) {
5353 Result = R;
5354 ErrorInfo = EI;
5355 }
5356 if (R == Match_Success)
5357 break;
5358 }
5359
5360 if (Result == Match_Success) {
5361 if (!validateInstruction(Inst, IDLoc, Operands)) {
5362 return true;
5363 }
5364 Inst.setLoc(IDLoc);
5365 Out.emitInstruction(Inst, getSTI());
5366 return false;
5367 }
5368
5369 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5370 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5371 return true;
5372 }
5373
5374 switch (Result) {
5375 default: break;
5376 case Match_MissingFeature:
5377 // It has been verified that the specified instruction
5378 // mnemonic is valid. A match was found but it requires
5379 // features which are not supported on this GPU.
5380 return Error(IDLoc, "operands are not valid for this GPU or mode");
5381
5382 case Match_InvalidOperand: {
5383 SMLoc ErrorLoc = IDLoc;
5384 if (ErrorInfo != ~0ULL) {
5385 if (ErrorInfo >= Operands.size()) {
5386 return Error(IDLoc, "too few operands for instruction");
5387 }
5388 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5389 if (ErrorLoc == SMLoc())
5390 ErrorLoc = IDLoc;
5391
5392      if (isInvalidVOPDY(Operands, ErrorInfo))
5393        return Error(ErrorLoc, "invalid VOPDY instruction");
5394 }
5395 return Error(ErrorLoc, "invalid operand for instruction");
5396 }
5397
5398 case Match_MnemonicFail:
5399 llvm_unreachable("Invalid instructions should have been handled already");
5400 }
5401 llvm_unreachable("Implement any new match types added!");
5402}
5403
5404bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5405 int64_t Tmp = -1;
5406 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5407 return true;
5408 }
5409 if (getParser().parseAbsoluteExpression(Tmp)) {
5410 return true;
5411 }
5412 Ret = static_cast<uint32_t>(Tmp);
5413 return false;
5414}
5415
5416bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5417 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5418 return TokError("directive only supported for amdgcn architecture");
5419
5420 std::string TargetIDDirective;
5421 SMLoc TargetStart = getTok().getLoc();
5422 if (getParser().parseEscapedString(TargetIDDirective))
5423 return true;
5424
5425 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5426 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5427 return getParser().Error(TargetRange.Start,
5428 (Twine(".amdgcn_target directive's target id ") +
5429 Twine(TargetIDDirective) +
5430 Twine(" does not match the specified target id ") +
5431 Twine(getTargetStreamer().getTargetID()->toString())).str());
5432
5433 return false;
5434}
5435
5436bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5437 return Error(Range.Start, "value out of range", Range);
5438}
5439
5440bool AMDGPUAsmParser::calculateGPRBlocks(
5441 const FeatureBitset &Features, const MCExpr *VCCUsed,
5442 const MCExpr *FlatScrUsed, bool XNACKUsed,
5443 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
5444 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5445 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
5446 // TODO(scott.linder): These calculations are duplicated from
5447 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5448 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5449 MCContext &Ctx = getContext();
5450
5451 const MCExpr *NumSGPRs = NextFreeSGPR;
5452 int64_t EvaluatedSGPRs;
5453
5454  if (Version.Major >= 10)
5455    NumSGPRs = MCConstantExpr::create(0, Ctx);
5456  else {
5457    unsigned MaxAddressableNumSGPRs =
5458        IsaInfo::getAddressableNumSGPRs(&getSTI());
5459
5460 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
5461 !Features.test(FeatureSGPRInitBug) &&
5462 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5463 return OutOfRangeError(SGPRRange);
5464
5465 const MCExpr *ExtraSGPRs =
5466 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
5467 NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
5468
5469 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5470 (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5471 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5472 return OutOfRangeError(SGPRRange);
5473
5474    if (Features.test(FeatureSGPRInitBug))
5475      NumSGPRs =
5476          MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
5477  }
5478
5479 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
5480 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
5481 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
5482 unsigned Granule) -> const MCExpr * {
5483 const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
5484 const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
5485 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
5486 const MCExpr *AlignToGPR =
5487 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
5488 const MCExpr *DivGPR =
5489 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
5490 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
5491 return SubGPR;
5492 };
5493
5494 VGPRBlocks = GetNumGPRBlocks(
5495 NextFreeVGPR,
5496 IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
5497 SGPRBlocks =
5498 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));
5499
5500 return false;
5501}
5502
5503bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5504 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5505 return TokError("directive only supported for amdgcn architecture");
5506
5507 if (!isHsaAbi(getSTI()))
5508 return TokError("directive only supported for amdhsa OS");
5509
5510 StringRef KernelName;
5511 if (getParser().parseIdentifier(KernelName))
5512 return true;
5513
5516 &getSTI(), getContext());
5517
5518 StringSet<> Seen;
5519
5520 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5521
5522 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
5523 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
5524
5525 SMRange VGPRRange;
5526 const MCExpr *NextFreeVGPR = ZeroExpr;
5527 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
5528 uint64_t SharedVGPRCount = 0;
5529 uint64_t PreloadLength = 0;
5530 uint64_t PreloadOffset = 0;
5531 SMRange SGPRRange;
5532 const MCExpr *NextFreeSGPR = ZeroExpr;
5533
5534 // Count the number of user SGPRs implied from the enabled feature bits.
5535 unsigned ImpliedUserSGPRCount = 0;
5536
5537 // Track if the asm explicitly contains the directive for the user SGPR
5538 // count.
5539 std::optional<unsigned> ExplicitUserSGPRCount;
5540 const MCExpr *ReserveVCC = OneExpr;
5541 const MCExpr *ReserveFlatScr = OneExpr;
5542 std::optional<bool> EnableWavefrontSize32;
5543
5544 while (true) {
5545 while (trySkipToken(AsmToken::EndOfStatement));
5546
5547 StringRef ID;
5548 SMRange IDRange = getTok().getLocRange();
5549 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5550 return true;
5551
5552 if (ID == ".end_amdhsa_kernel")
5553 break;
5554
5555 if (!Seen.insert(ID).second)
5556 return TokError(".amdhsa_ directives cannot be repeated");
5557
5558 SMLoc ValStart = getLoc();
5559 const MCExpr *ExprVal;
5560 if (getParser().parseExpression(ExprVal))
5561 return true;
5562 SMLoc ValEnd = getLoc();
5563 SMRange ValRange = SMRange(ValStart, ValEnd);
5564
5565 int64_t IVal = 0;
5566 uint64_t Val = IVal;
5567 bool EvaluatableExpr;
5568 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
5569 if (IVal < 0)
5570 return OutOfRangeError(ValRange);
5571 Val = IVal;
5572 }
5573
5574#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5575 if (!isUInt<ENTRY##_WIDTH>(Val)) \
5576 return OutOfRangeError(RANGE); \
5577 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
5578 getContext());
5579
5580// Some fields use the parsed value immediately, which requires the expression
5581// to be resolvable.
5582#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
5583 if (!(RESOLVED)) \
5584 return Error(IDRange.Start, "directive should have resolvable expression", \
5585 IDRange);
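// Illustrative expansion (assuming the usual field macros): for a directive
// such as
//   .amdhsa_user_sgpr_dispatch_ptr 1
// PARSE_BITS_ENTRY first checks that Val fits in
// KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_WIDTH bits and then calls
// MCKernelDescriptor::bits_set on the kernel descriptor field with the
// corresponding _SHIFT and mask, so the parsed expression lands in that bit
// field of the descriptor.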
5586
5587 if (ID == ".amdhsa_group_segment_fixed_size") {
5589 CHAR_BIT>(Val))
5590 return OutOfRangeError(ValRange);
5591 KD.group_segment_fixed_size = ExprVal;
5592 } else if (ID == ".amdhsa_private_segment_fixed_size") {
5594 CHAR_BIT>(Val))
5595 return OutOfRangeError(ValRange);
5596 KD.private_segment_fixed_size = ExprVal;
5597 } else if (ID == ".amdhsa_kernarg_size") {
5598 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
5599 return OutOfRangeError(ValRange);
5600 KD.kernarg_size = ExprVal;
5601 } else if (ID == ".amdhsa_user_sgpr_count") {
5602 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5603 ExplicitUserSGPRCount = Val;
5604 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5605 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5607 return Error(IDRange.Start,
5608 "directive is not supported with architected flat scratch",
5609 IDRange);
5611 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5612 ExprVal, ValRange);
5613 if (Val)
5614 ImpliedUserSGPRCount += 4;
5615 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
5616 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5617 if (!hasKernargPreload())
5618 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5619
5620 if (Val > getMaxNumUserSGPRs())
5621 return OutOfRangeError(ValRange);
5622 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
5623 ValRange);
5624 if (Val) {
5625 ImpliedUserSGPRCount += Val;
5626 PreloadLength = Val;
5627 }
5628 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
5629 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5630 if (!hasKernargPreload())
5631 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5632
5633 if (Val >= 1024)
5634 return OutOfRangeError(ValRange);
5635 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
5636 ValRange);
5637 if (Val)
5638 PreloadOffset = Val;
5639 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5640 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5642 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
5643 ValRange);
5644 if (Val)
5645 ImpliedUserSGPRCount += 2;
5646 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5647 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5649 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
5650 ValRange);
5651 if (Val)
5652 ImpliedUserSGPRCount += 2;
5653 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5654 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5656 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5657 ExprVal, ValRange);
5658 if (Val)
5659 ImpliedUserSGPRCount += 2;
5660 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5661 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5663 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
5664 ValRange);
5665 if (Val)
5666 ImpliedUserSGPRCount += 2;
5667 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5669 return Error(IDRange.Start,
5670 "directive is not supported with architected flat scratch",
5671 IDRange);
5672 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5674 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
5675 ExprVal, ValRange);
5676 if (Val)
5677 ImpliedUserSGPRCount += 2;
5678 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5679 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5681 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5682 ExprVal, ValRange);
5683 if (Val)
5684 ImpliedUserSGPRCount += 1;
5685 } else if (ID == ".amdhsa_wavefront_size32") {
5686 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5687 if (IVersion.Major < 10)
5688 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5689 EnableWavefrontSize32 = Val;
5691 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
5692 ValRange);
5693 } else if (ID == ".amdhsa_uses_dynamic_stack") {
5695 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
5696 ValRange);
5697 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5699 return Error(IDRange.Start,
5700 "directive is not supported with architected flat scratch",
5701 IDRange);
5703 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5704 ValRange);
5705 } else if (ID == ".amdhsa_enable_private_segment") {
5707 return Error(
5708 IDRange.Start,
5709 "directive is not supported without architected flat scratch",
5710 IDRange);
5712 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5713 ValRange);
5714 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5716 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
5717 ValRange);
5718 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5720 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
5721 ValRange);
5722 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5724 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
5725 ValRange);
5726 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5728 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
5729 ValRange);
5730 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5732 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
5733 ValRange);
5734 } else if (ID == ".amdhsa_next_free_vgpr") {
5735 VGPRRange = ValRange;
5736 NextFreeVGPR = ExprVal;
5737 } else if (ID == ".amdhsa_next_free_sgpr") {
5738 SGPRRange = ValRange;
5739 NextFreeSGPR = ExprVal;
5740 } else if (ID == ".amdhsa_accum_offset") {
5741 if (!isGFX90A())
5742 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5743 AccumOffset = ExprVal;
5744 } else if (ID == ".amdhsa_reserve_vcc") {
5745 if (EvaluatableExpr && !isUInt<1>(Val))
5746 return OutOfRangeError(ValRange);
5747 ReserveVCC = ExprVal;
5748 } else if (ID == ".amdhsa_reserve_flat_scratch") {
5749 if (IVersion.Major < 7)
5750 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5752 return Error(IDRange.Start,
5753 "directive is not supported with architected flat scratch",
5754 IDRange);
5755 if (EvaluatableExpr && !isUInt<1>(Val))
5756 return OutOfRangeError(ValRange);
5757 ReserveFlatScr = ExprVal;
5758 } else if (ID == ".amdhsa_reserve_xnack_mask") {
5759 if (IVersion.Major < 8)
5760 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5761 if (!isUInt<1>(Val))
5762 return OutOfRangeError(ValRange);
5763 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5764 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5765 IDRange);
5766 } else if (ID == ".amdhsa_float_round_mode_32") {
5768 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
5769 ValRange);
5770 } else if (ID == ".amdhsa_float_round_mode_16_64") {
5772 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
5773 ValRange);
5774 } else if (ID == ".amdhsa_float_denorm_mode_32") {
5776 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
5777 ValRange);
5778 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5780 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
5781 ValRange);
5782 } else if (ID == ".amdhsa_dx10_clamp") {
5783 if (IVersion.Major >= 12)
5784 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5786 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
5787 ValRange);
5788 } else if (ID == ".amdhsa_ieee_mode") {
5789 if (IVersion.Major >= 12)
5790 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5792 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
5793 ValRange);
5794 } else if (ID == ".amdhsa_fp16_overflow") {
5795 if (IVersion.Major < 9)
5796 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5798 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
5799 ValRange);
5800 } else if (ID == ".amdhsa_tg_split") {
5801 if (!isGFX90A())
5802 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5803 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
5804 ExprVal, ValRange);
5805 } else if (ID == ".amdhsa_workgroup_processor_mode") {
5806 if (IVersion.Major < 10)
5807 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5809 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
5810 ValRange);
5811 } else if (ID == ".amdhsa_memory_ordered") {
5812 if (IVersion.Major < 10)
5813 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5815 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
5816 ValRange);
5817 } else if (ID == ".amdhsa_forward_progress") {
5818 if (IVersion.Major < 10)
5819 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5821 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
5822 ValRange);
5823 } else if (ID == ".amdhsa_shared_vgpr_count") {
5824 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5825 if (IVersion.Major < 10 || IVersion.Major >= 12)
5826 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
5827 IDRange);
5828 SharedVGPRCount = Val;
5830 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
5831 ValRange);
5832 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5835 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
5836 ExprVal, ValRange);
5837 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5839 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5840 ExprVal, ValRange);
5841 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5844 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
5845 ExprVal, ValRange);
5846 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5848 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5849 ExprVal, ValRange);
5850 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5852 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5853 ExprVal, ValRange);
5854 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5856 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5857 ExprVal, ValRange);
5858 } else if (ID == ".amdhsa_exception_int_div_zero") {
5860 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5861 ExprVal, ValRange);
5862 } else if (ID == ".amdhsa_round_robin_scheduling") {
5863 if (IVersion.Major < 12)
5864 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
5866 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
5867 ValRange);
5868 } else {
5869 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5870 }
5871
5872#undef PARSE_BITS_ENTRY
5873 }
5874
5875 if (!Seen.contains(".amdhsa_next_free_vgpr"))
5876 return TokError(".amdhsa_next_free_vgpr directive is required");
5877
5878 if (!Seen.contains(".amdhsa_next_free_sgpr"))
5879 return TokError(".amdhsa_next_free_sgpr directive is required");
5880
5881 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
5882
5883 // Consider the case where the total number of user SGPRs, including the
5884 // trailing allocated preload SGPRs, is greater than the number of explicitly
5885 // referenced SGPRs.
5886 if (PreloadLength) {
5887 MCContext &Ctx = getContext();
5888 NextFreeSGPR = AMDGPUMCExpr::createMax(
5889 {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx);
5890 }
5891
5892 const MCExpr *VGPRBlocks;
5893 const MCExpr *SGPRBlocks;
5894 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5895 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5896 EnableWavefrontSize32, NextFreeVGPR,
5897 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5898 SGPRBlocks))
5899 return true;
5900
5901 int64_t EvaluatedVGPRBlocks;
5902 bool VGPRBlocksEvaluatable =
5903 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
5904 if (VGPRBlocksEvaluatable &&
5905 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5906 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
5907 return OutOfRangeError(VGPRRange);
5908 }
5910 KD.compute_pgm_rsrc1, VGPRBlocks,
5911 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
5912 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
5913
5914 int64_t EvaluatedSGPRBlocks;
5915 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
5916 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5917 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
5918 return OutOfRangeError(SGPRRange);
5920 KD.compute_pgm_rsrc1, SGPRBlocks,
5921 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
5922 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
5923
5924 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5925 return TokError("amdgpu_user_sgpr_count smaller than than implied by "
5926 "enabled user SGPRs");
5927
5928 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5929 return TokError("too many user SGPRs enabled");
5931 KD.compute_pgm_rsrc2, MCConstantExpr::create(UserSGPRCount, getContext()),
5932 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
5933 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
5934
5935 int64_t IVal = 0;
5936 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
5937 return TokError("Kernarg size should be resolvable");
5938 uint64_t kernarg_size = IVal;
5939 if (PreloadLength && kernarg_size &&
5940 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
5941 return TokError("Kernarg preload length + offset is larger than the "
5942 "kernarg segment size");
5943
5944 if (isGFX90A()) {
5945 if (!Seen.contains(".amdhsa_accum_offset"))
5946 return TokError(".amdhsa_accum_offset directive is required");
5947 int64_t EvaluatedAccum;
5948 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
5949 uint64_t UEvaluatedAccum = EvaluatedAccum;
5950 if (AccumEvaluatable &&
5951 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
5952 return TokError("accum_offset should be in range [4..256] in "
5953 "increments of 4");
5954
5955 int64_t EvaluatedNumVGPR;
5956 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
5957 AccumEvaluatable &&
5958 UEvaluatedAccum >
5959 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
5960 return TokError("accum_offset exceeds total VGPR allocation");
5961 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
5963 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
5964 MCConstantExpr::create(1, getContext()), getContext());
5966 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
5967 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5968 getContext());
5969 }
5970
5971 if (IVersion.Major >= 10 && IVersion.Major < 12) {
5972 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5973 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5974 return TokError("shared_vgpr_count directive not valid on "
5975 "wavefront size 32");
5976 }
5977
5978 if (VGPRBlocksEvaluatable &&
5979 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
5980 63)) {
5981 return TokError("shared_vgpr_count*2 + "
5982 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5983 "exceed 63\n");
5984 }
5985 }
5986
5987 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
5988 NextFreeVGPR, NextFreeSGPR,
5989 ReserveVCC, ReserveFlatScr);
5990 return false;
5991}
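// A minimal sketch of the directive block this parser accepts (kernel name and
// register counts are made up for illustration):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
// .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr are mandatory; gfx90a
// targets additionally require .amdhsa_accum_offset, and repeating any
// .amdhsa_ directive within the block is rejected.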
5992
5993bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
5995 if (ParseAsAbsoluteExpression(Version))
5996 return true;
5997
5998 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
5999 return false;
6000}
6001
6002bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6003 AMDGPUMCKernelCodeT &C) {
6004 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6005 // assembly for backwards compatibility.
6006 if (ID == "max_scratch_backing_memory_byte_size") {
6007 Parser.eatToEndOfStatement();
6008 return false;
6009 }
6010
6011 SmallString<40> ErrStr;
6012 raw_svector_ostream Err(ErrStr);
6013 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
6014 return TokError(Err.str());
6015 }
6016 Lex();
6017
6018 if (ID == "enable_wavefront_size32") {
6019 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6020 if (!isGFX10Plus())
6021 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
6022 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
6023 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
6024 } else {
6025 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
6026 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
6027 }
6028 }
6029
6030 if (ID == "wavefront_size") {
6031 if (C.wavefront_size == 5) {
6032 if (!isGFX10Plus())
6033 return TokError("wavefront_size=5 is only allowed on GFX10+");
6034 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
6035 return TokError("wavefront_size=5 requires +WavefrontSize32");
6036 } else if (C.wavefront_size == 6) {
6037 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
6038 return TokError("wavefront_size=6 requires +WavefrontSize64");
6039 }
6040 }
6041
6042 return false;
6043}
6044
6045bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6046 AMDGPUMCKernelCodeT KernelCode;
6047 KernelCode.initDefault(&getSTI(), getContext());
6048
6049 while (true) {
6050 // Lex EndOfStatement. This is in a while loop, because lexing a comment
6051 // will set the current token to EndOfStatement.
6052 while (trySkipToken(AsmToken::EndOfStatement));
6053
6054 StringRef ID;
6055 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
6056 return true;
6057
6058 if (ID == ".end_amd_kernel_code_t")
6059 break;
6060
6061 if (ParseAMDKernelCodeTValue(ID, KernelCode))
6062 return true;
6063 }
6064
6065 KernelCode.validate(&getSTI(), getContext());
6066 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6067
6068 return false;
6069}
6070
6071bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6072 StringRef KernelName;
6073 if (!parseId(KernelName, "expected symbol name"))
6074 return true;
6075
6076 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6078
6079 KernelScope.initialize(getContext());
6080 return false;
6081}
6082
6083bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6084 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
6085 return Error(getLoc(),
6086 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6087 "architectures");
6088 }
6089
6090 auto TargetIDDirective = getLexer().getTok().getStringContents();
6091 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6092 return Error(getParser().getTok().getLoc(), "target id must match options");
6093
6094 getTargetStreamer().EmitISAVersion();
6095 Lex();
6096
6097 return false;
6098}
6099
6100bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6101 assert(isHsaAbi(getSTI()));
6102
6103 std::string HSAMetadataString;
6104 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6105 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6106 return true;
6107
6108 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6109 return Error(getLoc(), "invalid HSA metadata");
6110
6111 return false;
6112}
6113
6114/// Common code to parse out a block of text (typically YAML) between start and
6115/// end directives.
6116bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6117 const char *AssemblerDirectiveEnd,
6118 std::string &CollectString) {
6119
6120 raw_string_ostream CollectStream(CollectString);
6121
6122 getLexer().setSkipSpace(false);
6123
6124 bool FoundEnd = false;
6125 while (!isToken(AsmToken::Eof)) {
6126 while (isToken(AsmToken::Space)) {
6127 CollectStream << getTokenStr();
6128 Lex();
6129 }
6130
6131 if (trySkipId(AssemblerDirectiveEnd)) {
6132 FoundEnd = true;
6133 break;
6134 }
6135
6136 CollectStream << Parser.parseStringToEndOfStatement()
6137 << getContext().getAsmInfo()->getSeparatorString();
6138
6139 Parser.eatToEndOfStatement();
6140 }
6141
6142 getLexer().setSkipSpace(true);
6143
6144 if (isToken(AsmToken::Eof) && !FoundEnd) {
6145 return TokError(Twine("expected directive ") +
6146 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6147 }
6148
6149 return false;
6150}
6151
6152/// Parse the assembler directive for new MsgPack-format PAL metadata.
6153bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6154 std::string String;
6155 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6157 return true;
6158
6159 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6160 if (!PALMetadata->setFromString(String))
6161 return Error(getLoc(), "invalid PAL metadata");
6162 return false;
6163}
6164
6165/// Parse the assembler directive for old linear-format PAL metadata.
6166bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6167 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6168 return Error(getLoc(),
6169 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6170 "not available on non-amdpal OSes")).str());
6171 }
6172
6173 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6174 PALMetadata->setLegacy();
6175 for (;;) {
6177 if (ParseAsAbsoluteExpression(Key)) {
6178 return TokError(Twine("invalid value in ") +
6180 }
6181 if (!trySkipToken(AsmToken::Comma)) {
6182 return TokError(Twine("expected an even number of values in ") +
6184 }
6185 if (ParseAsAbsoluteExpression(Value)) {
6186 return TokError(Twine("invalid value in ") +
6188 }
6189 PALMetadata->setRegister(Key, Value);
6190 if (!trySkipToken(AsmToken::Comma))
6191 break;
6192 }
6193 return false;
6194}
6195
6196/// ParseDirectiveAMDGPULDS
6197/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
6198bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6199 if (getParser().checkForValidSection())
6200 return true;
6201
6203 SMLoc NameLoc = getLoc();
6204 if (getParser().parseIdentifier(Name))
6205 return TokError("expected identifier in directive");
6206
6207 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6208 if (getParser().parseComma())
6209 return true;
6210
6211 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6212
6213 int64_t Size;
6214 SMLoc SizeLoc = getLoc();
6215 if (getParser().parseAbsoluteExpression(Size))
6216 return true;
6217 if (Size < 0)
6218 return Error(SizeLoc, "size must be non-negative");
6219 if (Size > LocalMemorySize)
6220 return Error(SizeLoc, "size is too large");
6221
6222 int64_t Alignment = 4;
6223 if (trySkipToken(AsmToken::Comma)) {
6224 SMLoc AlignLoc = getLoc();
6225 if (getParser().parseAbsoluteExpression(Alignment))
6226 return true;
6227 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6228 return Error(AlignLoc, "alignment must be a power of two");
6229
6230 // Alignment larger than the size of LDS is possible in theory, as long
6231 // as the linker manages to place the symbol at address 0, but we do want
6232 // to make sure the alignment fits nicely into a 32-bit integer.
6233 if (Alignment >= 1u << 31)
6234 return Error(AlignLoc, "alignment is too large");
6235 }
6236
6237 if (parseEOL())
6238 return true;
6239
6240 Symbol->redefineIfPossible();
6241 if (!Symbol->isUndefined())
6242 return Error(NameLoc, "invalid symbol redefinition");
6243
6244 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6245 return false;
6246}
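// A small usage sketch (symbol name and sizes made up): ".amdgpu_lds lds_buf, 4096, 16"
// declares 4096 bytes of LDS aligned to 16 bytes. The alignment must be a
// power of two and defaults to 4 when omitted, and the size may not exceed the
// target's local memory size.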
6247
6248bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6249 StringRef IDVal = DirectiveID.getString();
6250
6251 if (isHsaAbi(getSTI())) {
6252 if (IDVal == ".amdhsa_kernel")
6253 return ParseDirectiveAMDHSAKernel();
6254
6255 if (IDVal == ".amdhsa_code_object_version")
6256 return ParseDirectiveAMDHSACodeObjectVersion();
6257
6258 // TODO: Restructure/combine with PAL metadata directive.
6260 return ParseDirectiveHSAMetadata();
6261 } else {
6262 if (IDVal == ".amd_kernel_code_t")
6263 return ParseDirectiveAMDKernelCodeT();
6264
6265 if (IDVal == ".amdgpu_hsa_kernel")
6266 return ParseDirectiveAMDGPUHsaKernel();
6267
6268 if (IDVal == ".amd_amdgpu_isa")
6269 return ParseDirectiveISAVersion();
6270
6272 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6273 Twine(" directive is "
6274 "not available on non-amdhsa OSes"))
6275 .str());
6276 }
6277 }
6278
6279 if (IDVal == ".amdgcn_target")
6280 return ParseDirectiveAMDGCNTarget();
6281
6282 if (IDVal == ".amdgpu_lds")
6283 return ParseDirectiveAMDGPULDS();
6284
6285 if (IDVal == PALMD::AssemblerDirectiveBegin)
6286 return ParseDirectivePALMetadataBegin();
6287
6288 if (IDVal == PALMD::AssemblerDirective)
6289 return ParseDirectivePALMetadata();
6290
6291 return true;
6292}
6293
6294bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6295 MCRegister Reg) {
6296 if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
6297 return isGFX9Plus();
6298
6299 // GFX10+ has 2 more SGPRs 104 and 105.
6300 if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
6301 return hasSGPR104_SGPR105();
6302
6303 switch (Reg.id()) {
6304 case SRC_SHARED_BASE_LO:
6305 case SRC_SHARED_BASE:
6306 case SRC_SHARED_LIMIT_LO:
6307 case SRC_SHARED_LIMIT:
6308 case SRC_PRIVATE_BASE_LO:
6309 case SRC_PRIVATE_BASE:
6310 case SRC_PRIVATE_LIMIT_LO:
6311 case SRC_PRIVATE_LIMIT:
6312 return isGFX9Plus();
6313 case SRC_POPS_EXITING_WAVE_ID:
6314 return isGFX9Plus() && !isGFX11Plus();
6315 case TBA:
6316 case TBA_LO:
6317 case TBA_HI:
6318 case TMA:
6319 case TMA_LO:
6320 case TMA_HI:
6321 return !isGFX9Plus();
6322 case XNACK_MASK:
6323 case XNACK_MASK_LO:
6324 case XNACK_MASK_HI:
6325 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6326 case SGPR_NULL:
6327 return isGFX10Plus();
6328 case SRC_EXECZ:
6329 case SRC_VCCZ:
6330 return !isGFX11Plus();
6331 default:
6332 break;
6333 }
6334
6335 if (isCI())
6336 return true;
6337
6338 if (isSI() || isGFX10Plus()) {
6339 // No flat_scr on SI.
6340 // On GFX10Plus flat scratch is not a valid register operand and can only be
6341 // accessed with s_setreg/s_getreg.
6342 switch (Reg.id()) {
6343 case FLAT_SCR:
6344 case FLAT_SCR_LO:
6345 case FLAT_SCR_HI:
6346 return false;
6347 default:
6348 return true;
6349 }
6350 }
6351
6352 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6353 // SI/CI have.
6354 if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
6355 return hasSGPR102_SGPR103();
6356
6357 return true;
6358}
6359
6360ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6361 StringRef Mnemonic,
6362 OperandMode Mode) {
6363 ParseStatus Res = parseVOPD(Operands);
6364 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6365 return Res;
6366
6367 // Try to parse with a custom parser
6368 Res = MatchOperandParserImpl(Operands, Mnemonic);
6369
6370 // If we successfully parsed the operand or if there was an error parsing,
6371 // we are done.
6372 //
6373 // If we are parsing after we reach EndOfStatement then this means we
6374 // are appending default values to the Operands list. This is only done
6375 // by a custom parser, so we shouldn't continue on to the generic parsing.
6376 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6377 return Res;
6378
6379 SMLoc RBraceLoc;
6380 SMLoc LBraceLoc = getLoc();
6381 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6382 unsigned Prefix = Operands.size();
6383
6384 for (;;) {
6385 auto Loc = getLoc();
6386 Res = parseReg(Operands);
6387 if (Res.isNoMatch())
6388 Error(Loc, "expected a register");
6389 if (!Res.isSuccess())
6390 return ParseStatus::Failure;
6391
6392 RBraceLoc = getLoc();
6393 if (trySkipToken(AsmToken::RBrac))
6394 break;
6395
6396 if (!skipToken(AsmToken::Comma,
6397 "expected a comma or a closing square bracket"))
6398 return ParseStatus::Failure;
6399 }
6400
6401 if (Operands.size() - Prefix > 1) {
6402 Operands.insert(Operands.begin() + Prefix,
6403 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6404 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6405 }
6406
6407 return ParseStatus::Success;
6408 }
6409
6410 return parseRegOrImm(Operands);
6411}
6412
6413StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6414 // Clear any forced encodings from the previous instruction.
6415 setForcedEncodingSize(0);
6416 setForcedDPP(false);
6417 setForcedSDWA(false);
6418
6419 if (Name.ends_with("_e64_dpp")) {
6420 setForcedDPP(true);
6421 setForcedEncodingSize(64);
6422 return Name.substr(0, Name.size() - 8);
6423 }
6424 if (Name.ends_with("_e64")) {
6425 setForcedEncodingSize(64);
6426 return Name.substr(0, Name.size() - 4);
6427 }
6428 if (Name.ends_with("_e32")) {
6429 setForcedEncodingSize(32);
6430 return Name.substr(0, Name.size() - 4);
6431 }
6432 if (Name.ends_with("_dpp")) {
6433 setForcedDPP(true);
6434 return Name.substr(0, Name.size() - 4);
6435 }
6436 if (Name.ends_with("_sdwa")) {
6437 setForcedSDWA(true);
6438 return Name.substr(0, Name.size() - 5);
6439 }
6440 return Name;
6441}
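// Example (mnemonic chosen for illustration): "v_add_f32_e64_dpp" forces the
// DPP variant with a 64-bit encoding and returns "v_add_f32"; a name without a
// recognized suffix is returned unchanged and no encoding is forced.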
6442
6443static void applyMnemonicAliases(StringRef &Mnemonic,
6444 const FeatureBitset &Features,
6445 unsigned VariantID);
6446
6447bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
6448 StringRef Name, SMLoc NameLoc,
6449 OperandVector &Operands) {
6450 // Add the instruction mnemonic
6451 Name = parseMnemonicSuffix(Name);
6452
6453 // If the target architecture uses MnemonicAlias, call it here to parse
6454 // operands correctly.
6455 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6456
6457 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6458
6459 bool IsMIMG = Name.starts_with("image_");
6460
6461 while (!trySkipToken(AsmToken::EndOfStatement)) {
6462 OperandMode Mode = OperandMode_Default;
6463 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6464 Mode = OperandMode_NSA;
6465 ParseStatus Res = parseOperand(Operands, Name, Mode);
6466
6467 if (!Res.isSuccess()) {
6468 checkUnsupportedInstruction(Name, NameLoc);
6469 if (!Parser.hasPendingError()) {
6470 // FIXME: use real operand location rather than the current location.
6471 StringRef Msg = Res.isFailure() ? "failed parsing operand."
6472 : "not a valid operand.";
6473 Error(getLoc(), Msg);
6474 }
6475 while (!trySkipToken(AsmToken::EndOfStatement)) {
6476 lex();
6477 }
6478 return true;
6479 }
6480
6481 // Eat the comma or space if there is one.
6482 trySkipToken(AsmToken::Comma);
6483 }
6484
6485 return false;
6486}
6487
6488//===----------------------------------------------------------------------===//
6489// Utility functions
6490//===----------------------------------------------------------------------===//
6491
6492ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6494 SMLoc S = getLoc();
6495 if (!trySkipId(Name))
6496 return ParseStatus::NoMatch;
6497
6498 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6499 return ParseStatus::Success;
6500}
6501
6502ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6503 int64_t &IntVal) {
6504
6505 if (!trySkipId(Prefix, AsmToken::Colon))
6506 return ParseStatus::NoMatch;
6507
6509}
6510
6511ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6512 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6513 std::function<bool(int64_t &)> ConvertResult) {
6514 SMLoc S = getLoc();
6515 int64_t Value = 0;
6516
6517 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6518 if (!Res.isSuccess())
6519 return Res;
6520
6521 if (ConvertResult && !ConvertResult(Value)) {
6522 Error(S, "invalid " + StringRef(Prefix) + " value.");
6523 }
6524
6525 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6526 return ParseStatus::Success;
6527}
6528
6529ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6530 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6531 bool (*ConvertResult)(int64_t &)) {
6532 SMLoc S = getLoc();
6533 if (!trySkipId(Prefix, AsmToken::Colon))
6534 return ParseStatus::NoMatch;
6535
6536 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
6537 return ParseStatus::Failure;
6538
6539 unsigned Val = 0;
6540 const unsigned MaxSize = 4;
6541
6542 // FIXME: How to verify the number of elements matches the number of src
6543 // operands?
6544 for (int I = 0; ; ++I) {
6545 int64_t Op;
6546 SMLoc Loc = getLoc();
6547 if (!parseExpr(Op))
6548 return ParseStatus::Failure;
6549
6550 if (Op != 0 && Op != 1)
6551 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
6552
6553 Val |= (Op << I);
6554
6555 if (trySkipToken(AsmToken::RBrac))
6556 break;
6557
6558 if (I + 1 == MaxSize)
6559 return Error(getLoc(), "expected a closing square bracket");
6560
6561 if (!skipToken(AsmToken::Comma, "expected a comma"))
6562 return ParseStatus::Failure;
6563 }
6564
6565 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
6566 return ParseStatus::Success;
6567}
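// Worked example (prefix name assumed): "neg:[1,0,1]" parses three 0/1
// elements, OR-ing each element into bit I of Val, so
//   Val = 1 | (0 << 1) | (1 << 2) = 5.
// Supplying more than MaxSize (4) elements is rejected with "expected a
// closing square bracket".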
6568
6569ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
6571 AMDGPUOperand::ImmTy ImmTy) {
6572 int64_t Bit;
6573 SMLoc S = getLoc();
6574
6575 if (trySkipId(Name)) {
6576 Bit = 1;
6577 } else if (trySkipId("no", Name)) {
6578 Bit = 0;
6579 } else {
6580 return ParseStatus::NoMatch;
6581 }
6582
6583 if (Name == "r128" && !hasMIMG_R128())
6584 return Error(S, "r128 modifier is not supported on this GPU");
6585 if (Name == "a16" && !hasA16())
6586 return Error(S, "a16 modifier is not supported on this GPU");
6587
6588 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6589 ImmTy = AMDGPUOperand::ImmTyR128A16;
6590
6591 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
6592 return ParseStatus::Success;
6593}
6594
6595unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
6596 bool &Disabling) const {
6597 Disabling = Id.consume_front("no");
6598
6599 if (isGFX940() && !Mnemo.starts_with("s_")) {
6600 return StringSwitch<unsigned>(Id)
6601 .Case("nt", AMDGPU::CPol::NT)
6602 .Case("sc0", AMDGPU::CPol::SC0)
6603 .Case("sc1", AMDGPU::CPol::SC1)
6604 .Default(0);
6605 }
6606
6607 return StringSwitch<unsigned>(Id)
6608 .Case("dlc", AMDGPU::CPol::DLC)
6609 .Case("glc", AMDGPU::CPol::GLC)
6610 .Case("scc", AMDGPU::CPol::SCC)
6611 .Case("slc", AMDGPU::CPol::SLC)
6612 .Default(0);
6613}
6614
6615ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
6616 if (isGFX12Plus()) {
6617 SMLoc StringLoc = getLoc();
6618
6619 int64_t CPolVal = 0;
6622
6623 for (;;) {
6624 if (ResTH.isNoMatch()) {
6625 int64_t TH;
6626 ResTH = parseTH(Operands, TH);
6627 if (ResTH.isFailure())
6628 return ResTH;
6629 if (ResTH.isSuccess()) {
6630 CPolVal |= TH;
6631 continue;
6632 }
6633 }
6634
6635 if (ResScope.isNoMatch()) {
6636 int64_t Scope;
6637 ResScope = parseScope(Operands, Scope);
6638 if (ResScope.isFailure())
6639 return ResScope;
6640 if (ResScope.isSuccess()) {
6641 CPolVal |= Scope;
6642 continue;
6643 }
6644 }
6645
6646 break;
6647 }
6648
6649 if (ResTH.isNoMatch() && ResScope.isNoMatch())
6650 return ParseStatus::NoMatch;
6651
6652 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
6653 AMDGPUOperand::ImmTyCPol));
6654 return ParseStatus::Success;
6655 }
6656
6657 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
6658 SMLoc OpLoc = getLoc();
6659 unsigned Enabled = 0, Seen = 0;
6660 for (;;) {
6661 SMLoc S = getLoc();
6662 bool Disabling;
6663 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
6664 if (!CPol)
6665 break;
6666
6667 lex();
6668
6669 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
6670 return Error(S, "dlc modifier is not supported on this GPU");
6671
6672 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
6673 return Error(S, "scc modifier is not supported on this GPU");
6674
6675 if (Seen & CPol)
6676 return Error(S, "duplicate cache policy modifier");
6677
6678 if (!Disabling)
6679 Enabled |= CPol;
6680
6681 Seen |= CPol;
6682 }
6683
6684 if (!Seen)
6685 return ParseStatus::NoMatch;
6686
6687 Operands.push_back(
6688 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
6689 return ParseStatus::Success;
6690}
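// Example of the pre-GFX12 path (modifiers chosen for illustration): "glc slc"
// enables both bits, while a "no" prefix such as "noglc" records the modifier
// as seen but leaves it disabled; naming the same modifier twice produces a
// "duplicate cache policy modifier" error.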
6691
6692ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
6693 int64_t &Scope) {
6694 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
6696
6697 ParseStatus Res = parseStringOrIntWithPrefix(
6698 Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
6699 Scope);
6700
6701 if (Res.isSuccess())
6702 Scope = Scopes[Scope];
6703
6704 return Res;
6705}
6706
6707ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
6708 TH = AMDGPU::CPol::TH_RT; // default
6709
6711 SMLoc StringLoc;
6712 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
6713 if (!Res.isSuccess())
6714 return Res;
6715
6716 if (Value == "TH_DEFAULT")
6718 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" ||
6719 Value == "TH_LOAD_NT_WB") {
6720 return Error(StringLoc, "invalid th value");
6721 } else if (Value.consume_front("TH_ATOMIC_")) {
6723 } else if (Value.consume_front("TH_LOAD_")) {
6725 } else if (Value.consume_front("TH_STORE_")) {
6727 } else {
6728 return Error(StringLoc, "invalid th value");
6729 }
6730
6731 if (Value == "BYPASS")
6733
6734 if (TH != 0) {
6741 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
6744 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
6746 .Default(0xffffffff);
6747 else
6753 .Case("RT_WB", AMDGPU::CPol::TH_RT_WB)
6754 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
6755 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
6756 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
6757 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
6758 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
6759 .Default(0xffffffff);
6760 }
6761
6762 if (TH == 0xffffffff)
6763 return Error(StringLoc, "invalid th value");
6764
6765 return ParseStatus::Success;
6766}
6767
6769 MCInst& Inst, const OperandVector& Operands,
6770 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6771 AMDGPUOperand::ImmTy ImmT,
6772 int64_t Default = 0) {
6773 auto i = OptionalIdx.find(ImmT);
6774 if (i != OptionalIdx.end()) {
6775 unsigned Idx = i->second;
6776 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
6777 } else {
6779 }
6780}
6781
6782ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6784 SMLoc &StringLoc) {
6785 if (!trySkipId(Prefix, AsmToken::Colon))
6786 return ParseStatus::NoMatch;
6787
6788 StringLoc = getLoc();
6789 return parseId(Value, "expected an identifier") ? ParseStatus::Success
6791}
6792
6793ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
6795 int64_t &IntVal) {
6796 if (!trySkipId(Name, AsmToken::Colon))
6797 return ParseStatus::NoMatch;
6798
6799 SMLoc StringLoc = getLoc();
6800
6802 if (isToken(AsmToken::Identifier)) {
6803 Value = getTokenStr();
6804 lex();
6805
6806 for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
6807 if (Value == Ids[IntVal])
6808 break;
6809 } else if (!parseExpr(IntVal))
6810 return ParseStatus::Failure;
6811
6812 if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
6813 return Error(StringLoc, "invalid " + Twine(Name) + " value");
6814
6815 return ParseStatus::Success;
6816}
6817
6818ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
6820 AMDGPUOperand::ImmTy Type) {
6821 SMLoc S = getLoc();
6822 int64_t IntVal;
6823
6824 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
6825 if (Res.isSuccess())
6826 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
6827
6828 return Res;
6829}
6830
6831//===----------------------------------------------------------------------===//
6832// MTBUF format
6833//===----------------------------------------------------------------------===//
6834
6835bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6836 int64_t MaxVal,
6837 int64_t &Fmt) {
6838 int64_t Val;
6839 SMLoc Loc = getLoc();
6840
6841 auto Res = parseIntWithPrefix(Pref, Val);
6842 if (Res.isFailure())
6843 return false;
6844 if (Res.isNoMatch())
6845 return true;
6846
6847 if (Val < 0 || Val > MaxVal) {
6848 Error(Loc, Twine("out of range ", StringRef(Pref)));
6849 return false;
6850 }
6851
6852 Fmt = Val;
6853 return true;
6854}
6855
6856ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
6857 AMDGPUOperand::ImmTy ImmTy) {
6858 const char *Pref = "index_key";
6859 int64_t ImmVal = 0;
6860 SMLoc Loc = getLoc();
6861 auto Res = parseIntWithPrefix(Pref, ImmVal);
6862 if (!Res.isSuccess())
6863 return Res;
6864
6865 if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
6866 return Error(Loc, Twine("out of range ", StringRef(Pref)));
6867
6868 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
6869 return Error(Loc, Twine("out of range ", StringRef(Pref)));
6870
6871 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
6872 return ParseStatus::Success;
6873}
6874
6875ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
6876 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
6877}
6878
6879ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
6880 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
6881}
6882
6883// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6884// values to live in a joint format operand in the MCInst encoding.
6885ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6886 using namespace llvm::AMDGPU::MTBUFFormat;
6887
6888 int64_t Dfmt = DFMT_UNDEF;
6889 int64_t Nfmt = NFMT_UNDEF;
6890
6891 // dfmt and nfmt can appear in either order, and each is optional.
6892 for (int I = 0; I < 2; ++I) {
6893 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6894 return ParseStatus::Failure;
6895
6896 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
6897 return ParseStatus::Failure;
6898
6899 // Skip optional comma between dfmt/nfmt
6900 // but guard against 2 commas following each other.
6901 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6902 !peekToken().is(AsmToken::Comma)) {
6903 trySkipToken(AsmToken::Comma);
6904 }
6905 }
6906
6907 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6908 return ParseStatus::NoMatch;
6909
6910 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6911 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6912
6913 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6914 return ParseStatus::Success;
6915}
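// Example (format values assumed): "dfmt:4, nfmt:2" and "nfmt:2, dfmt:4" parse
// to the same joint format operand; if only one of the two is present, the
// other falls back to its _DEFAULT value before encodeDfmtNfmt combines them.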
6916
6917ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6918 using namespace llvm::AMDGPU::MTBUFFormat;
6919
6920 int64_t Fmt = UFMT_UNDEF;
6921
6922 if (!tryParseFmt("format", UFMT_MAX, Fmt))
6923 return ParseStatus::Failure;
6924
6925 if (Fmt == UFMT_UNDEF)
6926 return ParseStatus::NoMatch;
6927
6928 Format = Fmt;
6929 return ParseStatus::Success;
6930}
6931
6932bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6933 int64_t &Nfmt,
6934 StringRef FormatStr,
6935 SMLoc Loc) {
6936 using namespace llvm::AMDGPU::MTBUFFormat;
6937 int64_t Format;
6938
6939 Format = getDfmt(FormatStr);
6940 if (Format != DFMT_UNDEF) {
6941 Dfmt = Format;
6942 return true;
6943 }
6944
6945 Format = getNfmt(FormatStr, getSTI());
6946 if (Format != NFMT_UNDEF) {
6947 Nfmt = Format;
6948 return true;
6949 }
6950
6951 Error(Loc, "unsupported format");
6952 return false;
6953}
6954
6955ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6956 SMLoc FormatLoc,
6957 int64_t &Format) {
6958 using namespace llvm::AMDGPU::MTBUFFormat;
6959
6960 int64_t Dfmt = DFMT_UNDEF;
6961 int64_t Nfmt = NFMT_UNDEF;
6962 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6963 return ParseStatus::Failure;
6964
6965 if (trySkipToken(AsmToken::Comma)) {
6966 StringRef Str;
6967 SMLoc Loc = getLoc();
6968 if (!parseId(Str, "expected a format string") ||
6969 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
6970 return ParseStatus::Failure;
6971 if (Dfmt == DFMT_UNDEF)
6972 return Error(Loc, "duplicate numeric format");
6973 if (Nfmt == NFMT_UNDEF)
6974 return Error(Loc, "duplicate data format");
6975 }
6976
6977 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6978 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6979
6980 if (isGFX10Plus()) {
6981 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6982 if (Ufmt == UFMT_UNDEF)
6983 return Error(FormatLoc, "unsupported format");
6984 Format = Ufmt;
6985 } else {
6986 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6987 }
6988
6989 return ParseStatus::Success;
6990}
6991
6992ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6993 SMLoc Loc,
6994 int64_t &Format) {
6995 using namespace llvm::AMDGPU::MTBUFFormat;
6996
6997 auto Id = getUnifiedFormat(FormatStr, getSTI());
6998 if (Id == UFMT_UNDEF)
6999 return ParseStatus::NoMatch;
7000
7001 if (!isGFX10Plus())
7002 return Error(Loc, "unified format is not supported on this GPU");
7003
7004 Format = Id;
7005 return ParseStatus::Success;
7006}
7007
7008ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7009 using namespace llvm::AMDGPU::MTBUFFormat;
7010 SMLoc Loc = getLoc();
7011
7012 if (!parseExpr(Format))
7013 return ParseStatus::Failure;
7014 if (!isValidFormatEncoding(Format, getSTI()))
7015 return Error(Loc, "out of range format");
7016
7017 return ParseStatus::Success;
7018}
7019
7020ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
7021 using namespace llvm::AMDGPU::MTBUFFormat;
7022
7023 if (!trySkipId("format", AsmToken::Colon))
7024 return ParseStatus::NoMatch;
7025
7026 if (trySkipToken(AsmToken::LBrac)) {
7027 StringRef FormatStr;
7028 SMLoc Loc = getLoc();
7029 if (!parseId(FormatStr, "expected a format string"))
7030 return ParseStatus::Failure;
7031
7032 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
7033 if (Res.isNoMatch())
7034 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
7035 if (!Res.isSuccess())
7036 return Res;
7037
7038 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7039 return ParseStatus::Failure;
7040
7041 return ParseStatus::Success;
7042 }
7043
7044 return parseNumericFormat(Format);
7045}
7046
7047ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
7048 using namespace llvm::AMDGPU::MTBUFFormat;
7049
7050 int64_t Format = getDefaultFormatEncoding(getSTI());
7051 ParseStatus Res;
7052 SMLoc Loc = getLoc();
7053
7054 // Parse legacy format syntax.
7055 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
7056 if (Res.isFailure())
7057 return Res;
7058
7059 bool FormatFound = Res.isSuccess();
7060
7061 Operands.push_back(
7062 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7063
7064 if (FormatFound)
7065 trySkipToken(AsmToken::Comma);
7066
7067 if (isToken(AsmToken::EndOfStatement)) {
7068 // We are expecting an soffset operand,
7069 // but let the matcher handle the error.
7070 return ParseStatus::Success;
7071 }
7072
7073 // Parse soffset.
7074 Res = parseRegOrImm(Operands);
7075 if (!Res.isSuccess())
7076 return Res;
7077
7078 trySkipToken(AsmToken::Comma);
7079
7080 if (!FormatFound) {
7081 Res = parseSymbolicOrNumericFormat(Format);
7082 if (Res.isFailure())
7083 return Res;
7084 if (Res.isSuccess()) {
7085 auto Size = Operands.size();
7086 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
7087 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7088 Op.setImm(Format);
7089 }
7090 return ParseStatus::Success;
7091 }
7092
7093 if (isId("format") && peekToken().is(AsmToken::Colon))
7094 return Error(getLoc(), "duplicate format");
7095 return ParseStatus::Success;
7096}
7097
7098ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7099 ParseStatus Res =
7100 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7101 if (Res.isNoMatch()) {
7102 Res = parseIntWithPrefix("inst_offset", Operands,
7103 AMDGPUOperand::ImmTyInstOffset);
7104 }
7105 return Res;
7106}
7107
7108ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7109 ParseStatus Res =
7110 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7111 if (Res.isNoMatch())
7112 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7113 return Res;
7114}
7115
7116ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7117 ParseStatus Res =
7118 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7119 if (Res.isNoMatch()) {
7120 Res =
7121 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7122 }
7123 return Res;
7124}
7125
7126//===----------------------------------------------------------------------===//
7127// Exp
7128//===----------------------------------------------------------------------===//
7129
7130void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7131 OptionalImmIndexMap OptionalIdx;
7132
7133 unsigned OperandIdx[4];
7134 unsigned EnMask = 0;
7135 int SrcIdx = 0;
7136
7137 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7138 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7139
7140 // Add the register arguments
7141 if (Op.isReg()) {
7142 assert(SrcIdx < 4);
7143 OperandIdx[SrcIdx] = Inst.size();
7144 Op.addRegOperands(Inst, 1);
7145 ++SrcIdx;
7146 continue;
7147 }
7148
7149 if (Op.isOff()) {
7150 assert(SrcIdx < 4);
7151 OperandIdx[SrcIdx] = Inst.size();
7153 ++SrcIdx;
7154 continue;
7155 }
7156
7157 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7158 Op.addImmOperands(Inst, 1);
7159 continue;
7160 }
7161
7162 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7163 continue;
7164
7165 // Handle optional arguments
7166 OptionalIdx[Op.getImmTy()] = i;
7167 }
7168
7169 assert(SrcIdx == 4);
7170
7171 bool Compr = false;
7172 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7173 Compr = true;
7174 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7175 Inst.getOperand(OperandIdx[2]).setReg(MCRegister());
7176 Inst.getOperand(OperandIdx[3]).setReg(MCRegister());
7177 }
7178
7179 for (auto i = 0; i < SrcIdx; ++i) {
7180 if (Inst.getOperand(OperandIdx[i]).getReg()) {
7181 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7182 }
7183 }
7184
7185 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7186 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7187
7188 Inst.addOperand(MCOperand::createImm(EnMask));
7189}
7190
7191//===----------------------------------------------------------------------===//
7192// s_waitcnt
7193//===----------------------------------------------------------------------===//
7194
7195static bool
7197 const AMDGPU::IsaVersion ISA,
7198 int64_t &IntVal,
7199 int64_t CntVal,
7200 bool Saturate,
7201 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7202 unsigned (*decode)(const IsaVersion &Version, unsigned))
7203{
7204 bool Failed = false;
7205
7206 IntVal = encode(ISA, IntVal, CntVal);
7207 if (CntVal != decode(ISA, IntVal)) {
7208 if (Saturate) {
7209 IntVal = encode(ISA, IntVal, -1);
7210 } else {
7211 Failed = true;
7212 }
7213 }
7214 return Failed;
7215}
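// Sketch of the saturating behaviour (counter values assumed): if CntVal does
// not survive an encode/decode round trip, i.e. it does not fit in the counter
// field, the plain counter form reports "too large value", while the _sat form
// re-encodes with -1 so the field is clamped to its maximum instead.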
7216
7217bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7218
7219 SMLoc CntLoc = getLoc();
7220 StringRef CntName = getTokenStr();
7221
7222 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7223 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7224 return false;
7225
7226 int64_t CntVal;
7227 SMLoc ValLoc = getLoc();
7228 if (!parseExpr(CntVal))
7229 return false;
7230
7232
7233 bool Failed = true;
7234 bool Sat = CntName.ends_with("_sat");
7235
7236 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7237 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7238 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7239 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7240 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7241 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7242 } else {
7243 Error(CntLoc, "invalid counter name " + CntName);
7244 return false;
7245 }
7246
7247 if (Failed) {
7248 Error(ValLoc, "too large value for " + CntName);
7249 return false;
7250 }
7251
7252 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7253 return false;
7254
7255 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7256 if (isToken(AsmToken::EndOfStatement)) {
7257 Error(getLoc(), "expected a counter name");
7258 return false;
7259 }
7260 }
7261
7262 return true;
7263}
7264
7265ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7267 int64_t Waitcnt = getWaitcntBitMask(ISA);
7268 SMLoc S = getLoc();
7269
7270 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7271 while (!isToken(AsmToken::EndOfStatement)) {
7272 if (!parseCnt(Waitcnt))
7273 return ParseStatus::Failure;
7274 }
7275 } else {
7276 if (!parseExpr(Waitcnt))
7277 return ParseStatus::Failure;
7278 }
7279
7280 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7281 return ParseStatus::Success;
7282}
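// Examples of accepted s_waitcnt operands (values assumed): the named form
//   s_waitcnt vmcnt(0) & lgkmcnt(0)
// (a comma also works as the separator), or a plain expression such as
//   s_waitcnt 0
// which takes the parseExpr path and bypasses parseCnt entirely.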
7283
7284bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7285 SMLoc FieldLoc = getLoc();
7286 StringRef FieldName = getTokenStr();
7287 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7288 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7289 return false;
7290
7291 SMLoc ValueLoc = getLoc();
7292 StringRef ValueName = getTokenStr();
7293 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7294 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7295 return false;
7296
7297 unsigned Shift;
7298 if (FieldName == "instid0") {
7299 Shift = 0;
7300 } else if (FieldName == "instskip") {
7301 Shift = 4;
7302 } else if (FieldName == "instid1") {
7303 Shift = 7;
7304 } else {
7305 Error(FieldLoc, "invalid field name " + FieldName);
7306 return false;
7307 }
7308
7309 int Value;
7310 if (Shift == 4) {
7311 // Parse values for instskip.
7313 .Case("SAME", 0)
7314 .Case("NEXT", 1)
7315 .Case("SKIP_1", 2)
7316 .Case("SKIP_2", 3)
7317 .Case("SKIP_3", 4)
7318 .Case("SKIP_4", 5)
7319 .Default(-1);
7320 } else {
7321 // Parse values for instid0 and instid1.
7323 .Case("NO_DEP", 0)
7324 .Case("VALU_DEP_1", 1)
7325 .Case("VALU_DEP_2", 2)
7326 .Case("VALU_DEP_3", 3)
7327 .Case("VALU_DEP_4", 4)
7328 .Case("TRANS32_DEP_1", 5)
7329 .Case("TRANS32_DEP_2", 6)
7330 .Case("TRANS32_DEP_3", 7)
7331 .Case("FMA_ACCUM_CYCLE_1", 8)
7332 .Case("SALU_CYCLE_1", 9)
7333 .Case("SALU_CYCLE_2", 10)
7334 .Case("SALU_CYCLE_3", 11)
7335 .Default(-1);
7336 }
7337 if (Value < 0) {
7338 Error(ValueLoc, "invalid value name " + ValueName);
7339 return false;
7340 }
7341
7342 Delay |= Value << Shift;
7343 return true;
7344}
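// Worked example (field values assumed): for
//   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
// the three fields land at shifts 0, 4 and 7, giving
//   1 | (1 << 4) | (9 << 7) = 0x491.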
7345
7346ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7347 int64_t Delay = 0;
7348 SMLoc S = getLoc();
7349
7350 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7351 do {
7352 if (!parseDelay(Delay))
7353 return ParseStatus::Failure;
7354 } while (trySkipToken(AsmToken::Pipe));
7355 } else {
7356 if (!parseExpr(Delay))
7357 return ParseStatus::Failure;
7358 }
7359
7360 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7361 return ParseStatus::Success;
7362}
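// For illustration, delay fields are combined with '|', e.g.:
//   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
// A raw immediate expression is also accepted.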
7363
7364bool
7365AMDGPUOperand::isSWaitCnt() const {
7366 return isImm();
7367}
7368
7369bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7370
7371//===----------------------------------------------------------------------===//
7372// DepCtr
7373//===----------------------------------------------------------------------===//
7374
7375void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7376 StringRef DepCtrName) {
7377 switch (ErrorId) {
7378 case OPR_ID_UNKNOWN:
7379 Error(Loc, Twine("invalid counter name ", DepCtrName));
7380 return;
7381 case OPR_ID_UNSUPPORTED:
7382 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7383 return;
7384 case OPR_ID_DUPLICATE:
7385 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7386 return;
7387 case OPR_VAL_INVALID:
7388 Error(Loc, Twine("invalid value for ", DepCtrName));
7389 return;
7390 default:
7391 assert(false);
7392 }
7393}
7394
7395bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7396
7397 using namespace llvm::AMDGPU::DepCtr;
7398
7399 SMLoc DepCtrLoc = getLoc();
7400 StringRef DepCtrName = getTokenStr();
7401
7402 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7403 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7404 return false;
7405
7406 int64_t ExprVal;
7407 if (!parseExpr(ExprVal))
7408 return false;
7409
7410 unsigned PrevOprMask = UsedOprMask;
7411 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7412
7413 if (CntVal < 0) {
7414 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7415 return false;
7416 }
7417
7418 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7419 return false;
7420
7421 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7422 if (isToken(AsmToken::EndOfStatement)) {
7423 Error(getLoc(), "expected a counter name");
7424 return false;
7425 }
7426 }
7427
7428 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7429 DepCtr = (DepCtr & ~CntValMask) | CntVal;
7430 return true;
7431}
7432
7433ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7434 using namespace llvm::AMDGPU::DepCtr;
7435
7436 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
7437 SMLoc Loc = getLoc();
7438
7439 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7440 unsigned UsedOprMask = 0;
7441 while (!isToken(AsmToken::EndOfStatement)) {
7442 if (!parseDepCtr(DepCtr, UsedOprMask))
7443 return ParseStatus::Failure;
7444 }
7445 } else {
7446 if (!parseExpr(DepCtr))
7447 return ParseStatus::Failure;
7448 }
7449
7450 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
7451 return ParseStatus::Success;
7452}
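// For illustration (counter names such as depctr_va_vdst come from the DepCtr
// tables in AMDGPUAsmUtils), a typical use combines named counters, e.g.:
//   s_waitcnt_depctr depctr_va_vdst(0) & depctr_sa_sdst(0)
// A raw immediate expression is also accepted.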
7453
7454bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
7455
7456//===----------------------------------------------------------------------===//
7457// hwreg
7458//===----------------------------------------------------------------------===//
7459
7460ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
7461 OperandInfoTy &Offset,
7462 OperandInfoTy &Width) {
7463 using namespace llvm::AMDGPU::Hwreg;
7464
7465 if (!trySkipId("hwreg", AsmToken::LParen))
7466 return ParseStatus::NoMatch;
7467
7468 // The register may be specified by name or using a numeric code
7469 HwReg.Loc = getLoc();
7470 if (isToken(AsmToken::Identifier) &&
7471 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7472 HwReg.IsSymbolic = true;
7473 lex(); // skip register name
7474 } else if (!parseExpr(HwReg.Val, "a register name")) {
7475 return ParseStatus::Failure;
7476 }
7477
7478 if (trySkipToken(AsmToken::RParen))
7479 return ParseStatus::Success;
7480
7481 // parse optional params
7482 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
7483 return ParseStatus::Failure;
7484
7485 Offset.Loc = getLoc();
7486 if (!parseExpr(Offset.Val))
7487 return ParseStatus::Failure;
7488
7489 if (!skipToken(AsmToken::Comma, "expected a comma"))
7490 return ParseStatus::Failure;
7491
7492 Width.Loc = getLoc();
7493 if (!parseExpr(Width.Val) ||
7494 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7495 return ParseStatus::Failure;
7496
7497 return ParseStatus::Success;
7498}
7499
7500ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
7501 using namespace llvm::AMDGPU::Hwreg;
7502
7503 int64_t ImmVal = 0;
7504 SMLoc Loc = getLoc();
7505
7506 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
7507 HwregId::Default);
7508 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
7509 HwregOffset::Default);
7510 struct : StructuredOpField {
7511 using StructuredOpField::StructuredOpField;
7512 bool validate(AMDGPUAsmParser &Parser) const override {
7513 if (!isUIntN(Width, Val - 1))
7514 return Error(Parser, "only values from 1 to 32 are legal");
7515 return true;
7516 }
7517 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
7518 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
7519
7520 if (Res.isNoMatch())
7521 Res = parseHwregFunc(HwReg, Offset, Width);
7522
7523 if (Res.isSuccess()) {
7524 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
7525 return ParseStatus::Failure;
7526 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
7527 }
7528
7529 if (Res.isNoMatch() &&
7530 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
7531    Res = ParseStatus::Success;
7532
7533 if (!Res.isSuccess())
7534 return ParseStatus::Failure;
7535
7536 if (!isUInt<16>(ImmVal))
7537 return Error(Loc, "invalid immediate: only 16-bit values are legal");
7538 Operands.push_back(
7539 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
7540 return ParseStatus::Success;
7541}
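// For illustration, all of the forms handled above are accepted, e.g.:
//   s_getreg_b32 s0, hwreg(HW_REG_MODE)              // symbolic name only
//   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 32)       // name, offset, width
//   s_getreg_b32 s0, {id: 1, offset: 0, size: 32}    // structured fields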
7542
7543bool AMDGPUOperand::isHwreg() const {
7544 return isImmTy(ImmTyHwreg);
7545}
7546
7547//===----------------------------------------------------------------------===//
7548// sendmsg
7549//===----------------------------------------------------------------------===//
7550
7551bool
7552AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
7553 OperandInfoTy &Op,
7554 OperandInfoTy &Stream) {
7555 using namespace llvm::AMDGPU::SendMsg;
7556
7557 Msg.Loc = getLoc();
7558 if (isToken(AsmToken::Identifier) &&
7559 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7560 Msg.IsSymbolic = true;
7561 lex(); // skip message name
7562 } else if (!parseExpr(Msg.Val, "a message name")) {
7563 return false;
7564 }
7565
7566 if (trySkipToken(AsmToken::Comma)) {
7567 Op.IsDefined = true;
7568 Op.Loc = getLoc();
7569 if (isToken(AsmToken::Identifier) &&
7570 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
7571            OPR_ID_UNKNOWN) {
7572      lex(); // skip operation name
7573 } else if (!parseExpr(Op.Val, "an operation name")) {
7574 return false;
7575 }
7576
7577 if (trySkipToken(AsmToken::Comma)) {
7578 Stream.IsDefined = true;
7579 Stream.Loc = getLoc();
7580 if (!parseExpr(Stream.Val))
7581 return false;
7582 }
7583 }
7584
7585 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
7586}
7587
7588bool
7589AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
7590 const OperandInfoTy &Op,
7591 const OperandInfoTy &Stream) {
7592 using namespace llvm::AMDGPU::SendMsg;
7593
7594  // Validation strictness depends on whether the message is specified
7595  // in a symbolic or in a numeric form. In the latter case
7596  // only the possibility of encoding is checked.
7597 bool Strict = Msg.IsSymbolic;
7598
7599 if (Strict) {
7600 if (Msg.Val == OPR_ID_UNSUPPORTED) {
7601 Error(Msg.Loc, "specified message id is not supported on this GPU");
7602 return false;
7603 }
7604 } else {
7605 if (!isValidMsgId(Msg.Val, getSTI())) {
7606 Error(Msg.Loc, "invalid message id");
7607 return false;
7608 }
7609 }
7610 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
7611 if (Op.IsDefined) {
7612 Error(Op.Loc, "message does not support operations");
7613 } else {
7614 Error(Msg.Loc, "missing message operation");
7615 }
7616 return false;
7617 }
7618 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
7619 if (Op.Val == OPR_ID_UNSUPPORTED)
7620 Error(Op.Loc, "specified operation id is not supported on this GPU");
7621 else
7622 Error(Op.Loc, "invalid operation id");
7623 return false;
7624 }
7625 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
7626 Stream.IsDefined) {
7627 Error(Stream.Loc, "message operation does not support streams");
7628 return false;
7629 }
7630 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
7631 Error(Stream.Loc, "invalid message stream id");
7632 return false;
7633 }
7634 return true;
7635}
7636
7637ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
7638 using namespace llvm::AMDGPU::SendMsg;
7639
7640 int64_t ImmVal = 0;
7641 SMLoc Loc = getLoc();
7642
7643 if (trySkipId("sendmsg", AsmToken::LParen)) {
7644 OperandInfoTy Msg(OPR_ID_UNKNOWN);
7645 OperandInfoTy Op(OP_NONE_);
7646 OperandInfoTy Stream(STREAM_ID_NONE_);
7647 if (parseSendMsgBody(Msg, Op, Stream) &&
7648 validateSendMsg(Msg, Op, Stream)) {
7649 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
7650 } else {
7651 return ParseStatus::Failure;
7652 }
7653 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
7654 if (ImmVal < 0 || !isUInt<16>(ImmVal))
7655 return Error(Loc, "invalid immediate: only 16-bit values are legal");
7656 } else {
7657 return ParseStatus::Failure;
7658 }
7659
7660 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
7661 return ParseStatus::Success;
7662}
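// For illustration:
//   s_sendmsg sendmsg(MSG_INTERRUPT)
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)   // message, operation, stream
// A raw 16-bit immediate is also accepted.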
7663
7664bool AMDGPUOperand::isSendMsg() const {
7665 return isImmTy(ImmTySendMsg);
7666}
7667
7668//===----------------------------------------------------------------------===//
7669// v_interp
7670//===----------------------------------------------------------------------===//
7671
7672ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
7673 StringRef Str;
7674 SMLoc S = getLoc();
7675
7676 if (!parseId(Str))
7677 return ParseStatus::NoMatch;
7678
7679 int Slot = StringSwitch<int>(Str)
7680 .Case("p10", 0)
7681 .Case("p20", 1)
7682 .Case("p0", 2)
7683 .Default(-1);
7684
7685 if (Slot == -1)
7686 return Error(S, "invalid interpolation slot");
7687
7688 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
7689 AMDGPUOperand::ImmTyInterpSlot));
7690 return ParseStatus::Success;
7691}
7692
7693ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
7694 StringRef Str;
7695 SMLoc S = getLoc();
7696
7697 if (!parseId(Str))
7698 return ParseStatus::NoMatch;
7699
7700 if (!Str.starts_with("attr"))
7701 return Error(S, "invalid interpolation attribute");
7702
7703 StringRef Chan = Str.take_back(2);
7704 int AttrChan = StringSwitch<int>(Chan)
7705 .Case(".x", 0)
7706 .Case(".y", 1)
7707 .Case(".z", 2)
7708 .Case(".w", 3)
7709 .Default(-1);
7710 if (AttrChan == -1)
7711 return Error(S, "invalid or missing interpolation attribute channel");
7712
7713 Str = Str.drop_back(2).drop_front(4);
7714
7715 uint8_t Attr;
7716 if (Str.getAsInteger(10, Attr))
7717 return Error(S, "invalid or missing interpolation attribute number");
7718
7719 if (Attr > 32)
7720 return Error(S, "out of bounds interpolation attribute number");
7721
7722 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
7723
7724 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
7725 AMDGPUOperand::ImmTyInterpAttr));
7726 Operands.push_back(AMDGPUOperand::CreateImm(
7727 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
7728 return ParseStatus::Success;
7729}
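// For illustration, the attribute operand has the form attr<N>.<chan>, e.g.:
//   v_interp_p1_f32 v0, v1, attr0.x
//   v_interp_mov_f32 v2, p10, attr1.y   // p10/p20/p0 come from parseInterpSlot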
7730
7731//===----------------------------------------------------------------------===//
7732// exp
7733//===----------------------------------------------------------------------===//
7734
7735ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
7736 using namespace llvm::AMDGPU::Exp;
7737
7738 StringRef Str;
7739 SMLoc S = getLoc();
7740
7741 if (!parseId(Str))
7742 return ParseStatus::NoMatch;
7743
7744 unsigned Id = getTgtId(Str);
7745 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
7746 return Error(S, (Id == ET_INVALID)
7747 ? "invalid exp target"
7748 : "exp target is not supported on this GPU");
7749
7750 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
7751 AMDGPUOperand::ImmTyExpTgt));
7752 return ParseStatus::Success;
7753}
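// For illustration, export targets are given by name, e.g.:
//   exp mrt0 v0, v1, v2, v3 done
//   exp pos0 v4, v5, v6, v7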
7754
7755//===----------------------------------------------------------------------===//
7756// parser helpers
7757//===----------------------------------------------------------------------===//
7758
7759bool
7760AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
7761 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
7762}
7763
7764bool
7765AMDGPUAsmParser::isId(const StringRef Id) const {
7766 return isId(getToken(), Id);
7767}
7768
7769bool
7770AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
7771 return getTokenKind() == Kind;
7772}
7773
7774StringRef AMDGPUAsmParser::getId() const {
7775 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
7776}
7777
7778bool
7779AMDGPUAsmParser::trySkipId(const StringRef Id) {
7780 if (isId(Id)) {
7781 lex();
7782 return true;
7783 }
7784 return false;
7785}
7786
7787bool
7788AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7789 if (isToken(AsmToken::Identifier)) {
7790 StringRef Tok = getTokenStr();
7791 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
7792 lex();
7793 return true;
7794 }
7795 }
7796 return false;
7797}
7798
7799bool
7800AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7801 if (isId(Id) && peekToken().is(Kind)) {
7802 lex();
7803 lex();
7804 return true;
7805 }
7806 return false;
7807}
7808
7809bool
7810AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7811 if (isToken(Kind)) {
7812 lex();
7813 return true;
7814 }
7815 return false;
7816}
7817
7818bool
7819AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7820 const StringRef ErrMsg) {
7821 if (!trySkipToken(Kind)) {
7822 Error(getLoc(), ErrMsg);
7823 return false;
7824 }
7825 return true;
7826}
7827
7828bool
7829AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7830 SMLoc S = getLoc();
7831
7832 const MCExpr *Expr;
7833 if (Parser.parseExpression(Expr))
7834 return false;
7835
7836 if (Expr->evaluateAsAbsolute(Imm))
7837 return true;
7838
7839 if (Expected.empty()) {
7840 Error(S, "expected absolute expression");
7841 } else {
7842 Error(S, Twine("expected ", Expected) +
7843 Twine(" or an absolute expression"));
7844 }
7845 return false;
7846}
7847
7848bool
7849AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7850 SMLoc S = getLoc();
7851
7852 const MCExpr *Expr;
7853 if (Parser.parseExpression(Expr))
7854 return false;
7855
7856 int64_t IntVal;
7857 if (Expr->evaluateAsAbsolute(IntVal)) {
7858 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7859 } else {
7860 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7861 }
7862 return true;
7863}
7864
7865bool
7866AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7867 if (isToken(AsmToken::String)) {
7868 Val = getToken().getStringContents();
7869 lex();
7870 return true;
7871 }
7872 Error(getLoc(), ErrMsg);
7873 return false;
7874}
7875
7876bool
7877AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7878 if (isToken(AsmToken::Identifier)) {
7879 Val = getTokenStr();
7880 lex();
7881 return true;
7882 }
7883 if (!ErrMsg.empty())
7884 Error(getLoc(), ErrMsg);
7885 return false;
7886}
7887
7888AsmToken
7889AMDGPUAsmParser::getToken() const {
7890 return Parser.getTok();
7891}
7892
7893AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7894 return isToken(AsmToken::EndOfStatement)
7895 ? getToken()
7896 : getLexer().peekTok(ShouldSkipSpace);
7897}
7898
7899void
7900AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7901 auto TokCount = getLexer().peekTokens(Tokens);
7902
7903 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7904 Tokens[Idx] = AsmToken(AsmToken::Error, "");
7905}
7906
7907AsmToken::TokenKind
7908AMDGPUAsmParser::getTokenKind() const {
7909 return getLexer().getKind();
7910}
7911
7912SMLoc
7913AMDGPUAsmParser::getLoc() const {
7914 return getToken().getLoc();
7915}
7916
7917StringRef
7918AMDGPUAsmParser::getTokenStr() const {
7919 return getToken().getString();
7920}
7921
7922void
7923AMDGPUAsmParser::lex() {
7924 Parser.Lex();
7925}
7926
7927SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
7928 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7929}
7930
7931SMLoc
7932AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7933 const OperandVector &Operands) const {
7934 for (unsigned i = Operands.size() - 1; i > 0; --i) {
7935 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7936 if (Test(Op))
7937 return Op.getStartLoc();
7938 }
7939 return getInstLoc(Operands);
7940}
7941
7942SMLoc
7943AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7944 const OperandVector &Operands) const {
7945 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7946 return getOperandLoc(Test, Operands);
7947}
7948
7949SMLoc AMDGPUAsmParser::getRegLoc(MCRegister Reg,
7950 const OperandVector &Operands) const {
7951 auto Test = [=](const AMDGPUOperand& Op) {
7952 return Op.isRegKind() && Op.getReg() == Reg;
7953 };
7954 return getOperandLoc(Test, Operands);
7955}
7956
7957SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
7958 bool SearchMandatoryLiterals) const {
7959 auto Test = [](const AMDGPUOperand& Op) {
7960 return Op.IsImmKindLiteral() || Op.isExpr();
7961 };
7962 SMLoc Loc = getOperandLoc(Test, Operands);
7963 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
7964 Loc = getMandatoryLitLoc(Operands);
7965 return Loc;
7966}
7967
7968SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
7969 auto Test = [](const AMDGPUOperand &Op) {
7970 return Op.IsImmKindMandatoryLiteral();
7971 };
7972 return getOperandLoc(Test, Operands);
7973}
7974
7975SMLoc
7976AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7977 auto Test = [](const AMDGPUOperand& Op) {
7978 return Op.isImmKindConst();
7979 };
7980 return getOperandLoc(Test, Operands);
7981}
7982
7983ParseStatus
7984AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
7985 if (!trySkipToken(AsmToken::LCurly))
7986 return ParseStatus::NoMatch;
7987
7988 bool First = true;
7989 while (!trySkipToken(AsmToken::RCurly)) {
7990 if (!First &&
7991 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
7992 return ParseStatus::Failure;
7993
7994 StringRef Id = getTokenStr();
7995 SMLoc IdLoc = getLoc();
7996 if (!skipToken(AsmToken::Identifier, "field name expected") ||
7997 !skipToken(AsmToken::Colon, "colon expected"))
7998 return ParseStatus::Failure;
7999
8000 const auto *I =
8001 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
8002 if (I == Fields.end())
8003 return Error(IdLoc, "unknown field");
8004 if ((*I)->IsDefined)
8005 return Error(IdLoc, "duplicate field");
8006
8007 // TODO: Support symbolic values.
8008 (*I)->Loc = getLoc();
8009 if (!parseExpr((*I)->Val))
8010 return ParseStatus::Failure;
8011 (*I)->IsDefined = true;
8012
8013 First = false;
8014 }
8015 return ParseStatus::Success;
8016}
8017
8018bool AMDGPUAsmParser::validateStructuredOpFields(
8019    ArrayRef<const StructuredOpField *> Fields) {
8020  return all_of(Fields, [this](const StructuredOpField *F) {
8021 return F->validate(*this);
8022 });
8023}
8024
8025//===----------------------------------------------------------------------===//
8026// swizzle
8027//===----------------------------------------------------------------------===//
8028
8029LLVM_READNONE
8030static unsigned
8031encodeBitmaskPerm(const unsigned AndMask,
8032 const unsigned OrMask,
8033 const unsigned XorMask) {
8034 using namespace llvm::AMDGPU::Swizzle;
8035
8036 return BITMASK_PERM_ENC |
8037 (AndMask << BITMASK_AND_SHIFT) |
8038 (OrMask << BITMASK_OR_SHIFT) |
8039 (XorMask << BITMASK_XOR_SHIFT);
8040}
8041
8042bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8043 const unsigned MaxVal,
8044 const Twine &ErrMsg, SMLoc &Loc) {
8045 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8046 return false;
8047 }
8048 Loc = getLoc();
8049 if (!parseExpr(Op)) {
8050 return false;
8051 }
8052 if (Op < MinVal || Op > MaxVal) {
8053 Error(Loc, ErrMsg);
8054 return false;
8055 }
8056
8057 return true;
8058}
8059
8060bool
8061AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8062 const unsigned MinVal,
8063 const unsigned MaxVal,
8064 const StringRef ErrMsg) {
8065 SMLoc Loc;
8066 for (unsigned i = 0; i < OpNum; ++i) {
8067 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
8068 return false;
8069 }
8070
8071 return true;
8072}
8073
8074bool
8075AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8076 using namespace llvm::AMDGPU::Swizzle;
8077
8078 int64_t Lane[LANE_NUM];
8079 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8080 "expected a 2-bit lane id")) {
8081    Imm = QUAD_PERM_ENC;
8082    for (unsigned I = 0; I < LANE_NUM; ++I) {
8083 Imm |= Lane[I] << (LANE_SHIFT * I);
8084 }
8085 return true;
8086 }
8087 return false;
8088}
8089
8090bool
8091AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
8092 using namespace llvm::AMDGPU::Swizzle;
8093
8094 SMLoc Loc;
8095 int64_t GroupSize;
8096 int64_t LaneIdx;
8097
8098 if (!parseSwizzleOperand(GroupSize,
8099 2, 32,
8100 "group size must be in the interval [2,32]",
8101 Loc)) {
8102 return false;
8103 }
8104 if (!isPowerOf2_64(GroupSize)) {
8105 Error(Loc, "group size must be a power of two");
8106 return false;
8107 }
8108 if (parseSwizzleOperand(LaneIdx,
8109 0, GroupSize - 1,
8110 "lane id must be in the interval [0,group size - 1]",
8111 Loc)) {
8112 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8113 return true;
8114 }
8115 return false;
8116}
8117
8118bool
8119AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8120 using namespace llvm::AMDGPU::Swizzle;
8121
8122 SMLoc Loc;
8123 int64_t GroupSize;
8124
8125 if (!parseSwizzleOperand(GroupSize,
8126 2, 32,
8127 "group size must be in the interval [2,32]",
8128 Loc)) {
8129 return false;
8130 }
8131 if (!isPowerOf2_64(GroupSize)) {
8132 Error(Loc, "group size must be a power of two");
8133 return false;
8134 }
8135
8136 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8137 return true;
8138}
8139
8140bool
8141AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8142 using namespace llvm::AMDGPU::Swizzle;
8143
8144 SMLoc Loc;
8145 int64_t GroupSize;
8146
8147 if (!parseSwizzleOperand(GroupSize,
8148 1, 16,
8149 "group size must be in the interval [1,16]",
8150 Loc)) {
8151 return false;
8152 }
8153 if (!isPowerOf2_64(GroupSize)) {
8154 Error(Loc, "group size must be a power of two");
8155 return false;
8156 }
8157
8158 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8159 return true;
8160}
8161
8162bool
8163AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8164 using namespace llvm::AMDGPU::Swizzle;
8165
8166 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8167 return false;
8168 }
8169
8170 StringRef Ctl;
8171 SMLoc StrLoc = getLoc();
8172 if (!parseString(Ctl)) {
8173 return false;
8174 }
8175 if (Ctl.size() != BITMASK_WIDTH) {
8176 Error(StrLoc, "expected a 5-character mask");
8177 return false;
8178 }
8179
8180 unsigned AndMask = 0;
8181 unsigned OrMask = 0;
8182 unsigned XorMask = 0;
8183
8184 for (size_t i = 0; i < Ctl.size(); ++i) {
8185 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8186 switch(Ctl[i]) {
8187 default:
8188 Error(StrLoc, "invalid mask");
8189 return false;
8190 case '0':
8191 break;
8192 case '1':
8193 OrMask |= Mask;
8194 break;
8195 case 'p':
8196 AndMask |= Mask;
8197 break;
8198 case 'i':
8199 AndMask |= Mask;
8200 XorMask |= Mask;
8201 break;
8202 }
8203 }
8204
8205 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8206 return true;
8207}
8208
8209bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
8210 using namespace llvm::AMDGPU::Swizzle;
8211
8212 if (!AMDGPU::isGFX9Plus(getSTI())) {
8213 Error(getLoc(), "FFT mode swizzle not supported on this GPU");
8214 return false;
8215 }
8216
8217 int64_t Swizzle;
8218 SMLoc Loc;
8219 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
8220 "FFT swizzle must be in the interval [0," +
8221 Twine(FFT_SWIZZLE_MAX) + Twine(']'),
8222 Loc))
8223 return false;
8224
8225 Imm = FFT_MODE_ENC | Swizzle;
8226 return true;
8227}
8228
8229bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
8230 using namespace llvm::AMDGPU::Swizzle;
8231
8232 if (!AMDGPU::isGFX9Plus(getSTI())) {
8233 Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
8234 return false;
8235 }
8236
8237 SMLoc Loc;
8238 int64_t Direction;
8239
8240 if (!parseSwizzleOperand(Direction, 0, 1,
8241 "direction must be 0 (left) or 1 (right)", Loc))
8242 return false;
8243
8244 int64_t RotateSize;
8245 if (!parseSwizzleOperand(
8246 RotateSize, 0, ROTATE_MAX_SIZE,
8247 "number of threads to rotate must be in the interval [0," +
8248 Twine(ROTATE_MAX_SIZE) + Twine(']'),
8249 Loc))
8250 return false;
8251
8252  Imm = ROTATE_MODE_ENC | (Direction << ROTATE_DIR_SHIFT) |
8253        (RotateSize << ROTATE_SIZE_SHIFT);
8254 return true;
8255}
8256
8257bool
8258AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8259
8260 SMLoc OffsetLoc = getLoc();
8261
8262 if (!parseExpr(Imm, "a swizzle macro")) {
8263 return false;
8264 }
8265 if (!isUInt<16>(Imm)) {
8266 Error(OffsetLoc, "expected a 16-bit offset");
8267 return false;
8268 }
8269 return true;
8270}
8271
8272bool
8273AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8274 using namespace llvm::AMDGPU::Swizzle;
8275
8276  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
8277
8278 SMLoc ModeLoc = getLoc();
8279 bool Ok = false;
8280
8281 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8282 Ok = parseSwizzleQuadPerm(Imm);
8283 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8284 Ok = parseSwizzleBitmaskPerm(Imm);
8285 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8286 Ok = parseSwizzleBroadcast(Imm);
8287 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8288 Ok = parseSwizzleSwap(Imm);
8289 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8290 Ok = parseSwizzleReverse(Imm);
8291 } else if (trySkipId(IdSymbolic[ID_FFT])) {
8292 Ok = parseSwizzleFFT(Imm);
8293 } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
8294 Ok = parseSwizzleRotate(Imm);
8295 } else {
8296 Error(ModeLoc, "expected a swizzle mode");
8297 }
8298
8299    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
8300 }
8301
8302 return false;
8303}
8304
8305ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8306 SMLoc S = getLoc();
8307 int64_t Imm = 0;
8308
8309 if (trySkipId("offset")) {
8310
8311 bool Ok = false;
8312 if (skipToken(AsmToken::Colon, "expected a colon")) {
8313 if (trySkipId("swizzle")) {
8314 Ok = parseSwizzleMacro(Imm);
8315 } else {
8316 Ok = parseSwizzleOffset(Imm);
8317 }
8318 }
8319
8320 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8321
8322    return Ok ? ParseStatus::Success : ParseStatus::Failure;
8323  }
8324 return ParseStatus::NoMatch;
8325}
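// For illustration, both the swizzle() macro and a raw 16-bit offset are
// accepted, e.g.:
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pi0")
//   ds_swizzle_b32 v0, v1 offset:0x8000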
8326
8327bool
8328AMDGPUOperand::isSwizzle() const {
8329 return isImmTy(ImmTySwizzle);
8330}
8331
8332//===----------------------------------------------------------------------===//
8333// VGPR Index Mode
8334//===----------------------------------------------------------------------===//
8335
8336int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8337
8338 using namespace llvm::AMDGPU::VGPRIndexMode;
8339
8340 if (trySkipToken(AsmToken::RParen)) {
8341 return OFF;
8342 }
8343
8344 int64_t Imm = 0;
8345
8346 while (true) {
8347 unsigned Mode = 0;
8348 SMLoc S = getLoc();
8349
8350 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8351 if (trySkipId(IdSymbolic[ModeId])) {
8352 Mode = 1 << ModeId;
8353 break;
8354 }
8355 }
8356
8357 if (Mode == 0) {
8358 Error(S, (Imm == 0)?
8359 "expected a VGPR index mode or a closing parenthesis" :
8360 "expected a VGPR index mode");
8361 return UNDEF;
8362 }
8363
8364 if (Imm & Mode) {
8365 Error(S, "duplicate VGPR index mode");
8366 return UNDEF;
8367 }
8368 Imm |= Mode;
8369
8370 if (trySkipToken(AsmToken::RParen))
8371 break;
8372 if (!skipToken(AsmToken::Comma,
8373 "expected a comma or a closing parenthesis"))
8374 return UNDEF;
8375 }
8376
8377 return Imm;
8378}
8379
8380ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8381
8382 using namespace llvm::AMDGPU::VGPRIndexMode;
8383
8384 int64_t Imm = 0;
8385 SMLoc S = getLoc();
8386
8387 if (trySkipId("gpr_idx", AsmToken::LParen)) {
8388 Imm = parseGPRIdxMacro();
8389 if (Imm == UNDEF)
8390 return ParseStatus::Failure;
8391 } else {
8392 if (getParser().parseAbsoluteExpression(Imm))
8393 return ParseStatus::Failure;
8394 if (Imm < 0 || !isUInt<4>(Imm))
8395 return Error(S, "invalid immediate: only 4-bit values are legal");
8396 }
8397
8398 Operands.push_back(
8399 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8400 return ParseStatus::Success;
8401}
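// For illustration:
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)   // symbolic mode list
//   s_set_gpr_idx_on s0, 3                    // raw 4-bit mode mask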
8402
8403bool AMDGPUOperand::isGPRIdxMode() const {
8404 return isImmTy(ImmTyGprIdxMode);
8405}
8406
8407//===----------------------------------------------------------------------===//
8408// sopp branch targets
8409//===----------------------------------------------------------------------===//
8410
8411ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
8412
8413 // Make sure we are not parsing something
8414 // that looks like a label or an expression but is not.
8415 // This will improve error messages.
8416 if (isRegister() || isModifier())
8417 return ParseStatus::NoMatch;
8418
8419 if (!parseExpr(Operands))
8420 return ParseStatus::Failure;
8421
8422 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
8423 assert(Opr.isImm() || Opr.isExpr());
8424 SMLoc Loc = Opr.getStartLoc();
8425
8426 // Currently we do not support arbitrary expressions as branch targets.
8427 // Only labels and absolute expressions are accepted.
8428 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8429 Error(Loc, "expected an absolute expression or a label");
8430 } else if (Opr.isImm() && !Opr.isS16Imm()) {
8431 Error(Loc, "expected a 16-bit signed jump offset");
8432 }
8433
8434 return ParseStatus::Success;
8435}
8436
8437//===----------------------------------------------------------------------===//
8438// Boolean holding registers
8439//===----------------------------------------------------------------------===//
8440
8441ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
8442 return parseReg(Operands);
8443}
8444
8445//===----------------------------------------------------------------------===//
8446// mubuf
8447//===----------------------------------------------------------------------===//
8448
8449void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
8450 const OperandVector &Operands,
8451 bool IsAtomic) {
8452 OptionalImmIndexMap OptionalIdx;
8453 unsigned FirstOperandIdx = 1;
8454 bool IsAtomicReturn = false;
8455
8456 if (IsAtomic) {
8457 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
8458                     SIInstrFlags::IsAtomicRet;
8459  }
8460
8461 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
8462 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8463
8464 // Add the register arguments
8465 if (Op.isReg()) {
8466 Op.addRegOperands(Inst, 1);
8467 // Insert a tied src for atomic return dst.
8468      // This cannot be postponed because subsequent calls to
8469      // addImmOperands rely on the correct number of MC operands.
8470 if (IsAtomicReturn && i == FirstOperandIdx)
8471 Op.addRegOperands(Inst, 1);
8472 continue;
8473 }
8474
8475 // Handle the case where soffset is an immediate
8476 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
8477 Op.addImmOperands(Inst, 1);
8478 continue;
8479 }
8480
8481 // Handle tokens like 'offen' which are sometimes hard-coded into the
8482 // asm string. There are no MCInst operands for these.
8483 if (Op.isToken()) {
8484 continue;
8485 }
8486 assert(Op.isImm());
8487
8488 // Handle optional arguments
8489 OptionalIdx[Op.getImmTy()] = i;
8490 }
8491
8492 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
8493 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
8494}
8495
8496//===----------------------------------------------------------------------===//
8497// smrd
8498//===----------------------------------------------------------------------===//
8499
8500bool AMDGPUOperand::isSMRDOffset8() const {
8501 return isImmLiteral() && isUInt<8>(getImm());
8502}
8503
8504bool AMDGPUOperand::isSMEMOffset() const {
8505 // Offset range is checked later by validator.
8506 return isImmLiteral();
8507}
8508
8509bool AMDGPUOperand::isSMRDLiteralOffset() const {
8510  // 32-bit literals are only supported on CI and we only want to use them
8511  // when the offset does not fit in 8 bits.
8512 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
8513}
8514
8515//===----------------------------------------------------------------------===//
8516// vop3
8517//===----------------------------------------------------------------------===//
8518
8519static bool ConvertOmodMul(int64_t &Mul) {
8520 if (Mul != 1 && Mul != 2 && Mul != 4)
8521 return false;
8522
8523 Mul >>= 1;
8524 return true;
8525}
8526
8527static bool ConvertOmodDiv(int64_t &Div) {
8528 if (Div == 1) {
8529 Div = 0;
8530 return true;
8531 }
8532
8533 if (Div == 2) {
8534 Div = 3;
8535 return true;
8536 }
8537
8538 return false;
8539}
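// Taken together, these helpers map the omod suffix onto its 2-bit field:
// mul:1 -> 0, mul:2 -> 1, mul:4 -> 2, div:2 -> 3 (div:1 also encodes as 0).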
8540
8541// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
8542// This is intentional and ensures compatibility with sp3.
8543// See bug 35397 for details.
8544bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
8545 if (BoundCtrl == 0 || BoundCtrl == 1) {
8546 if (!isGFX11Plus())
8547 BoundCtrl = 1;
8548 return true;
8549 }
8550 return false;
8551}
8552
8553void AMDGPUAsmParser::onBeginOfFile() {
8554 if (!getParser().getStreamer().getTargetStreamer() ||
8555 getSTI().getTargetTriple().getArch() == Triple::r600)
8556 return;
8557
8558 if (!getTargetStreamer().getTargetID())
8559 getTargetStreamer().initializeTargetID(getSTI(),
8560 getSTI().getFeatureString());
8561
8562 if (isHsaAbi(getSTI()))
8563 getTargetStreamer().EmitDirectiveAMDGCNTarget();
8564}
8565
8566/// Parse AMDGPU specific expressions.
8567///
8568/// expr ::= or(expr, ...) |
8569/// max(expr, ...)
8570///
8571bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
8572 using AGVK = AMDGPUMCExpr::VariantKind;
8573
8574 if (isToken(AsmToken::Identifier)) {
8575 StringRef TokenId = getTokenStr();
8576 AGVK VK = StringSwitch<AGVK>(TokenId)
8577 .Case("max", AGVK::AGVK_Max)
8578 .Case("or", AGVK::AGVK_Or)
8579 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
8580 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
8581 .Case("alignto", AGVK::AGVK_AlignTo)
8582 .Case("occupancy", AGVK::AGVK_Occupancy)
8583 .Default(AGVK::AGVK_None);
8584
8585 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
8586      SmallVector<const MCExpr *, 4> Exprs;
8587      uint64_t CommaCount = 0;
8588 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
8589 lex(); // Eat '('
8590 while (true) {
8591 if (trySkipToken(AsmToken::RParen)) {
8592 if (Exprs.empty()) {
8593 Error(getToken().getLoc(),
8594 "empty " + Twine(TokenId) + " expression");
8595 return true;
8596 }
8597 if (CommaCount + 1 != Exprs.size()) {
8598 Error(getToken().getLoc(),
8599 "mismatch of commas in " + Twine(TokenId) + " expression");
8600 return true;
8601 }
8602 Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
8603 return false;
8604 }
8605 const MCExpr *Expr;
8606 if (getParser().parseExpression(Expr, EndLoc))
8607 return true;
8608 Exprs.push_back(Expr);
8609 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
8610 if (LastTokenWasComma)
8611 CommaCount++;
8612 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
8613 Error(getToken().getLoc(),
8614 "unexpected token in " + Twine(TokenId) + " expression");
8615 return true;
8616 }
8617 }
8618 }
8619 }
8620 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
8621}
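// For illustration (symbol names are hypothetical), such expressions show up
// in compiler-emitted directives, e.g.:
//   .set total_vgprs, max(kernel_a.num_vgpr, kernel_b.num_vgpr)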
8622
8623ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
8624 StringRef Name = getTokenStr();
8625 if (Name == "mul") {
8626 return parseIntWithPrefix("mul", Operands,
8627 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8628 }
8629
8630 if (Name == "div") {
8631 return parseIntWithPrefix("div", Operands,
8632 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8633 }
8634
8635 return ParseStatus::NoMatch;
8636}
8637
8638// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8639// the number of src operands present, then copies that bit into src0_modifiers.
8640static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
8641 int Opc = Inst.getOpcode();
8642 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8643 if (OpSelIdx == -1)
8644 return;
8645
8646 int SrcNum;
8647 const int Ops[] = { AMDGPU::OpName::src0,
8648 AMDGPU::OpName::src1,
8649 AMDGPU::OpName::src2 };
8650 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
8651 ++SrcNum)
8652 ;
8653 assert(SrcNum > 0);
8654
8655 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8656
8657 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
8658 if (DstIdx == -1)
8659 return;
8660
8661 const MCOperand &DstOp = Inst.getOperand(DstIdx);
8662 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8663 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8664 if (DstOp.isReg() &&
8665 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
8666    if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
8667      ModVal |= SISrcMods::DST_OP_SEL;
8668 } else {
8669 if ((OpSel & (1 << SrcNum)) != 0)
8670 ModVal |= SISrcMods::DST_OP_SEL;
8671 }
8672 Inst.getOperand(ModIdx).setImm(ModVal);
8673}
8674
8675void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
8676 const OperandVector &Operands) {
8677 cvtVOP3P(Inst, Operands);
8678 cvtVOP3DstOpSelOnly(Inst, *getMRI());
8679}
8680
8681void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
8682 OptionalImmIndexMap &OptionalIdx) {
8683 cvtVOP3P(Inst, Operands, OptionalIdx);
8684 cvtVOP3DstOpSelOnly(Inst, *getMRI());
8685}
8686
8687static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8688 return
8689 // 1. This operand is input modifiers
8690 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
8691 // 2. This is not last operand
8692 && Desc.NumOperands > (OpNum + 1)
8693 // 3. Next operand is register class
8694 && Desc.operands()[OpNum + 1].RegClass != -1
8695 // 4. Next register is not tied to any other operand
8696 && Desc.getOperandConstraint(OpNum + 1,
8697 MCOI::OperandConstraint::TIED_TO) == -1;
8698}
8699
8700void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8701{
8702 OptionalImmIndexMap OptionalIdx;
8703 unsigned Opc = Inst.getOpcode();
8704
8705 unsigned I = 1;
8706 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8707 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8708 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8709 }
8710
8711 for (unsigned E = Operands.size(); I != E; ++I) {
8712 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8713    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8714      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8715 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
8716 Op.isInterpAttrChan()) {
8717 Inst.addOperand(MCOperand::createImm(Op.getImm()));
8718 } else if (Op.isImmModifier()) {
8719 OptionalIdx[Op.getImmTy()] = I;
8720 } else {
8721 llvm_unreachable("unhandled operand type");
8722 }
8723 }
8724
8725 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
8726 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8727 AMDGPUOperand::ImmTyHigh);
8728
8729 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8730 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8731 AMDGPUOperand::ImmTyClamp);
8732
8733 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8734 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8735 AMDGPUOperand::ImmTyOModSI);
8736}
8737
8738void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8739{
8740 OptionalImmIndexMap OptionalIdx;
8741 unsigned Opc = Inst.getOpcode();
8742
8743 unsigned I = 1;
8744 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8745 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8746 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8747 }
8748
8749 for (unsigned E = Operands.size(); I != E; ++I) {
8750 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8751    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8752      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8753 } else if (Op.isImmModifier()) {
8754 OptionalIdx[Op.getImmTy()] = I;
8755 } else {
8756 llvm_unreachable("unhandled operand type");
8757 }
8758 }
8759
8760 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
8761
8762 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8763 if (OpSelIdx != -1)
8764 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8765
8766 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8767
8768 if (OpSelIdx == -1)
8769 return;
8770
8771 const int Ops[] = { AMDGPU::OpName::src0,
8772 AMDGPU::OpName::src1,
8773 AMDGPU::OpName::src2 };
8774 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8775 AMDGPU::OpName::src1_modifiers,
8776 AMDGPU::OpName::src2_modifiers };
8777
8778 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8779
8780 for (int J = 0; J < 3; ++J) {
8781 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8782 if (OpIdx == -1)
8783 break;
8784
8785 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8786 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8787
8788 if ((OpSel & (1 << J)) != 0)
8789 ModVal |= SISrcMods::OP_SEL_0;
8790 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8791 (OpSel & (1 << 3)) != 0)
8792 ModVal |= SISrcMods::DST_OP_SEL;
8793
8794 Inst.getOperand(ModIdx).setImm(ModVal);
8795 }
8796}
8797
8798void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8799 OptionalImmIndexMap &OptionalIdx) {
8800 unsigned Opc = Inst.getOpcode();
8801
8802 unsigned I = 1;
8803 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8804 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8805 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8806 }
8807
8808 for (unsigned E = Operands.size(); I != E; ++I) {
8809 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8810    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8811      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8812 } else if (Op.isImmModifier()) {
8813 OptionalIdx[Op.getImmTy()] = I;
8814 } else {
8815 Op.addRegOrImmOperands(Inst, 1);
8816 }
8817 }
8818
8819 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
8820 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
8821 Inst.addOperand(Inst.getOperand(0));
8822 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8823 AMDGPUOperand::ImmTyByteSel);
8824 }
8825
8826 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8827 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8828 AMDGPUOperand::ImmTyClamp);
8829
8830 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8831 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8832 AMDGPUOperand::ImmTyOModSI);
8833
8834  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8835  // they have a src2 register operand that is tied to the dst operand.
8836  // We don't allow modifiers for this operand in the assembler, so
8837  // src2_modifiers should be 0.
8838 if (isMAC(Opc)) {
8839 auto *it = Inst.begin();
8840 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8841 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8842 ++it;
8843 // Copy the operand to ensure it's not invalidated when Inst grows.
8844 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8845 }
8846}
8847
8848void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8849 OptionalImmIndexMap OptionalIdx;
8850 cvtVOP3(Inst, Operands, OptionalIdx);
8851}
8852
8853void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8854 OptionalImmIndexMap &OptIdx) {
8855 const int Opc = Inst.getOpcode();
8856 const MCInstrDesc &Desc = MII.get(Opc);
8857
8858 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8859
8860 if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
8861 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
8862 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8863 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
8864 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
8865 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
8866 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
8867 Inst.addOperand(Inst.getOperand(0));
8868 }
8869
8870 // Adding vdst_in operand is already covered for these DPP instructions in
8871 // cvtVOP3DPP.
8872 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
8873 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
8874 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
8875 Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
8876 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12 ||
8877 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
8878 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
8879 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
8880 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) {
8881 Inst.addOperand(Inst.getOperand(0));
8882 }
8883
8884 int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
8885 if (BitOp3Idx != -1) {
8886 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
8887 }
8888
8889 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
8890 // instruction, and then figure out where to actually put the modifiers
8891
8892 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8893 if (OpSelIdx != -1) {
8894 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8895 }
8896
8897 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8898 if (OpSelHiIdx != -1) {
8899 int DefaultVal = IsPacked ? -1 : 0;
8900 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8901 DefaultVal);
8902 }
8903
8904 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8905 if (NegLoIdx != -1)
8906 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8907
8908 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8909 if (NegHiIdx != -1)
8910 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8911
8912 const int Ops[] = { AMDGPU::OpName::src0,
8913 AMDGPU::OpName::src1,
8914 AMDGPU::OpName::src2 };
8915 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8916 AMDGPU::OpName::src1_modifiers,
8917 AMDGPU::OpName::src2_modifiers };
8918
8919 unsigned OpSel = 0;
8920 unsigned OpSelHi = 0;
8921 unsigned NegLo = 0;
8922 unsigned NegHi = 0;
8923
8924 if (OpSelIdx != -1)
8925 OpSel = Inst.getOperand(OpSelIdx).getImm();
8926
8927 if (OpSelHiIdx != -1)
8928 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8929
8930 if (NegLoIdx != -1)
8931 NegLo = Inst.getOperand(NegLoIdx).getImm();
8932
8933 if (NegHiIdx != -1)
8934 NegHi = Inst.getOperand(NegHiIdx).getImm();
8935
8936 for (int J = 0; J < 3; ++J) {
8937 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8938 if (OpIdx == -1)
8939 break;
8940
8941 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8942
8943 if (ModIdx == -1)
8944 continue;
8945
8946 uint32_t ModVal = 0;
8947
8948 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
8949 if (SrcOp.isReg() && getMRI()
8950 ->getRegClass(AMDGPU::VGPR_16RegClassID)
8951 .contains(SrcOp.getReg())) {
8952 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
8953 if (VGPRSuffixIsHi)
8954 ModVal |= SISrcMods::OP_SEL_0;
8955 } else {
8956 if ((OpSel & (1 << J)) != 0)
8957 ModVal |= SISrcMods::OP_SEL_0;
8958 }
8959
8960 if ((OpSelHi & (1 << J)) != 0)
8961 ModVal |= SISrcMods::OP_SEL_1;
8962
8963 if ((NegLo & (1 << J)) != 0)
8964 ModVal |= SISrcMods::NEG;
8965
8966 if ((NegHi & (1 << J)) != 0)
8967 ModVal |= SISrcMods::NEG_HI;
8968
8969 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8970 }
8971}
8972
8973void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8974 OptionalImmIndexMap OptIdx;
8975 cvtVOP3(Inst, Operands, OptIdx);
8976 cvtVOP3P(Inst, Operands, OptIdx);
8977}
8978
8979static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
8980                                  unsigned i, unsigned Opc, unsigned OpName) {
8981 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
8982 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
8983 else
8984 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
8985}
8986
8987void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
8988 unsigned Opc = Inst.getOpcode();
8989
8990 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
8991 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
8992 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
8993 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
8994 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
8995
8996 OptionalImmIndexMap OptIdx;
8997 for (unsigned i = 5; i < Operands.size(); ++i) {
8998 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8999 OptIdx[Op.getImmTy()] = i;
9000 }
9001
9002 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
9003 addOptionalImmOperand(Inst, Operands, OptIdx,
9004 AMDGPUOperand::ImmTyIndexKey8bit);
9005
9006 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
9007 addOptionalImmOperand(Inst, Operands, OptIdx,
9008 AMDGPUOperand::ImmTyIndexKey16bit);
9009
9010 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9011 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
9012
9013 cvtVOP3P(Inst, Operands, OptIdx);
9014}
9015
9016//===----------------------------------------------------------------------===//
9017// VOPD
9018//===----------------------------------------------------------------------===//
9019
9020ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
9021 if (!hasVOPD(getSTI()))
9022 return ParseStatus::NoMatch;
9023
9024 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
9025 SMLoc S = getLoc();
9026 lex();
9027 lex();
9028 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
9029 SMLoc OpYLoc = getLoc();
9030 StringRef OpYName;
9031 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
9032 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
9033 return ParseStatus::Success;
9034 }
9035 return Error(OpYLoc, "expected a VOPDY instruction after ::");
9036 }
9037 return ParseStatus::NoMatch;
9038}
9039
9040// Create VOPD MCInst operands using parsed assembler operands.
9041void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
9042 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
9043 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
9044 if (Op.isReg()) {
9045 Op.addRegOperands(Inst, 1);
9046 return;
9047 }
9048 if (Op.isImm()) {
9049 Op.addImmOperands(Inst, 1);
9050 return;
9051 }
9052 llvm_unreachable("Unhandled operand type in cvtVOPD");
9053 };
9054
9055 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
9056
9057 // MCInst operands are ordered as follows:
9058 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
9059
9060 for (auto CompIdx : VOPD::COMPONENTS) {
9061 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
9062 }
9063
9064 for (auto CompIdx : VOPD::COMPONENTS) {
9065 const auto &CInfo = InstInfo[CompIdx];
9066 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
9067 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
9068 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
9069 if (CInfo.hasSrc2Acc())
9070 addOp(CInfo.getIndexOfDstInParsedOperands());
9071 }
9072}
9073
9074//===----------------------------------------------------------------------===//
9075// dpp
9076//===----------------------------------------------------------------------===//
9077
9078bool AMDGPUOperand::isDPP8() const {
9079 return isImmTy(ImmTyDPP8);
9080}
9081
9082bool AMDGPUOperand::isDPPCtrl() const {
9083 using namespace AMDGPU::DPP;
9084
9085 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
9086 if (result) {
9087 int64_t Imm = getImm();
9088 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
9089 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
9090 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
9091 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
9092 (Imm == DppCtrl::WAVE_SHL1) ||
9093 (Imm == DppCtrl::WAVE_ROL1) ||
9094 (Imm == DppCtrl::WAVE_SHR1) ||
9095 (Imm == DppCtrl::WAVE_ROR1) ||
9096 (Imm == DppCtrl::ROW_MIRROR) ||
9097 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
9098 (Imm == DppCtrl::BCAST15) ||
9099 (Imm == DppCtrl::BCAST31) ||
9100 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
9101 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
9102 }
9103 return false;
9104}
9105
9106//===----------------------------------------------------------------------===//
9107// mAI
9108//===----------------------------------------------------------------------===//
9109
9110bool AMDGPUOperand::isBLGP() const {
9111 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
9112}
9113
9114bool AMDGPUOperand::isS16Imm() const {
9115 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
9116}
9117
9118bool AMDGPUOperand::isU16Imm() const {
9119 return isImmLiteral() && isUInt<16>(getImm());
9120}
9121
9122//===----------------------------------------------------------------------===//
9123// dim
9124//===----------------------------------------------------------------------===//
9125
9126bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
9127 // We want to allow "dim:1D" etc.,
9128 // but the initial 1 is tokenized as an integer.
9129 std::string Token;
9130 if (isToken(AsmToken::Integer)) {
9131 SMLoc Loc = getToken().getEndLoc();
9132 Token = std::string(getTokenStr());
9133 lex();
9134 if (getLoc() != Loc)
9135 return false;
9136 }
9137
9138 StringRef Suffix;
9139 if (!parseId(Suffix))
9140 return false;
9141 Token += Suffix;
9142
9143 StringRef DimId = Token;
9144 if (DimId.starts_with("SQ_RSRC_IMG_"))
9145 DimId = DimId.drop_front(12);
9146
9147  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
9148  if (!DimInfo)
9149 return false;
9150
9151 Encoding = DimInfo->Encoding;
9152 return true;
9153}
9154
9155ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
9156 if (!isGFX10Plus())
9157 return ParseStatus::NoMatch;
9158
9159 SMLoc S = getLoc();
9160
9161 if (!trySkipId("dim", AsmToken::Colon))
9162 return ParseStatus::NoMatch;
9163
9164 unsigned Encoding;
9165 SMLoc Loc = getLoc();
9166 if (!parseDimId(Encoding))
9167 return Error(Loc, "invalid dim value");
9168
9169 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
9170 AMDGPUOperand::ImmTyDim));
9171 return ParseStatus::Success;
9172}
9173
9174//===----------------------------------------------------------------------===//
9175// dpp
9176//===----------------------------------------------------------------------===//
9177
9178ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9179 SMLoc S = getLoc();
9180
9181 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9182 return ParseStatus::NoMatch;
9183
9184 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
9185
9186 int64_t Sels[8];
9187
9188 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9189 return ParseStatus::Failure;
9190
9191 for (size_t i = 0; i < 8; ++i) {
9192 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9193 return ParseStatus::Failure;
9194
9195 SMLoc Loc = getLoc();
9196 if (getParser().parseAbsoluteExpression(Sels[i]))
9197 return ParseStatus::Failure;
9198 if (0 > Sels[i] || 7 < Sels[i])
9199 return Error(Loc, "expected a 3-bit value");
9200 }
9201
9202 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9203 return ParseStatus::Failure;
9204
9205 unsigned DPP8 = 0;
9206 for (size_t i = 0; i < 8; ++i)
9207 DPP8 |= (Sels[i] << (i * 3));
9208
9209 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9210 return ParseStatus::Success;
9211}
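// Worked example: "dpp8:[7,6,5,4,3,2,1,0]" packs lane-select i into bits
// [3*i+2 : 3*i], i.e. 7 | (6<<3) | (5<<6) | ... | (0<<21), producing the 24-bit
// dpp8 immediate added above.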
9212
9213bool
9214AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9215 const OperandVector &Operands) {
9216 if (Ctrl == "row_newbcast")
9217 return isGFX90A();
9218
9219 if (Ctrl == "row_share" ||
9220 Ctrl == "row_xmask")
9221 return isGFX10Plus();
9222
9223 if (Ctrl == "wave_shl" ||
9224 Ctrl == "wave_shr" ||
9225 Ctrl == "wave_rol" ||
9226 Ctrl == "wave_ror" ||
9227 Ctrl == "row_bcast")
9228 return isVI() || isGFX9();
9229
9230 return Ctrl == "row_mirror" ||
9231 Ctrl == "row_half_mirror" ||
9232 Ctrl == "quad_perm" ||
9233 Ctrl == "row_shl" ||
9234 Ctrl == "row_shr" ||
9235 Ctrl == "row_ror";
9236}
9237
9238int64_t
9239AMDGPUAsmParser::parseDPPCtrlPerm() {
9240 // quad_perm:[%d,%d,%d,%d]
9241
9242 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9243 return -1;
9244
9245 int64_t Val = 0;
9246 for (int i = 0; i < 4; ++i) {
9247 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9248 return -1;
9249
9250 int64_t Temp;
9251 SMLoc Loc = getLoc();
9252 if (getParser().parseAbsoluteExpression(Temp))
9253 return -1;
9254 if (Temp < 0 || Temp > 3) {
9255 Error(Loc, "expected a 2-bit value");
9256 return -1;
9257 }
9258
9259 Val += (Temp << i * 2);
9260 }
9261
9262 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9263 return -1;
9264
9265 return Val;
9266}
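// Worked example: "quad_perm:[0,1,2,3]" (the identity permutation) encodes as
// 0 | (1 << 2) | (2 << 4) | (3 << 6) = 0xe4, which is also the default dpp_ctrl
// value supplied by cvtVOP3DPP() below when no dpp_ctrl operand is written.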
9267
9268int64_t
9269AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
9270 using namespace AMDGPU::DPP;
9271
9272 // sel:%d
9273
9274 int64_t Val;
9275 SMLoc Loc = getLoc();
9276
9277 if (getParser().parseAbsoluteExpression(Val))
9278 return -1;
9279
9280 struct DppCtrlCheck {
9281 int64_t Ctrl;
9282 int Lo;
9283 int Hi;
9284 };
9285
9286 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
9287 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
9288 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
9289 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
9290 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
9291 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
9292 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
9293 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
9294 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
9295 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
9296 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
9297 .Default({-1, 0, 0});
9298
9299 bool Valid;
9300 if (Check.Ctrl == -1) {
9301 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
9302 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
9303 } else {
9304 Valid = Check.Lo <= Val && Val <= Check.Hi;
9305 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
9306 }
9307
9308 if (!Valid) {
9309 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
9310 return -1;
9311 }
9312
9313 return Val;
9314}
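// Note: for the wave_* controls Lo == Hi == 1, so the only accepted operand is
// ":1" and the result collapses to the fixed DppCtrl value; for the row_* controls
// the parsed value is OR'ed into the base encoding (e.g. row_shl:<n> yields
// DppCtrl::ROW_SHL0 | n).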
9315
9316ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
9317 using namespace AMDGPU::DPP;
9318
9319 if (!isToken(AsmToken::Identifier) ||
9320 !isSupportedDPPCtrl(getTokenStr(), Operands))
9321 return ParseStatus::NoMatch;
9322
9323 SMLoc S = getLoc();
9324 int64_t Val = -1;
9325 StringRef Ctrl;
9326
9327 parseId(Ctrl);
9328
9329 if (Ctrl == "row_mirror") {
9330 Val = DppCtrl::ROW_MIRROR;
9331 } else if (Ctrl == "row_half_mirror") {
9332 Val = DppCtrl::ROW_HALF_MIRROR;
9333 } else {
9334 if (skipToken(AsmToken::Colon, "expected a colon")) {
9335 if (Ctrl == "quad_perm") {
9336 Val = parseDPPCtrlPerm();
9337 } else {
9338 Val = parseDPPCtrlSel(Ctrl);
9339 }
9340 }
9341 }
9342
9343 if (Val == -1)
9344 return ParseStatus::Failure;
9345
9346 Operands.push_back(
9347 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
9348 return ParseStatus::Success;
9349}
9350
9351void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
9352 bool IsDPP8) {
9353 OptionalImmIndexMap OptionalIdx;
9354 unsigned Opc = Inst.getOpcode();
9355 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9356
9357 // MAC instructions are special because they have an 'old'
9358 // operand which is not tied to dst (but assumed to be).
9359 // They also have a dummy, unused src2_modifiers operand.
9360 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
9361 int Src2ModIdx =
9362 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
9363 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
9364 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
9365
9366 unsigned I = 1;
9367 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9368 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9369 }
9370
9371 int Fi = 0;
9372 for (unsigned E = Operands.size(); I != E; ++I) {
9373
9374 if (IsMAC) {
9375 int NumOperands = Inst.getNumOperands();
9376 if (OldIdx == NumOperands) {
9377 // Handle old operand
9378 constexpr int DST_IDX = 0;
9379 Inst.addOperand(Inst.getOperand(DST_IDX));
9380 } else if (Src2ModIdx == NumOperands) {
9381 // Add unused dummy src2_modifiers
9382 Inst.addOperand(MCOperand::createImm(0));
9383 }
9384 }
9385
9386 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
9387 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
9388 Inst.addOperand(Inst.getOperand(0));
9389 }
9390
9391 bool IsVOP3CvtSrDpp =
9392 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9393 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9394 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9395 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
9396 if (IsVOP3CvtSrDpp) {
9397 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
9398 Inst.addOperand(MCOperand::createImm(0));
9399 Inst.addOperand(MCOperand::createReg(MCRegister()));
9400 }
9401 }
9402
9403 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9404 MCOI::TIED_TO);
9405 if (TiedTo != -1) {
9406 assert((unsigned)TiedTo < Inst.getNumOperands());
9407 // handle tied old or src2 for MAC instructions
9408 Inst.addOperand(Inst.getOperand(TiedTo));
9409 }
9410 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9411 // Add the register arguments
9412 if (IsDPP8 && Op.isDppFI()) {
9413 Fi = Op.getImm();
9414 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9415 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9416 } else if (Op.isReg()) {
9417 Op.addRegOperands(Inst, 1);
9418 } else if (Op.isImm() &&
9419 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
9420 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
9421 Op.addImmOperands(Inst, 1);
9422 } else if (Op.isImm()) {
9423 OptionalIdx[Op.getImmTy()] = I;
9424 } else {
9425 llvm_unreachable("unhandled operand type");
9426 }
9427 }
9428
9429 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel))
9430 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9431 AMDGPUOperand::ImmTyByteSel);
9432
9433 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9434 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9435 AMDGPUOperand::ImmTyClamp);
9436
9437 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9438 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
9439
9440 if (Desc.TSFlags & SIInstrFlags::VOP3P)
9441 cvtVOP3P(Inst, Operands, OptionalIdx);
9442 else if (Desc.TSFlags & SIInstrFlags::VOP3)
9443 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
9444 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9445 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9446 }
9447
9448 if (IsDPP8) {
9449 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
9450 using namespace llvm::AMDGPU::DPP;
9451 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9452 } else {
9453 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
9454 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9455 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9456 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9457
9458 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
9459 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9460 AMDGPUOperand::ImmTyDppFI);
9461 }
9462}
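// When the classic (non-dpp8) form omits the optional DPP operands, the defaults
// applied above are dpp_ctrl = 0xe4 (identity quad_perm), row_mask = 0xf and
// bank_mask = 0xf; bound_ctrl and fi fall back to 0.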
9463
9464void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
9465 OptionalImmIndexMap OptionalIdx;
9466
9467 unsigned I = 1;
9468 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9469 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9470 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9471 }
9472
9473 int Fi = 0;
9474 for (unsigned E = Operands.size(); I != E; ++I) {
9475 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9476 MCOI::TIED_TO);
9477 if (TiedTo != -1) {
9478 assert((unsigned)TiedTo < Inst.getNumOperands());
9479 // handle tied old or src2 for MAC instructions
9480 Inst.addOperand(Inst.getOperand(TiedTo));
9481 }
9482 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9483 // Add the register arguments
9484 if (Op.isReg() && validateVccOperand(Op.getReg())) {
9485 // VOP2b (v_add_u32, v_sub_u32 ...) DPP forms use the "vcc" token.
9486 // Skip it.
9487 continue;
9488 }
9489
9490 if (IsDPP8) {
9491 if (Op.isDPP8()) {
9492 Op.addImmOperands(Inst, 1);
9493 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9494 Op.addRegWithFPInputModsOperands(Inst, 2);
9495 } else if (Op.isDppFI()) {
9496 Fi = Op.getImm();
9497 } else if (Op.isReg()) {
9498 Op.addRegOperands(Inst, 1);
9499 } else {
9500 llvm_unreachable("Invalid operand type");
9501 }
9502 } else {
9503 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9504 Op.addRegWithFPInputModsOperands(Inst, 2);
9505 } else if (Op.isReg()) {
9506 Op.addRegOperands(Inst, 1);
9507 } else if (Op.isDPPCtrl()) {
9508 Op.addImmOperands(Inst, 1);
9509 } else if (Op.isImm()) {
9510 // Handle optional arguments
9511 OptionalIdx[Op.getImmTy()] = I;
9512 } else {
9513 llvm_unreachable("Invalid operand type");
9514 }
9515 }
9516 }
9517
9518 if (IsDPP8) {
9519 using namespace llvm::AMDGPU::DPP;
9520 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9521 } else {
9522 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9523 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9524 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9525 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
9526 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9527 AMDGPUOperand::ImmTyDppFI);
9528 }
9529 }
9530}
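// Assumed example syntax (for illustration only; exact mnemonic spelling may differ):
//   v_mov_b32_dpp v0, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1        ; takes the IsDPP8 path
//   v_mov_b32_dpp v0, v1 quad_perm:[0,1,2,3] row_mask:0xf    ; classic DPP path
// In the classic path, omitted row_mask/bank_mask default to 0xf and bound_ctrl
// defaults to 0, as added above.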
9531
9532//===----------------------------------------------------------------------===//
9533// sdwa
9534//===----------------------------------------------------------------------===//
9535
9536ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
9537 StringRef Prefix,
9538 AMDGPUOperand::ImmTy Type) {
9539 return parseStringOrIntWithPrefix(
9540 Operands, Prefix,
9541 {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
9542 Type);
9543}
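// Accepts operands such as "dst_sel:BYTE_0", "src0_sel:WORD_1" or "src1_sel:DWORD";
// the shared parseStringOrIntWithPrefix() helper also allows a raw integer in place
// of the symbolic name (presumably an index into the list above).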
9544
9545ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
9546 return parseStringOrIntWithPrefix(
9547 Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
9548 AMDGPUOperand::ImmTySDWADstUnused);
9549}
9550
9551void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
9552 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
9553}
9554
9555void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
9556 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
9557}
9558
9559void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
9560 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
9561}
9562
9563void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
9564 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
9565}
9566
9567void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
9568 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
9569}
9570
9571void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
9572 uint64_t BasicInstType,
9573 bool SkipDstVcc,
9574 bool SkipSrcVcc) {
9575 using namespace llvm::AMDGPU::SDWA;
9576
9577 OptionalImmIndexMap OptionalIdx;
9578 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
9579 bool SkippedVcc = false;
9580
9581 unsigned I = 1;
9582 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9583 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9584 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9585 }
9586
9587 for (unsigned E = Operands.size(); I != E; ++I) {
9588 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9589 if (SkipVcc && !SkippedVcc && Op.isReg() &&
9590 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
9591 // VOP2b (v_add_u32, v_sub_u32 ...) SDWA forms use the "vcc" token as dst.
9592 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
9593 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
9594 // Skip VCC only if we didn't skip it on previous iteration.
9595 // Note that src0 and src1 occupy 2 slots each because of modifiers.
9596 if (BasicInstType == SIInstrFlags::VOP2 &&
9597 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
9598 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
9599 SkippedVcc = true;
9600 continue;
9601 }
9602 if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
9603 SkippedVcc = true;
9604 continue;
9605 }
9606 }
9607 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9608 Op.addRegOrImmWithInputModsOperands(Inst, 2);
9609 } else if (Op.isImm()) {
9610 // Handle optional arguments
9611 OptionalIdx[Op.getImmTy()] = I;
9612 } else {
9613 llvm_unreachable("Invalid operand type");
9614 }
9615 SkippedVcc = false;
9616 }
9617
9618 const unsigned Opc = Inst.getOpcode();
9619 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
9620 Opc != AMDGPU::V_NOP_sdwa_vi) {
9621 // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
9622 switch (BasicInstType) {
9623 case SIInstrFlags::VOP1:
9624 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9625 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9626 AMDGPUOperand::ImmTyClamp, 0);
9627
9628 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9629 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9630 AMDGPUOperand::ImmTyOModSI, 0);
9631
9632 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
9633 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9634 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9635
9636 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
9637 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9638 AMDGPUOperand::ImmTySDWADstUnused,
9639 DstUnused::UNUSED_PRESERVE);
9640
9641 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9642 break;
9643
9644 case SIInstrFlags::VOP2:
9645 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9646 AMDGPUOperand::ImmTyClamp, 0);
9647
9648 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
9649 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
9650
9651 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9652 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
9653 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9654 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9655 break;
9656
9657 case SIInstrFlags::VOPC:
9658 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
9659 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9660 AMDGPUOperand::ImmTyClamp, 0);
9661 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9662 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9663 break;
9664
9665 default:
9666 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9667 }
9668 }
9669
9670 // Special case v_mac_{f16, f32}:
9671 // it has a src2 register operand that is tied to the dst operand.
9672 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9673 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
9674 auto *it = Inst.begin();
9675 std::advance(
9676 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
9677 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
9678 }
9679}
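// e.g. for a v_mac_f32_sdwa instruction the destination register is re-inserted
// here as src2, so the resulting MCInst carries the tied accumulator operand
// explicitly even though it is not written in the assembly text.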
9680
9681/// Force static initialization.
9682 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
9683 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
9684 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
9685 }
9686
9687#define GET_REGISTER_MATCHER
9688#define GET_MATCHER_IMPLEMENTATION
9689#define GET_MNEMONIC_SPELL_CHECKER
9690#define GET_MNEMONIC_CHECKER
9691#include "AMDGPUGenAsmMatcher.inc"
9692
9693ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
9694 unsigned MCK) {
9695 switch (MCK) {
9696 case MCK_addr64:
9697 return parseTokenOp("addr64", Operands);
9698 case MCK_done:
9699 return parseTokenOp("done", Operands);
9700 case MCK_idxen:
9701 return parseTokenOp("idxen", Operands);
9702 case MCK_lds:
9703 return parseTokenOp("lds", Operands);
9704 case MCK_offen:
9705 return parseTokenOp("offen", Operands);
9706 case MCK_off:
9707 return parseTokenOp("off", Operands);
9708 case MCK_row_95_en:
9709 return parseTokenOp("row_en", Operands);
9710 case MCK_gds:
9711 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
9712 case MCK_tfe:
9713 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
9714 }
9715 return tryCustomParseOperand(Operands, MCK);
9716}
9717
9718 // This function should be defined after the auto-generated include so that the
9719 // MatchClassKind enum is defined.
9720unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
9721 unsigned Kind) {
9722 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(),
9723 // but MatchInstructionImpl() expects a token and fails to validate the
9724 // operand. This method checks whether we were given an immediate operand where
9725 // the matcher expects the corresponding token.
9726 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9727 switch (Kind) {
9728 case MCK_addr64:
9729 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9730 case MCK_gds:
9731 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9732 case MCK_lds:
9733 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9734 case MCK_idxen:
9735 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9736 case MCK_offen:
9737 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9738 case MCK_tfe:
9739 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
9740 case MCK_SSrc_b32:
9741 // When operands have expression values, they return true for isToken,
9742 // because it is not possible to distinguish between a token and an
9743 // expression at parse time. MatchInstructionImpl() always tries to match an
9744 // operand as a token when isToken returns true, and if the name of the
9745 // expression is not a valid token the match fails, so we need to handle
9746 // that case here.
9747 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
9748 case MCK_SSrc_f32:
9749 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
9750 case MCK_SOPPBrTarget:
9751 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
9752 case MCK_VReg32OrOff:
9753 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9754 case MCK_InterpSlot:
9755 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9756 case MCK_InterpAttr:
9757 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9758 case MCK_InterpAttrChan:
9759 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
9760 case MCK_SReg_64:
9761 case MCK_SReg_64_XEXEC:
9762 // Null is defined as a 32-bit register but
9763 // it should also be enabled with 64-bit operands.
9764 // The following code enables it for SReg_64 operands
9765 // used as source and destination. Remaining source
9766 // operands are handled in isInlinableImm.
9767 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9768 default:
9769 return Match_InvalidOperand;
9770 }
9771}
9772
9773//===----------------------------------------------------------------------===//
9774// endpgm
9775//===----------------------------------------------------------------------===//
9776
9777ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
9778 SMLoc S = getLoc();
9779 int64_t Imm = 0;
9780
9781 if (!parseExpr(Imm)) {
9782 // The operand is optional; if not present, default to 0.
9783 Imm = 0;
9784 }
9785
9786 if (!isUInt<16>(Imm))
9787 return Error(S, "expected a 16-bit value");
9788
9789 Operands.push_back(
9790 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9791 return ParseStatus::Success;
9792}
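// Handles the optional immediate of s_endpgm: plain "s_endpgm" defaults the
// operand to 0, while e.g. "s_endpgm 0x1" must fit in 16 bits or the error above
// is reported.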
9793
9794bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9795
9796//===----------------------------------------------------------------------===//
9797// Split Barrier
9798//===----------------------------------------------------------------------===//
9799
9800bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }