LLVM 20.0.0git
AMDGPUAsmParser.cpp
1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
15#include "SIDefines.h"
16#include "SIInstrInfo.h"
17#include "SIRegisterInfo.h"
22#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/StringSet.h"
25#include "llvm/ADT/Twine.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/MC/MCInstrDesc.h"
37#include "llvm/MC/MCSymbol.h"
44#include <optional>
45
46using namespace llvm;
47using namespace llvm::AMDGPU;
48using namespace llvm::amdhsa;
49
50namespace {
51
52class AMDGPUAsmParser;
53
54enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
55
56//===----------------------------------------------------------------------===//
57// Operand
58//===----------------------------------------------------------------------===//
59
60class AMDGPUOperand : public MCParsedAsmOperand {
61 enum KindTy {
62 Token,
63 Immediate,
64 Register,
65 Expression,
66 } Kind;
67
68 SMLoc StartLoc, EndLoc;
69 const AMDGPUAsmParser *AsmParser;
70
71public:
72 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
73 : Kind(Kind_), AsmParser(AsmParser_) {}
74
75 using Ptr = std::unique_ptr<AMDGPUOperand>;
76
77 struct Modifiers {
78 bool Abs = false;
79 bool Neg = false;
80 bool Sext = false;
81 bool Lit = false;
82
83 bool hasFPModifiers() const { return Abs || Neg; }
84 bool hasIntModifiers() const { return Sext; }
85 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
86
87 int64_t getFPModifiersOperand() const {
88 int64_t Operand = 0;
89 Operand |= Abs ? SISrcMods::ABS : 0u;
90 Operand |= Neg ? SISrcMods::NEG : 0u;
91 return Operand;
92 }
93
94 int64_t getIntModifiersOperand() const {
95 int64_t Operand = 0;
96 Operand |= Sext ? SISrcMods::SEXT : 0u;
97 return Operand;
98 }
99
100 int64_t getModifiersOperand() const {
101 assert(!(hasFPModifiers() && hasIntModifiers())
102 && "fp and int modifiers should not be used simultaneously");
103 if (hasFPModifiers())
104 return getFPModifiersOperand();
105 if (hasIntModifiers())
106 return getIntModifiersOperand();
107 return 0;
108 }
109
110 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
111 };
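// For example, assuming the usual VOP3/SDWA modifier syntax: a source written
// as "-|v0|" is parsed with Abs = Neg = true, so getModifiersOperand() yields
// SISrcMods::ABS | SISrcMods::NEG, while "sext(v0)" yields SISrcMods::SEXT.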
112
113 enum ImmTy {
114 ImmTyNone,
115 ImmTyGDS,
116 ImmTyLDS,
117 ImmTyOffen,
118 ImmTyIdxen,
119 ImmTyAddr64,
120 ImmTyOffset,
121 ImmTyInstOffset,
122 ImmTyOffset0,
123 ImmTyOffset1,
124 ImmTySMEMOffsetMod,
125 ImmTyCPol,
126 ImmTyTFE,
127 ImmTyD16,
128 ImmTyClamp,
129 ImmTyOModSI,
130 ImmTySDWADstSel,
131 ImmTySDWASrc0Sel,
132 ImmTySDWASrc1Sel,
133 ImmTySDWADstUnused,
134 ImmTyDMask,
135 ImmTyDim,
136 ImmTyUNorm,
137 ImmTyDA,
138 ImmTyR128A16,
139 ImmTyA16,
140 ImmTyLWE,
141 ImmTyExpTgt,
142 ImmTyExpCompr,
143 ImmTyExpVM,
144 ImmTyFORMAT,
145 ImmTyHwreg,
146 ImmTyOff,
147 ImmTySendMsg,
148 ImmTyInterpSlot,
149 ImmTyInterpAttr,
150 ImmTyInterpAttrChan,
151 ImmTyOpSel,
152 ImmTyOpSelHi,
153 ImmTyNegLo,
154 ImmTyNegHi,
155 ImmTyIndexKey8bit,
156 ImmTyIndexKey16bit,
157 ImmTyDPP8,
158 ImmTyDppCtrl,
159 ImmTyDppRowMask,
160 ImmTyDppBankMask,
161 ImmTyDppBoundCtrl,
162 ImmTyDppFI,
163 ImmTySwizzle,
164 ImmTyGprIdxMode,
165 ImmTyHigh,
166 ImmTyBLGP,
167 ImmTyCBSZ,
168 ImmTyABID,
169 ImmTyEndpgm,
170 ImmTyWaitVDST,
171 ImmTyWaitEXP,
172 ImmTyWaitVAVDst,
173 ImmTyWaitVMVSrc,
174 ImmTyByteSel,
175 };
176
177 // Immediate operand kind.
178 // It helps to identify the location of an offending operand after an error.
179 // Note that regular literals and mandatory literals (KImm) must be handled
180 // differently. When looking for an offending operand, we should usually
181 // ignore mandatory literals because they are part of the instruction and
182 // cannot be changed. Report location of mandatory operands only for VOPD,
183 // when both OpX and OpY have a KImm and there are no other literals.
184 enum ImmKindTy {
185 ImmKindTyNone,
186 ImmKindTyLiteral,
187 ImmKindTyMandatoryLiteral,
188 ImmKindTyConst,
189 };
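// For example (illustrative values): the 32-bit constant in
// "v_add_f32 v0, 0x3e800000, v1" is a regular literal (ImmKindTyLiteral),
// the 1 in "s_mov_b32 s0, 1" fits an inline constant (ImmKindTyConst), and
// the trailing 32-bit immediate of v_fmaak_f32 is a mandatory literal
// (ImmKindTyMandatoryLiteral, a KImm operand).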
190
191private:
192 struct TokOp {
193 const char *Data;
194 unsigned Length;
195 };
196
197 struct ImmOp {
198 int64_t Val;
199 ImmTy Type;
200 bool IsFPImm;
201 mutable ImmKindTy Kind;
202 Modifiers Mods;
203 };
204
205 struct RegOp {
206 unsigned RegNo;
207 Modifiers Mods;
208 };
209
210 union {
211 TokOp Tok;
212 ImmOp Imm;
213 RegOp Reg;
214 const MCExpr *Expr;
215 };
216
217public:
218 bool isToken() const override { return Kind == Token; }
219
220 bool isSymbolRefExpr() const {
221 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
222 }
223
224 bool isImm() const override {
225 return Kind == Immediate;
226 }
227
228 void setImmKindNone() const {
229 assert(isImm());
230 Imm.Kind = ImmKindTyNone;
231 }
232
233 void setImmKindLiteral() const {
234 assert(isImm());
235 Imm.Kind = ImmKindTyLiteral;
236 }
237
238 void setImmKindMandatoryLiteral() const {
239 assert(isImm());
240 Imm.Kind = ImmKindTyMandatoryLiteral;
241 }
242
243 void setImmKindConst() const {
244 assert(isImm());
245 Imm.Kind = ImmKindTyConst;
246 }
247
248 bool IsImmKindLiteral() const {
249 return isImm() && Imm.Kind == ImmKindTyLiteral;
250 }
251
252 bool IsImmKindMandatoryLiteral() const {
253 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
254 }
255
256 bool isImmKindConst() const {
257 return isImm() && Imm.Kind == ImmKindTyConst;
258 }
259
260 bool isInlinableImm(MVT type) const;
261 bool isLiteralImm(MVT type) const;
262
263 bool isRegKind() const {
264 return Kind == Register;
265 }
266
267 bool isReg() const override {
268 return isRegKind() && !hasModifiers();
269 }
270
271 bool isRegOrInline(unsigned RCID, MVT type) const {
272 return isRegClass(RCID) || isInlinableImm(type);
273 }
274
275 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
276 return isRegOrInline(RCID, type) || isLiteralImm(type);
277 }
278
279 bool isRegOrImmWithInt16InputMods() const {
280 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
281 }
282
283 bool isRegOrImmWithIntT16InputMods() const {
284 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
285 }
286
287 bool isRegOrImmWithInt32InputMods() const {
288 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
289 }
290
291 bool isRegOrInlineImmWithInt16InputMods() const {
292 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
293 }
294
295 bool isRegOrInlineImmWithInt32InputMods() const {
296 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
297 }
298
299 bool isRegOrImmWithInt64InputMods() const {
300 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
301 }
302
303 bool isRegOrImmWithFP16InputMods() const {
304 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
305 }
306
307 bool isRegOrImmWithFPT16InputMods() const {
308 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
309 }
310
311 bool isRegOrImmWithFP32InputMods() const {
312 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
313 }
314
315 bool isRegOrImmWithFP64InputMods() const {
316 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
317 }
318
319 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
320 return isRegOrInline(
321 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
322 }
323
324 bool isRegOrInlineImmWithFP32InputMods() const {
325 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
326 }
327
328 bool isPackedFP16InputMods() const {
329 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
330 }
331
332 bool isVReg() const {
333 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
334 isRegClass(AMDGPU::VReg_64RegClassID) ||
335 isRegClass(AMDGPU::VReg_96RegClassID) ||
336 isRegClass(AMDGPU::VReg_128RegClassID) ||
337 isRegClass(AMDGPU::VReg_160RegClassID) ||
338 isRegClass(AMDGPU::VReg_192RegClassID) ||
339 isRegClass(AMDGPU::VReg_256RegClassID) ||
340 isRegClass(AMDGPU::VReg_512RegClassID) ||
341 isRegClass(AMDGPU::VReg_1024RegClassID);
342 }
343
344 bool isVReg32() const {
345 return isRegClass(AMDGPU::VGPR_32RegClassID);
346 }
347
348 bool isVReg32OrOff() const {
349 return isOff() || isVReg32();
350 }
351
352 bool isNull() const {
353 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
354 }
355
356 bool isVRegWithInputMods() const;
357 template <bool IsFake16> bool isT16VRegWithInputMods() const;
358
359 bool isSDWAOperand(MVT type) const;
360 bool isSDWAFP16Operand() const;
361 bool isSDWAFP32Operand() const;
362 bool isSDWAInt16Operand() const;
363 bool isSDWAInt32Operand() const;
364
365 bool isImmTy(ImmTy ImmT) const {
366 return isImm() && Imm.Type == ImmT;
367 }
368
369 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
370
371 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
372
373 bool isImmModifier() const {
374 return isImm() && Imm.Type != ImmTyNone;
375 }
376
377 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
378 bool isDim() const { return isImmTy(ImmTyDim); }
379 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
380 bool isOff() const { return isImmTy(ImmTyOff); }
381 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
382 bool isOffen() const { return isImmTy(ImmTyOffen); }
383 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
384 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
385 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
386 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
387 bool isGDS() const { return isImmTy(ImmTyGDS); }
388 bool isLDS() const { return isImmTy(ImmTyLDS); }
389 bool isCPol() const { return isImmTy(ImmTyCPol); }
390 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
391 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
392 bool isTFE() const { return isImmTy(ImmTyTFE); }
393 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
394 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
395 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
396 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
397 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
398 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
399 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
400 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
401 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
402 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
403 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
404 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
405 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
406
407 bool isRegOrImm() const {
408 return isReg() || isImm();
409 }
410
411 bool isRegClass(unsigned RCID) const;
412
413 bool isInlineValue() const;
414
415 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
416 return isRegOrInline(RCID, type) && !hasModifiers();
417 }
418
419 bool isSCSrcB16() const {
420 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
421 }
422
423 bool isSCSrcV2B16() const {
424 return isSCSrcB16();
425 }
426
427 bool isSCSrc_b32() const {
428 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
429 }
430
431 bool isSCSrc_b64() const {
432 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
433 }
434
435 bool isBoolReg() const;
436
437 bool isSCSrcF16() const {
438 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
439 }
440
441 bool isSCSrcV2F16() const {
442 return isSCSrcF16();
443 }
444
445 bool isSCSrcF32() const {
446 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
447 }
448
449 bool isSCSrcF64() const {
450 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
451 }
452
453 bool isSSrc_b32() const {
454 return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
455 }
456
457 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }
458
459 bool isSSrcV2B16() const {
460 llvm_unreachable("cannot happen");
461 return isSSrc_b16();
462 }
463
464 bool isSSrc_b64() const {
465 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
466 // See isVSrc64().
467 return isSCSrc_b64() || isLiteralImm(MVT::i64);
468 }
469
470 bool isSSrc_f32() const {
471 return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
472 }
473
474 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }
475
476 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }
477
478 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }
479
480 bool isSSrcV2F16() const {
481 llvm_unreachable("cannot happen");
482 return isSSrc_f16();
483 }
484
485 bool isSSrcV2FP32() const {
486 llvm_unreachable("cannot happen");
487 return isSSrc_f32();
488 }
489
490 bool isSCSrcV2FP32() const {
491 llvm_unreachable("cannot happen");
492 return isSCSrcF32();
493 }
494
495 bool isSSrcV2INT32() const {
496 llvm_unreachable("cannot happen");
497 return isSSrc_b32();
498 }
499
500 bool isSCSrcV2INT32() const {
501 llvm_unreachable("cannot happen");
502 return isSCSrc_b32();
503 }
504
505 bool isSSrcOrLds_b32() const {
506 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
507 isLiteralImm(MVT::i32) || isExpr();
508 }
509
510 bool isVCSrc_b32() const {
511 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
512 }
513
514 bool isVCSrcB64() const {
515 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
516 }
517
518 bool isVCSrcTB16() const {
519 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
520 }
521
522 bool isVCSrcTB16_Lo128() const {
523 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
524 }
525
526 bool isVCSrcFake16B16_Lo128() const {
527 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
528 }
529
530 bool isVCSrc_b16() const {
531 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
532 }
533
534 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
535
536 bool isVCSrc_f32() const {
537 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
538 }
539
540 bool isVCSrcF64() const {
541 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
542 }
543
544 bool isVCSrcTBF16() const {
545 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
546 }
547
548 bool isVCSrcTF16() const {
549 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
550 }
551
552 bool isVCSrcTBF16_Lo128() const {
553 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
554 }
555
556 bool isVCSrcTF16_Lo128() const {
557 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
558 }
559
560 bool isVCSrcFake16BF16_Lo128() const {
561 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
562 }
563
564 bool isVCSrcFake16F16_Lo128() const {
565 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
566 }
567
568 bool isVCSrc_bf16() const {
569 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
570 }
571
572 bool isVCSrc_f16() const {
573 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
574 }
575
576 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
577
578 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
579
580 bool isVSrc_b32() const {
581 return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
582 }
583
584 bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }
585
586 bool isVSrcT_b16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
587
588 bool isVSrcT_b16_Lo128() const {
589 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
590 }
591
592 bool isVSrcFake16_b16_Lo128() const {
593 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
594 }
595
596 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }
597
598 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }
599
600 bool isVCSrcV2FP32() const {
601 return isVCSrcF64();
602 }
603
604 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }
605
606 bool isVCSrcV2INT32() const {
607 return isVCSrcB64();
608 }
609
610 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }
611
612 bool isVSrc_f32() const {
613 return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
614 }
615
616 bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }
617
618 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }
619
620 bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
621
622 bool isVSrcT_bf16_Lo128() const {
623 return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
624 }
625
626 bool isVSrcT_f16_Lo128() const {
627 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
628 }
629
630 bool isVSrcFake16_bf16_Lo128() const {
631 return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
632 }
633
634 bool isVSrcFake16_f16_Lo128() const {
635 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
636 }
637
638 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }
639
640 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }
641
642 bool isVSrc_v2bf16() const {
643 return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
644 }
645
646 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
647
648 bool isVISrcB32() const {
649 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
650 }
651
652 bool isVISrcB16() const {
653 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
654 }
655
656 bool isVISrcV2B16() const {
657 return isVISrcB16();
658 }
659
660 bool isVISrcF32() const {
661 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
662 }
663
664 bool isVISrcF16() const {
665 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
666 }
667
668 bool isVISrcV2F16() const {
669 return isVISrcF16() || isVISrcB32();
670 }
671
672 bool isVISrc_64_bf16() const {
673 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
674 }
675
676 bool isVISrc_64_f16() const {
677 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
678 }
679
680 bool isVISrc_64_b32() const {
681 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
682 }
683
684 bool isVISrc_64B64() const {
685 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
686 }
687
688 bool isVISrc_64_f64() const {
689 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
690 }
691
692 bool isVISrc_64V2FP32() const {
693 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
694 }
695
696 bool isVISrc_64V2INT32() const {
697 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
698 }
699
700 bool isVISrc_256_b32() const {
701 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
702 }
703
704 bool isVISrc_256_f32() const {
705 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
706 }
707
708 bool isVISrc_256B64() const {
709 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
710 }
711
712 bool isVISrc_256_f64() const {
713 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
714 }
715
716 bool isVISrc_128B16() const {
717 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
718 }
719
720 bool isVISrc_128V2B16() const {
721 return isVISrc_128B16();
722 }
723
724 bool isVISrc_128_b32() const {
725 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
726 }
727
728 bool isVISrc_128_f32() const {
729 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
730 }
731
732 bool isVISrc_256V2FP32() const {
733 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
734 }
735
736 bool isVISrc_256V2INT32() const {
737 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
738 }
739
740 bool isVISrc_512_b32() const {
741 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
742 }
743
744 bool isVISrc_512B16() const {
745 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
746 }
747
748 bool isVISrc_512V2B16() const {
749 return isVISrc_512B16();
750 }
751
752 bool isVISrc_512_f32() const {
753 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
754 }
755
756 bool isVISrc_512F16() const {
757 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
758 }
759
760 bool isVISrc_512V2F16() const {
761 return isVISrc_512F16() || isVISrc_512_b32();
762 }
763
764 bool isVISrc_1024_b32() const {
765 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
766 }
767
768 bool isVISrc_1024B16() const {
769 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
770 }
771
772 bool isVISrc_1024V2B16() const {
773 return isVISrc_1024B16();
774 }
775
776 bool isVISrc_1024_f32() const {
777 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
778 }
779
780 bool isVISrc_1024F16() const {
781 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
782 }
783
784 bool isVISrc_1024V2F16() const {
785 return isVISrc_1024F16() || isVISrc_1024_b32();
786 }
787
788 bool isAISrcB32() const {
789 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
790 }
791
792 bool isAISrcB16() const {
793 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
794 }
795
796 bool isAISrcV2B16() const {
797 return isAISrcB16();
798 }
799
800 bool isAISrcF32() const {
801 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
802 }
803
804 bool isAISrcF16() const {
805 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
806 }
807
808 bool isAISrcV2F16() const {
809 return isAISrcF16() || isAISrcB32();
810 }
811
812 bool isAISrc_64B64() const {
813 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
814 }
815
816 bool isAISrc_64_f64() const {
817 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
818 }
819
820 bool isAISrc_128_b32() const {
821 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
822 }
823
824 bool isAISrc_128B16() const {
825 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
826 }
827
828 bool isAISrc_128V2B16() const {
829 return isAISrc_128B16();
830 }
831
832 bool isAISrc_128_f32() const {
833 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
834 }
835
836 bool isAISrc_128F16() const {
837 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
838 }
839
840 bool isAISrc_128V2F16() const {
841 return isAISrc_128F16() || isAISrc_128_b32();
842 }
843
844 bool isVISrc_128_bf16() const {
845 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
846 }
847
848 bool isVISrc_128_f16() const {
849 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
850 }
851
852 bool isVISrc_128V2F16() const {
853 return isVISrc_128_f16() || isVISrc_128_b32();
854 }
855
856 bool isAISrc_256B64() const {
857 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
858 }
859
860 bool isAISrc_256_f64() const {
861 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
862 }
863
864 bool isAISrc_512_b32() const {
865 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
866 }
867
868 bool isAISrc_512B16() const {
869 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
870 }
871
872 bool isAISrc_512V2B16() const {
873 return isAISrc_512B16();
874 }
875
876 bool isAISrc_512_f32() const {
877 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
878 }
879
880 bool isAISrc_512F16() const {
881 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
882 }
883
884 bool isAISrc_512V2F16() const {
885 return isAISrc_512F16() || isAISrc_512_b32();
886 }
887
888 bool isAISrc_1024_b32() const {
889 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
890 }
891
892 bool isAISrc_1024B16() const {
893 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
894 }
895
896 bool isAISrc_1024V2B16() const {
897 return isAISrc_1024B16();
898 }
899
900 bool isAISrc_1024_f32() const {
901 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
902 }
903
904 bool isAISrc_1024F16() const {
905 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
906 }
907
908 bool isAISrc_1024V2F16() const {
909 return isAISrc_1024F16() || isAISrc_1024_b32();
910 }
911
912 bool isKImmFP32() const {
913 return isLiteralImm(MVT::f32);
914 }
915
916 bool isKImmFP16() const {
917 return isLiteralImm(MVT::f16);
918 }
919
920 bool isMem() const override {
921 return false;
922 }
923
924 bool isExpr() const {
925 return Kind == Expression;
926 }
927
928 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
929
930 bool isSWaitCnt() const;
931 bool isDepCtr() const;
932 bool isSDelayALU() const;
933 bool isHwreg() const;
934 bool isSendMsg() const;
935 bool isSplitBarrier() const;
936 bool isSwizzle() const;
937 bool isSMRDOffset8() const;
938 bool isSMEMOffset() const;
939 bool isSMRDLiteralOffset() const;
940 bool isDPP8() const;
941 bool isDPPCtrl() const;
942 bool isBLGP() const;
943 bool isGPRIdxMode() const;
944 bool isS16Imm() const;
945 bool isU16Imm() const;
946 bool isEndpgm() const;
947
948 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
949 return [=](){ return P(*this); };
950 }
951
952 StringRef getToken() const {
953 assert(isToken());
954 return StringRef(Tok.Data, Tok.Length);
955 }
956
957 int64_t getImm() const {
958 assert(isImm());
959 return Imm.Val;
960 }
961
962 void setImm(int64_t Val) {
963 assert(isImm());
964 Imm.Val = Val;
965 }
966
967 ImmTy getImmTy() const {
968 assert(isImm());
969 return Imm.Type;
970 }
971
972 MCRegister getReg() const override {
973 assert(isRegKind());
974 return Reg.RegNo;
975 }
976
977 SMLoc getStartLoc() const override {
978 return StartLoc;
979 }
980
981 SMLoc getEndLoc() const override {
982 return EndLoc;
983 }
984
985 SMRange getLocRange() const {
986 return SMRange(StartLoc, EndLoc);
987 }
988
989 Modifiers getModifiers() const {
990 assert(isRegKind() || isImmTy(ImmTyNone));
991 return isRegKind() ? Reg.Mods : Imm.Mods;
992 }
993
994 void setModifiers(Modifiers Mods) {
995 assert(isRegKind() || isImmTy(ImmTyNone));
996 if (isRegKind())
997 Reg.Mods = Mods;
998 else
999 Imm.Mods = Mods;
1000 }
1001
1002 bool hasModifiers() const {
1003 return getModifiers().hasModifiers();
1004 }
1005
1006 bool hasFPModifiers() const {
1007 return getModifiers().hasFPModifiers();
1008 }
1009
1010 bool hasIntModifiers() const {
1011 return getModifiers().hasIntModifiers();
1012 }
1013
1014 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1015
1016 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1017
1018 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1019
1020 void addRegOperands(MCInst &Inst, unsigned N) const;
1021
1022 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1023 if (isRegKind())
1024 addRegOperands(Inst, N);
1025 else
1026 addImmOperands(Inst, N);
1027 }
1028
1029 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1030 Modifiers Mods = getModifiers();
1031 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1032 if (isRegKind()) {
1033 addRegOperands(Inst, N);
1034 } else {
1035 addImmOperands(Inst, N, false);
1036 }
1037 }
1038
1039 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1040 assert(!hasIntModifiers());
1041 addRegOrImmWithInputModsOperands(Inst, N);
1042 }
1043
1044 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1045 assert(!hasFPModifiers());
1046 addRegOrImmWithInputModsOperands(Inst, N);
1047 }
1048
1049 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1050 Modifiers Mods = getModifiers();
1051 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1052 assert(isRegKind());
1053 addRegOperands(Inst, N);
1054 }
1055
1056 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1057 assert(!hasIntModifiers());
1058 addRegWithInputModsOperands(Inst, N);
1059 }
1060
1061 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1062 assert(!hasFPModifiers());
1063 addRegWithInputModsOperands(Inst, N);
1064 }
1065
1066 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1067 // clang-format off
1068 switch (Type) {
1069 case ImmTyNone: OS << "None"; break;
1070 case ImmTyGDS: OS << "GDS"; break;
1071 case ImmTyLDS: OS << "LDS"; break;
1072 case ImmTyOffen: OS << "Offen"; break;
1073 case ImmTyIdxen: OS << "Idxen"; break;
1074 case ImmTyAddr64: OS << "Addr64"; break;
1075 case ImmTyOffset: OS << "Offset"; break;
1076 case ImmTyInstOffset: OS << "InstOffset"; break;
1077 case ImmTyOffset0: OS << "Offset0"; break;
1078 case ImmTyOffset1: OS << "Offset1"; break;
1079 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1080 case ImmTyCPol: OS << "CPol"; break;
1081 case ImmTyIndexKey8bit: OS << "index_key"; break;
1082 case ImmTyIndexKey16bit: OS << "index_key"; break;
1083 case ImmTyTFE: OS << "TFE"; break;
1084 case ImmTyD16: OS << "D16"; break;
1085 case ImmTyFORMAT: OS << "FORMAT"; break;
1086 case ImmTyClamp: OS << "Clamp"; break;
1087 case ImmTyOModSI: OS << "OModSI"; break;
1088 case ImmTyDPP8: OS << "DPP8"; break;
1089 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1090 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1091 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1092 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1093 case ImmTyDppFI: OS << "DppFI"; break;
1094 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1095 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1096 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1097 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1098 case ImmTyDMask: OS << "DMask"; break;
1099 case ImmTyDim: OS << "Dim"; break;
1100 case ImmTyUNorm: OS << "UNorm"; break;
1101 case ImmTyDA: OS << "DA"; break;
1102 case ImmTyR128A16: OS << "R128A16"; break;
1103 case ImmTyA16: OS << "A16"; break;
1104 case ImmTyLWE: OS << "LWE"; break;
1105 case ImmTyOff: OS << "Off"; break;
1106 case ImmTyExpTgt: OS << "ExpTgt"; break;
1107 case ImmTyExpCompr: OS << "ExpCompr"; break;
1108 case ImmTyExpVM: OS << "ExpVM"; break;
1109 case ImmTyHwreg: OS << "Hwreg"; break;
1110 case ImmTySendMsg: OS << "SendMsg"; break;
1111 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1112 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1113 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1114 case ImmTyOpSel: OS << "OpSel"; break;
1115 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1116 case ImmTyNegLo: OS << "NegLo"; break;
1117 case ImmTyNegHi: OS << "NegHi"; break;
1118 case ImmTySwizzle: OS << "Swizzle"; break;
1119 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1120 case ImmTyHigh: OS << "High"; break;
1121 case ImmTyBLGP: OS << "BLGP"; break;
1122 case ImmTyCBSZ: OS << "CBSZ"; break;
1123 case ImmTyABID: OS << "ABID"; break;
1124 case ImmTyEndpgm: OS << "Endpgm"; break;
1125 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1126 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1127 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1128 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1129 case ImmTyByteSel: OS << "ByteSel" ; break;
1130 }
1131 // clang-format on
1132 }
1133
1134 void print(raw_ostream &OS) const override {
1135 switch (Kind) {
1136 case Register:
1137 OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
1138 << " mods: " << Reg.Mods << '>';
1139 break;
1140 case Immediate:
1141 OS << '<' << getImm();
1142 if (getImmTy() != ImmTyNone) {
1143 OS << " type: "; printImmTy(OS, getImmTy());
1144 }
1145 OS << " mods: " << Imm.Mods << '>';
1146 break;
1147 case Token:
1148 OS << '\'' << getToken() << '\'';
1149 break;
1150 case Expression:
1151 OS << "<expr " << *Expr << '>';
1152 break;
1153 }
1154 }
1155
1156 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1157 int64_t Val, SMLoc Loc,
1158 ImmTy Type = ImmTyNone,
1159 bool IsFPImm = false) {
1160 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1161 Op->Imm.Val = Val;
1162 Op->Imm.IsFPImm = IsFPImm;
1163 Op->Imm.Kind = ImmKindTyNone;
1164 Op->Imm.Type = Type;
1165 Op->Imm.Mods = Modifiers();
1166 Op->StartLoc = Loc;
1167 Op->EndLoc = Loc;
1168 return Op;
1169 }
1170
1171 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1172 StringRef Str, SMLoc Loc,
1173 bool HasExplicitEncodingSize = true) {
1174 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1175 Res->Tok.Data = Str.data();
1176 Res->Tok.Length = Str.size();
1177 Res->StartLoc = Loc;
1178 Res->EndLoc = Loc;
1179 return Res;
1180 }
1181
1182 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1183 unsigned RegNo, SMLoc S,
1184 SMLoc E) {
1185 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1186 Op->Reg.RegNo = RegNo;
1187 Op->Reg.Mods = Modifiers();
1188 Op->StartLoc = S;
1189 Op->EndLoc = E;
1190 return Op;
1191 }
1192
1193 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1194 const class MCExpr *Expr, SMLoc S) {
1195 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1196 Op->Expr = Expr;
1197 Op->StartLoc = S;
1198 Op->EndLoc = S;
1199 return Op;
1200 }
1201};
1202
1203raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1204 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1205 return OS;
1206}
1207
1208//===----------------------------------------------------------------------===//
1209// AsmParser
1210//===----------------------------------------------------------------------===//
1211
1212// Holds info related to the current kernel, e.g. count of SGPRs used.
1213// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1214// .amdgpu_hsa_kernel or at EOF.
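// For example (illustrative): after parsing
//   .amdgpu_hsa_kernel my_kernel
//   my_kernel:
//     v_mov_b32 v3, s1
// the tracker defines/updates the symbols .kernel.vgpr_count and
// .kernel.sgpr_count (here to at least 4 and 2, the highest used register
// index plus one).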
1215class KernelScopeInfo {
1216 int SgprIndexUnusedMin = -1;
1217 int VgprIndexUnusedMin = -1;
1218 int AgprIndexUnusedMin = -1;
1219 MCContext *Ctx = nullptr;
1220 MCSubtargetInfo const *MSTI = nullptr;
1221
1222 void usesSgprAt(int i) {
1223 if (i >= SgprIndexUnusedMin) {
1224 SgprIndexUnusedMin = ++i;
1225 if (Ctx) {
1226 MCSymbol* const Sym =
1227 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1228 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1229 }
1230 }
1231 }
1232
1233 void usesVgprAt(int i) {
1234 if (i >= VgprIndexUnusedMin) {
1235 VgprIndexUnusedMin = ++i;
1236 if (Ctx) {
1237 MCSymbol* const Sym =
1238 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1239 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1240 VgprIndexUnusedMin);
1241 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1242 }
1243 }
1244 }
1245
1246 void usesAgprAt(int i) {
1247 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1248 if (!hasMAIInsts(*MSTI))
1249 return;
1250
1251 if (i >= AgprIndexUnusedMin) {
1252 AgprIndexUnusedMin = ++i;
1253 if (Ctx) {
1254 MCSymbol* const Sym =
1255 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1256 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1257
1258 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1259 MCSymbol* const vSym =
1260 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1261 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1262 VgprIndexUnusedMin);
1263 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1264 }
1265 }
1266 }
1267
1268public:
1269 KernelScopeInfo() = default;
1270
1271 void initialize(MCContext &Context) {
1272 Ctx = &Context;
1273 MSTI = Ctx->getSubtargetInfo();
1274
1275 usesSgprAt(SgprIndexUnusedMin = -1);
1276 usesVgprAt(VgprIndexUnusedMin = -1);
1277 if (hasMAIInsts(*MSTI)) {
1278 usesAgprAt(AgprIndexUnusedMin = -1);
1279 }
1280 }
1281
1282 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1283 unsigned RegWidth) {
1284 switch (RegKind) {
1285 case IS_SGPR:
1286 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1287 break;
1288 case IS_AGPR:
1289 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1290 break;
1291 case IS_VGPR:
1292 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1293 break;
1294 default:
1295 break;
1296 }
1297 }
1298};
1299
1300class AMDGPUAsmParser : public MCTargetAsmParser {
1301 MCAsmParser &Parser;
1302
1303 unsigned ForcedEncodingSize = 0;
1304 bool ForcedDPP = false;
1305 bool ForcedSDWA = false;
1306 KernelScopeInfo KernelScope;
1307
1308 /// @name Auto-generated Match Functions
1309 /// {
1310
1311#define GET_ASSEMBLER_HEADER
1312#include "AMDGPUGenAsmMatcher.inc"
1313
1314 /// }
1315
1316private:
1317 void createConstantSymbol(StringRef Id, int64_t Val);
1318
1319 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1320 bool OutOfRangeError(SMRange Range);
1321 /// Calculate VGPR/SGPR blocks required for given target, reserved
1322 /// registers, and user-specified NextFreeXGPR values.
1323 ///
1324 /// \param Features [in] Target features, used for bug corrections.
1325 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1326 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1327 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1328 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1329 /// descriptor field, if valid.
1330 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1331 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1332 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1333 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1334 /// \param VGPRBlocks [out] Result VGPR block count.
1335 /// \param SGPRBlocks [out] Result SGPR block count.
1336 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1337 const MCExpr *FlatScrUsed, bool XNACKUsed,
1338 std::optional<bool> EnableWavefrontSize32,
1339 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1340 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1341 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
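// Rough sketch of what the block computation amounts to (an assumption for
// illustration; the authoritative formulas live in AMDGPU::IsaInfo):
//   Blocks = alignTo(max(1, NumGPRs), Granule) / Granule - 1
// where Granule is the target's register allocation granularity and, for
// VGPRs on gfx10+, depends on the wavefront size (wave32 vs wave64).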
1342 bool ParseDirectiveAMDGCNTarget();
1343 bool ParseDirectiveAMDHSACodeObjectVersion();
1344 bool ParseDirectiveAMDHSAKernel();
1345 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1346 bool ParseDirectiveAMDKernelCodeT();
1347 // TODO: Possibly make subtargetHasRegister const.
1348 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1349 bool ParseDirectiveAMDGPUHsaKernel();
1350
1351 bool ParseDirectiveISAVersion();
1352 bool ParseDirectiveHSAMetadata();
1353 bool ParseDirectivePALMetadataBegin();
1354 bool ParseDirectivePALMetadata();
1355 bool ParseDirectiveAMDGPULDS();
1356
1357 /// Common code to parse out a block of text (typically YAML) between start and
1358 /// end directives.
1359 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1360 const char *AssemblerDirectiveEnd,
1361 std::string &CollectString);
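// For example (illustrative), ParseDirectiveHSAMetadata uses this to collect
// the YAML between the metadata directives:
//   .amdgpu_metadata
//   amdhsa.version: [ 1, 2 ]
//   .end_amdgpu_metadata
// Everything between the two directives is returned in CollectString.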
1362
1363 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1364 RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1365 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1366 unsigned &RegNum, unsigned &RegWidth,
1367 bool RestoreOnFailure = false);
1368 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1369 unsigned &RegNum, unsigned &RegWidth,
1370 SmallVectorImpl<AsmToken> &Tokens);
1371 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1372 unsigned &RegWidth,
1373 SmallVectorImpl<AsmToken> &Tokens);
1374 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1375 unsigned &RegWidth,
1376 SmallVectorImpl<AsmToken> &Tokens);
1377 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1378 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1379 bool ParseRegRange(unsigned& Num, unsigned& Width);
1380 unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg,
1381 unsigned RegWidth, SMLoc Loc);
1382
1383 bool isRegister();
1384 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1385 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1386 void initializeGprCountSymbol(RegisterKind RegKind);
1387 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1388 unsigned RegWidth);
1389 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1390 bool IsAtomic);
1391
1392public:
1393 enum OperandMode {
1394 OperandMode_Default,
1395 OperandMode_NSA,
1396 };
1397
1398 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1399
1400 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1401 const MCInstrInfo &MII,
1402 const MCTargetOptions &Options)
1403 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1405
1406 if (getFeatureBits().none()) {
1407 // Set default features.
1408 copySTI().ToggleFeature("southern-islands");
1409 }
1410
1411 FeatureBitset FB = getFeatureBits();
1412 if (!FB[AMDGPU::FeatureWavefrontSize64] &&
1413 !FB[AMDGPU::FeatureWavefrontSize32]) {
1414 // If there is no default wave size it must be a generation before gfx10;
1415 // these have FeatureWavefrontSize64 in their definition already. For
1416 // gfx10+, set wave32 as a default.
1417 copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
1418 }
1419
1420 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1421
1422 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1423 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1424 createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1425 createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1426 createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1427 } else {
1428 createConstantSymbol(".option.machine_version_major", ISA.Major);
1429 createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1430 createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1431 }
1432 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1433 initializeGprCountSymbol(IS_VGPR);
1434 initializeGprCountSymbol(IS_SGPR);
1435 } else
1436 KernelScope.initialize(getContext());
1437
1438 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1439 createConstantSymbol(Symbol, Code);
1440
1441 createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1442 createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1443 createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1444 }
1445
1446 bool hasMIMG_R128() const {
1447 return AMDGPU::hasMIMG_R128(getSTI());
1448 }
1449
1450 bool hasPackedD16() const {
1451 return AMDGPU::hasPackedD16(getSTI());
1452 }
1453
1454 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1455
1456 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1457
1458 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1459
1460 bool isSI() const {
1461 return AMDGPU::isSI(getSTI());
1462 }
1463
1464 bool isCI() const {
1465 return AMDGPU::isCI(getSTI());
1466 }
1467
1468 bool isVI() const {
1469 return AMDGPU::isVI(getSTI());
1470 }
1471
1472 bool isGFX9() const {
1473 return AMDGPU::isGFX9(getSTI());
1474 }
1475
1476 // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1477 bool isGFX90A() const {
1478 return AMDGPU::isGFX90A(getSTI());
1479 }
1480
1481 bool isGFX940() const {
1482 return AMDGPU::isGFX940(getSTI());
1483 }
1484
1485 bool isGFX9Plus() const {
1486 return AMDGPU::isGFX9Plus(getSTI());
1487 }
1488
1489 bool isGFX10() const {
1490 return AMDGPU::isGFX10(getSTI());
1491 }
1492
1493 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1494
1495 bool isGFX11() const {
1496 return AMDGPU::isGFX11(getSTI());
1497 }
1498
1499 bool isGFX11Plus() const {
1500 return AMDGPU::isGFX11Plus(getSTI());
1501 }
1502
1503 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1504
1505 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1506
1507 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1508
1509 bool isGFX10_BEncoding() const {
1510 return AMDGPU::isGFX10_BEncoding(getSTI());
1511 }
1512
1513 bool hasInv2PiInlineImm() const {
1514 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1515 }
1516
1517 bool hasFlatOffsets() const {
1518 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1519 }
1520
1521 bool hasArchitectedFlatScratch() const {
1522 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1523 }
1524
1525 bool hasSGPR102_SGPR103() const {
1526 return !isVI() && !isGFX9();
1527 }
1528
1529 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1530
1531 bool hasIntClamp() const {
1532 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1533 }
1534
1535 bool hasPartialNSAEncoding() const {
1536 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1537 }
1538
1539 unsigned getNSAMaxSize(bool HasSampler = false) const {
1540 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1541 }
1542
1543 unsigned getMaxNumUserSGPRs() const {
1544 return AMDGPU::getMaxNumUserSGPRs(getSTI());
1545 }
1546
1547 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1548
1549 AMDGPUTargetStreamer &getTargetStreamer() {
1550 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1551 return static_cast<AMDGPUTargetStreamer &>(TS);
1552 }
1553
1554 const MCRegisterInfo *getMRI() const {
1555 // We need this const_cast because for some reason getContext() is not const
1556 // in MCAsmParser.
1557 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1558 }
1559
1560 const MCInstrInfo *getMII() const {
1561 return &MII;
1562 }
1563
1564 const FeatureBitset &getFeatureBits() const {
1565 return getSTI().getFeatureBits();
1566 }
1567
1568 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1569 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1570 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1571
1572 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1573 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1574 bool isForcedDPP() const { return ForcedDPP; }
1575 bool isForcedSDWA() const { return ForcedSDWA; }
1576 ArrayRef<unsigned> getMatchedVariants() const;
1577 StringRef getMatchedVariantName() const;
1578
1579 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1580 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1581 bool RestoreOnFailure);
1582 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1583 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1584 SMLoc &EndLoc) override;
1585 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1586 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1587 unsigned Kind) override;
1588 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1589 OperandVector &Operands, MCStreamer &Out,
1590 uint64_t &ErrorInfo,
1591 bool MatchingInlineAsm) override;
1592 bool ParseDirective(AsmToken DirectiveID) override;
1593 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1594 OperandMode Mode = OperandMode_Default);
1595 StringRef parseMnemonicSuffix(StringRef Name);
1596 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1597 SMLoc NameLoc, OperandVector &Operands) override;
1598 //bool ProcessInstruction(MCInst &Inst);
1599
1601
1602 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1603
1604 ParseStatus
1605 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1606 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1607 std::function<bool(int64_t &)> ConvertResult = nullptr);
1608
1609 ParseStatus parseOperandArrayWithPrefix(
1610 const char *Prefix, OperandVector &Operands,
1611 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1612 bool (*ConvertResult)(int64_t &) = nullptr);
1613
1614 ParseStatus
1615 parseNamedBit(StringRef Name, OperandVector &Operands,
1616 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1617 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1619 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1620 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1621 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1622 SMLoc &StringLoc);
1623 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1624 StringRef Name,
1625 ArrayRef<const char *> Ids,
1626 int64_t &IntVal);
1627 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1628 StringRef Name,
1629 ArrayRef<const char *> Ids,
1630 AMDGPUOperand::ImmTy Type);
1631
1632 bool isModifier();
1633 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1634 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1635 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1636 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1637 bool parseSP3NegModifier();
1638 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1639 bool HasLit = false);
1641 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1642 bool HasLit = false);
1643 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1644 bool AllowImm = true);
1645 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1646 bool AllowImm = true);
1647 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1648 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1649 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1650 ParseStatus tryParseIndexKey(OperandVector &Operands,
1651 AMDGPUOperand::ImmTy ImmTy);
1652 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1653 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1654
1655 ParseStatus parseDfmtNfmt(int64_t &Format);
1656 ParseStatus parseUfmt(int64_t &Format);
1657 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1658 int64_t &Format);
1659 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1660 int64_t &Format);
1661 ParseStatus parseFORMAT(OperandVector &Operands);
1662 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1663 ParseStatus parseNumericFormat(int64_t &Format);
1664 ParseStatus parseFlatOffset(OperandVector &Operands);
1665 ParseStatus parseR128A16(OperandVector &Operands);
1667 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1668 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1669
1670 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1671
1672 bool parseCnt(int64_t &IntVal);
1673 ParseStatus parseSWaitCnt(OperandVector &Operands);
1674
1675 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1676 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1677 ParseStatus parseDepCtr(OperandVector &Operands);
1678
1679 bool parseDelay(int64_t &Delay);
1680 ParseStatus parseSDelayALU(OperandVector &Operands);
1681
1682 ParseStatus parseHwreg(OperandVector &Operands);
1683
1684private:
1685 struct OperandInfoTy {
1686 SMLoc Loc;
1687 int64_t Val;
1688 bool IsSymbolic = false;
1689 bool IsDefined = false;
1690
1691 OperandInfoTy(int64_t Val) : Val(Val) {}
1692 };
1693
1694 struct StructuredOpField : OperandInfoTy {
1695 StringLiteral Id;
1696 StringLiteral Desc;
1697 unsigned Width;
1698 bool IsDefined = false;
1699
1700 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1701 int64_t Default)
1702 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1703 virtual ~StructuredOpField() = default;
1704
1705 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1706 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1707 return false;
1708 }
1709
1710 virtual bool validate(AMDGPUAsmParser &Parser) const {
1711 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1712 return Error(Parser, "not supported on this GPU");
1713 if (!isUIntN(Width, Val))
1714 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1715 return true;
1716 }
1717 };
1718
1719 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1720 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1721
1722 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1723 bool validateSendMsg(const OperandInfoTy &Msg,
1724 const OperandInfoTy &Op,
1725 const OperandInfoTy &Stream);
1726
1727 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1728 OperandInfoTy &Width);
1729
1730 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1731 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1732 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1733
1734 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1735 const OperandVector &Operands) const;
1736 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1737 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1738 SMLoc getLitLoc(const OperandVector &Operands,
1739 bool SearchMandatoryLiterals = false) const;
1740 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1741 SMLoc getConstLoc(const OperandVector &Operands) const;
1742 SMLoc getInstLoc(const OperandVector &Operands) const;
1743
1744 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1745 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1746 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1747 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1748 bool validateSOPLiteral(const MCInst &Inst) const;
1749 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1750 bool validateVOPDRegBankConstraints(const MCInst &Inst,
1751 const OperandVector &Operands);
1752 bool validateIntClampSupported(const MCInst &Inst);
1753 bool validateMIMGAtomicDMask(const MCInst &Inst);
1754 bool validateMIMGGatherDMask(const MCInst &Inst);
1755 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1756 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1757 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1758 bool validateMIMGD16(const MCInst &Inst);
1759 bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1760 bool validateMIMGMSAA(const MCInst &Inst);
1761 bool validateOpSel(const MCInst &Inst);
1762 bool validateNeg(const MCInst &Inst, int OpName);
1763 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1764 bool validateVccOperand(unsigned Reg) const;
1765 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1766 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1767 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1768 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1769 bool validateAGPRLdSt(const MCInst &Inst) const;
1770 bool validateVGPRAlign(const MCInst &Inst) const;
1771 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1772 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1773 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1774 bool validateDivScale(const MCInst &Inst);
1775 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1776 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1777 const SMLoc &IDLoc);
1778 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1779 const unsigned CPol);
1780 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1781 std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1782 unsigned getConstantBusLimit(unsigned Opcode) const;
1783 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1784 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1785 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1786
1787 bool isSupportedMnemo(StringRef Mnemo,
1788 const FeatureBitset &FBS);
1789 bool isSupportedMnemo(StringRef Mnemo,
1790 const FeatureBitset &FBS,
1791 ArrayRef<unsigned> Variants);
1792 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1793
1794 bool isId(const StringRef Id) const;
1795 bool isId(const AsmToken &Token, const StringRef Id) const;
1796 bool isToken(const AsmToken::TokenKind Kind) const;
1797 StringRef getId() const;
1798 bool trySkipId(const StringRef Id);
1799 bool trySkipId(const StringRef Pref, const StringRef Id);
1800 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1801 bool trySkipToken(const AsmToken::TokenKind Kind);
1802 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1803 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1804 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1805
1806 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1807 AsmToken::TokenKind getTokenKind() const;
1808 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1810 StringRef getTokenStr() const;
1811 AsmToken peekToken(bool ShouldSkipSpace = true);
1812 AsmToken getToken() const;
1813 SMLoc getLoc() const;
1814 void lex();
1815
1816public:
1817 void onBeginOfFile() override;
1818 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1819
1820 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1821
1822 ParseStatus parseExpTgt(OperandVector &Operands);
1823 ParseStatus parseSendMsg(OperandVector &Operands);
1824 ParseStatus parseInterpSlot(OperandVector &Operands);
1825 ParseStatus parseInterpAttr(OperandVector &Operands);
1826 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1827 ParseStatus parseBoolReg(OperandVector &Operands);
1828
1829 bool parseSwizzleOperand(int64_t &Op,
1830 const unsigned MinVal,
1831 const unsigned MaxVal,
1832 const StringRef ErrMsg,
1833 SMLoc &Loc);
1834 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1835 const unsigned MinVal,
1836 const unsigned MaxVal,
1837 const StringRef ErrMsg);
1838 ParseStatus parseSwizzle(OperandVector &Operands);
1839 bool parseSwizzleOffset(int64_t &Imm);
1840 bool parseSwizzleMacro(int64_t &Imm);
1841 bool parseSwizzleQuadPerm(int64_t &Imm);
1842 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1843 bool parseSwizzleBroadcast(int64_t &Imm);
1844 bool parseSwizzleSwap(int64_t &Imm);
1845 bool parseSwizzleReverse(int64_t &Imm);
1846
1847 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1848 int64_t parseGPRIdxMacro();
1849
1850 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1851 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1852
1853 ParseStatus parseOModSI(OperandVector &Operands);
1854
1855 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1856 OptionalImmIndexMap &OptionalIdx);
1857 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1858 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1859 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1860 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1861
1862 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1863 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1864 OptionalImmIndexMap &OptionalIdx);
1865 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1866 OptionalImmIndexMap &OptionalIdx);
1867
1868 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1869 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1870
1871 bool parseDimId(unsigned &Encoding);
1873 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1875 ParseStatus parseDPPCtrl(OperandVector &Operands);
1876 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1877 int64_t parseDPPCtrlSel(StringRef Ctrl);
1878 int64_t parseDPPCtrlPerm();
1879 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1880 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1881 cvtDPP(Inst, Operands, true);
1882 }
1883 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1884 bool IsDPP8 = false);
1885 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1886 cvtVOP3DPP(Inst, Operands, true);
1887 }
1888
1889 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1890 AMDGPUOperand::ImmTy Type);
1891 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1892 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1893 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1894 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1895 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1896 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1897 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1898 uint64_t BasicInstType,
1899 bool SkipDstVcc = false,
1900 bool SkipSrcVcc = false);
1901
1902 ParseStatus parseEndpgm(OperandVector &Operands);
1903
1905};
1906
1907} // end anonymous namespace
1908
1909// May be called with integer type with equivalent bitwidth.
1910static const fltSemantics *getFltSemantics(unsigned Size) {
1911 switch (Size) {
1912 case 4:
1913 return &APFloat::IEEEsingle();
1914 case 8:
1915 return &APFloat::IEEEdouble();
1916 case 2:
1917 return &APFloat::IEEEhalf();
1918 default:
1919 llvm_unreachable("unsupported fp type");
1920 }
1921}
1922
1923 static const fltSemantics *getFltSemantics(MVT VT) {
1924 return getFltSemantics(VT.getSizeInBits() / 8);
1925 }
1926
1927 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1928 switch (OperandType) {
1929 // When floating-point immediate is used as operand of type i16, the 32-bit
1930 // representation of the constant truncated to the 16 LSBs should be used.
1950 return &APFloat::IEEEsingle();
1956 return &APFloat::IEEEdouble();
1965 return &APFloat::IEEEhalf();
1973 return &APFloat::BFloat();
1974 default:
1975 llvm_unreachable("unsupported fp type");
1976 }
1977}
1978
1979//===----------------------------------------------------------------------===//
1980// Operand
1981//===----------------------------------------------------------------------===//
1982
1983static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1984 bool Lost;
1985
1986 // Convert literal to single precision
1987 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1988 APFloat::rmNearestTiesToEven,
1989 &Lost);
1990 // We allow precision loss but not overflow or underflow
1991 if (Status != APFloat::opOK &&
1992 Lost &&
1993 ((Status & APFloat::opOverflow) != 0 ||
1994 (Status & APFloat::opUnderflow) != 0)) {
1995 return false;
1996 }
1997
1998 return true;
1999}
2000
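// For example, with Size = 16 both 0xFFFF and -1 are safely truncatable
// (the former as an unsigned value, the latter as a signed one), while
// 0x1FFFF is not.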
2001static bool isSafeTruncation(int64_t Val, unsigned Size) {
2002 return isUIntN(Size, Val) || isIntN(Size, Val);
2003}
2004
2005static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2006 if (VT.getScalarType() == MVT::i16)
2007 return isInlinableLiteral32(Val, HasInv2Pi);
2008
2009 if (VT.getScalarType() == MVT::f16)
2010 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2011
2012 assert(VT.getScalarType() == MVT::bf16);
2013
2014 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2015}
2016
2017bool AMDGPUOperand::isInlinableImm(MVT type) const {
2018
2019 // This is a hack to enable named inline values like
2020 // shared_base with both 32-bit and 64-bit operands.
2021 // Note that these values are defined as
2022 // 32-bit operands only.
2023 if (isInlineValue()) {
2024 return true;
2025 }
2026
2027 if (!isImmTy(ImmTyNone)) {
2028 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2029 return false;
2030 }
2031 // TODO: We should avoid using host float here. It would be better to
2032 // check the float bit values which is what a few other places do.
2033 // We've had bot failures before due to weird NaN support on mips hosts.
2034
2035 APInt Literal(64, Imm.Val);
2036
2037 if (Imm.IsFPImm) { // We got fp literal token
2038 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2039 return AMDGPU::isInlinableLiteral64(Imm.Val,
2040 AsmParser->hasInv2PiInlineImm());
2041 }
2042
2043 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2044 if (!canLosslesslyConvertToFPType(FPLiteral, type))
2045 return false;
2046
2047 if (type.getScalarSizeInBits() == 16) {
2048 bool Lost = false;
2049 switch (type.getScalarType().SimpleTy) {
2050 default:
2051 llvm_unreachable("unknown 16-bit type");
2052 case MVT::bf16:
2053 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2054 &Lost);
2055 break;
2056 case MVT::f16:
2057 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2058 &Lost);
2059 break;
2060 case MVT::i16:
2061 FPLiteral.convert(APFloatBase::IEEEsingle(),
2062 APFloat::rmNearestTiesToEven, &Lost);
2063 break;
2064 }
2065 // We need to use the 32-bit representation here because when a floating-point
2066 // inline constant is used as an i16 operand, its 32-bit representation
2067 // will be used. We will need the 32-bit value to check if
2068 // it is an FP inline constant.
2069 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2070 return isInlineableLiteralOp16(ImmVal, type,
2071 AsmParser->hasInv2PiInlineImm());
2072 }
2073
2074 // Check if single precision literal is inlinable
2075 return AMDGPU::isInlinableLiteral32(
2076 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2077 AsmParser->hasInv2PiInlineImm());
2078 }
2079
2080 // We got int literal token.
2081 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2082 return AMDGPU::isInlinableLiteral64(Imm.Val,
2083 AsmParser->hasInv2PiInlineImm());
2084 }
2085
2086 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2087 return false;
2088 }
2089
2090 if (type.getScalarSizeInBits() == 16) {
2091 return isInlineableLiteralOp16(
2092 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2093 type, AsmParser->hasInv2PiInlineImm());
2094 }
2095
2096 return AMDGPU::isInlinableLiteral32(
2097 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2098 AsmParser->hasInv2PiInlineImm());
2099}
2100
2101bool AMDGPUOperand::isLiteralImm(MVT type) const {
2102 // Check that this immediate can be added as literal
2103 if (!isImmTy(ImmTyNone)) {
2104 return false;
2105 }
2106
2107 if (!Imm.IsFPImm) {
2108 // We got int literal token.
2109
2110 if (type == MVT::f64 && hasFPModifiers()) {
2111 // FP modifiers cannot be applied to integer literals while preserving the
2112 // same semantics for VOP1/2/C and VOP3 because of integer truncation. To
2113 // avoid ambiguity, disable these cases.
2114 return false;
2115 }
2116
2117 unsigned Size = type.getSizeInBits();
2118 if (Size == 64)
2119 Size = 32;
2120
2121 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2122 // types.
2123 return isSafeTruncation(Imm.Val, Size);
2124 }
2125
2126 // We got fp literal token
2127 if (type == MVT::f64) { // Expected 64-bit fp operand
2128 // The low 32 bits of such a literal would be set to zeroes, but we accept these literals
2129 return true;
2130 }
2131
2132 if (type == MVT::i64) { // Expected 64-bit int operand
2133 // We don't allow fp literals in 64-bit integer instructions. It is
2134 // unclear how we should encode them.
2135 return false;
2136 }
2137
2138 // We allow fp literals with f16x2 operands assuming that the specified
2139 // literal goes into the lower half and the upper half is zero. We also
2140 // require that the literal may be losslessly converted to f16.
2141 //
2142 // For i16x2 operands, we assume that the specified literal is encoded as a
2143 // single-precision float. This is pretty odd, but it matches SP3 and what
2144 // happens in hardware.
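 // For example, a literal "1.0" given to a v2f16 operand is assumed to be
 // encoded as the f16 value 0x3C00 in the low half with zeroes in the high half.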
2145 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2146 : (type == MVT::v2i16) ? MVT::f32
2147 : (type == MVT::v2f32) ? MVT::f32
2148 : type;
2149
2150 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2151 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2152}
2153
2154bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2155 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2156}
2157
2158bool AMDGPUOperand::isVRegWithInputMods() const {
2159 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2160 // GFX90A allows DPP on 64-bit operands.
2161 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2162 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2163}
2164
2165template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2166 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2167 : AMDGPU::VGPR_16_Lo128RegClassID);
2168}
2169
2170bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2171 if (AsmParser->isVI())
2172 return isVReg32();
2173 if (AsmParser->isGFX9Plus())
2174 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2175 return false;
2176}
2177
2178bool AMDGPUOperand::isSDWAFP16Operand() const {
2179 return isSDWAOperand(MVT::f16);
2180}
2181
2182bool AMDGPUOperand::isSDWAFP32Operand() const {
2183 return isSDWAOperand(MVT::f32);
2184}
2185
2186bool AMDGPUOperand::isSDWAInt16Operand() const {
2187 return isSDWAOperand(MVT::i16);
2188}
2189
2190bool AMDGPUOperand::isSDWAInt32Operand() const {
2191 return isSDWAOperand(MVT::i32);
2192}
2193
2194bool AMDGPUOperand::isBoolReg() const {
2195 auto FB = AsmParser->getFeatureBits();
2196 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2197 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2198}
2199
2200uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2201{
2202 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2203 assert(Size == 2 || Size == 4 || Size == 8);
2204
2205 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
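 // For example, with Size == 4 and the Neg modifier set, 0x3F800000 (1.0f)
 // becomes 0xBF800000 (-1.0f) below.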
2206
2207 if (Imm.Mods.Abs) {
2208 Val &= ~FpSignMask;
2209 }
2210 if (Imm.Mods.Neg) {
2211 Val ^= FpSignMask;
2212 }
2213
2214 return Val;
2215}
2216
2217void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2218 if (isExpr()) {
2219 Inst.addOperand(MCOperand::createExpr(Expr));
2220 return;
2221 }
2222
2223 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2224 Inst.getNumOperands())) {
2225 addLiteralImmOperand(Inst, Imm.Val,
2226 ApplyModifiers &
2227 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2228 } else {
2229 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2230 Inst.addOperand(MCOperand::createImm(Imm.Val));
2231 setImmKindNone();
2232 }
2233}
2234
2235void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2236 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2237 auto OpNum = Inst.getNumOperands();
2238 // Check that this operand accepts literals
2239 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2240
2241 if (ApplyModifiers) {
2242 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2243 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2244 Val = applyInputFPModifiers(Val, Size);
2245 }
2246
2247 APInt Literal(64, Val);
2248 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2249
2250 if (Imm.IsFPImm) { // We got fp literal token
2251 switch (OpTy) {
2257 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2258 AsmParser->hasInv2PiInlineImm())) {
2259 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2260 setImmKindConst();
2261 return;
2262 }
2263
2264 // Non-inlineable
2265 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2266 // For fp operands we check if low 32 bits are zeros
2267 if (Literal.getLoBits(32) != 0) {
2268 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2269 "Can't encode literal as exact 64-bit floating-point operand. "
2270 "Low 32-bits will be set to zero");
2271 Val &= 0xffffffff00000000u;
2272 }
2273
2274 Inst.addOperand(MCOperand::createImm(Val));
2275 setImmKindLiteral();
2276 return;
2277 }
2278
2279 // We don't allow fp literals in 64-bit integer instructions. It is
2280 // unclear how we should encode them. This case should be checked earlier
2281 // in predicate methods (isLiteralImm())
2282 llvm_unreachable("fp literal in 64-bit integer instruction.");
2283
2291 if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
2292 // This is 1/(2*pi), which is going to be truncated to bf16 with a
2293 // loss of precision. The constant represents the idiomatic fp32 value of
2294 // 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16 bits
2295 // cleared. Prevent the rounding below.
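 // (0x3E22 is the upper half of the fp32 bit pattern 0x3E22F983 of 1/(2*pi).)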
2296 Inst.addOperand(MCOperand::createImm(0x3e22));
2297 setImmKindLiteral();
2298 return;
2299 }
2300 [[fallthrough]];
2301
2329 bool lost;
2330 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2331 // Convert literal to single precision
2332 FPLiteral.convert(*getOpFltSemantics(OpTy),
2333 APFloat::rmNearestTiesToEven, &lost);
2334 // We allow precision loss but not overflow or underflow. This should be
2335 // checked earlier in isLiteralImm()
2336
2337 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2338 Inst.addOperand(MCOperand::createImm(ImmVal));
2339 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2340 setImmKindMandatoryLiteral();
2341 } else {
2342 setImmKindLiteral();
2343 }
2344 return;
2345 }
2346 default:
2347 llvm_unreachable("invalid operand size");
2348 }
2349
2350 return;
2351 }
2352
2353 // We got int literal token.
2354 // Only sign extend inline immediates.
2355 switch (OpTy) {
2371 if (isSafeTruncation(Val, 32) &&
2372 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2373 AsmParser->hasInv2PiInlineImm())) {
2374 Inst.addOperand(MCOperand::createImm(Val));
2375 setImmKindConst();
2376 return;
2377 }
2378
2379 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2380 setImmKindLiteral();
2381 return;
2382
2388 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2389 Inst.addOperand(MCOperand::createImm(Val));
2390 setImmKindConst();
2391 return;
2392 }
2393
2394 Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
2395 : Lo_32(Val);
2396
2397 Inst.addOperand(MCOperand::createImm(Val));
2398 setImmKindLiteral();
2399 return;
2400
2404 if (isSafeTruncation(Val, 16) &&
2405 AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2406 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2407 setImmKindConst();
2408 return;
2409 }
2410
2411 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2412 setImmKindLiteral();
2413 return;
2414
2419 if (isSafeTruncation(Val, 16) &&
2420 AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2421 AsmParser->hasInv2PiInlineImm())) {
2423 setImmKindConst();
2424 return;
2425 }
2426
2427 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2428 setImmKindLiteral();
2429 return;
2430
2435 if (isSafeTruncation(Val, 16) &&
2436 AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2437 AsmParser->hasInv2PiInlineImm())) {
2439 setImmKindConst();
2440 return;
2441 }
2442
2443 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2444 setImmKindLiteral();
2445 return;
2446
2449 assert(isSafeTruncation(Val, 16));
2450 assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2452 return;
2453 }
2456 assert(isSafeTruncation(Val, 16));
2457 assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2458 AsmParser->hasInv2PiInlineImm()));
2459
2461 return;
2462 }
2463
2466 assert(isSafeTruncation(Val, 16));
2467 assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2468 AsmParser->hasInv2PiInlineImm()));
2469
2471 return;
2472 }
2473
2475 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2476 setImmKindMandatoryLiteral();
2477 return;
2479 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2480 setImmKindMandatoryLiteral();
2481 return;
2482 default:
2483 llvm_unreachable("invalid operand size");
2484 }
2485}
2486
2487void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2488 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2489}
2490
2491bool AMDGPUOperand::isInlineValue() const {
2492 return isRegKind() && ::isInlineValue(getReg());
2493}
2494
2495//===----------------------------------------------------------------------===//
2496// AsmParser
2497//===----------------------------------------------------------------------===//
2498
2499void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2500 // TODO: make these pre-defined variables read-only.
2501 // Currently there is no suitable machinery in the core llvm-mc for this.
2502 // MCSymbol::isRedefinable is intended for another purpose, and
2503 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
2504 MCContext &Ctx = getContext();
2505 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2506 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2507}
2508
2509static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2510 if (Is == IS_VGPR) {
2511 switch (RegWidth) {
2512 default: return -1;
2513 case 32:
2514 return AMDGPU::VGPR_32RegClassID;
2515 case 64:
2516 return AMDGPU::VReg_64RegClassID;
2517 case 96:
2518 return AMDGPU::VReg_96RegClassID;
2519 case 128:
2520 return AMDGPU::VReg_128RegClassID;
2521 case 160:
2522 return AMDGPU::VReg_160RegClassID;
2523 case 192:
2524 return AMDGPU::VReg_192RegClassID;
2525 case 224:
2526 return AMDGPU::VReg_224RegClassID;
2527 case 256:
2528 return AMDGPU::VReg_256RegClassID;
2529 case 288:
2530 return AMDGPU::VReg_288RegClassID;
2531 case 320:
2532 return AMDGPU::VReg_320RegClassID;
2533 case 352:
2534 return AMDGPU::VReg_352RegClassID;
2535 case 384:
2536 return AMDGPU::VReg_384RegClassID;
2537 case 512:
2538 return AMDGPU::VReg_512RegClassID;
2539 case 1024:
2540 return AMDGPU::VReg_1024RegClassID;
2541 }
2542 } else if (Is == IS_TTMP) {
2543 switch (RegWidth) {
2544 default: return -1;
2545 case 32:
2546 return AMDGPU::TTMP_32RegClassID;
2547 case 64:
2548 return AMDGPU::TTMP_64RegClassID;
2549 case 128:
2550 return AMDGPU::TTMP_128RegClassID;
2551 case 256:
2552 return AMDGPU::TTMP_256RegClassID;
2553 case 512:
2554 return AMDGPU::TTMP_512RegClassID;
2555 }
2556 } else if (Is == IS_SGPR) {
2557 switch (RegWidth) {
2558 default: return -1;
2559 case 32:
2560 return AMDGPU::SGPR_32RegClassID;
2561 case 64:
2562 return AMDGPU::SGPR_64RegClassID;
2563 case 96:
2564 return AMDGPU::SGPR_96RegClassID;
2565 case 128:
2566 return AMDGPU::SGPR_128RegClassID;
2567 case 160:
2568 return AMDGPU::SGPR_160RegClassID;
2569 case 192:
2570 return AMDGPU::SGPR_192RegClassID;
2571 case 224:
2572 return AMDGPU::SGPR_224RegClassID;
2573 case 256:
2574 return AMDGPU::SGPR_256RegClassID;
2575 case 288:
2576 return AMDGPU::SGPR_288RegClassID;
2577 case 320:
2578 return AMDGPU::SGPR_320RegClassID;
2579 case 352:
2580 return AMDGPU::SGPR_352RegClassID;
2581 case 384:
2582 return AMDGPU::SGPR_384RegClassID;
2583 case 512:
2584 return AMDGPU::SGPR_512RegClassID;
2585 }
2586 } else if (Is == IS_AGPR) {
2587 switch (RegWidth) {
2588 default: return -1;
2589 case 32:
2590 return AMDGPU::AGPR_32RegClassID;
2591 case 64:
2592 return AMDGPU::AReg_64RegClassID;
2593 case 96:
2594 return AMDGPU::AReg_96RegClassID;
2595 case 128:
2596 return AMDGPU::AReg_128RegClassID;
2597 case 160:
2598 return AMDGPU::AReg_160RegClassID;
2599 case 192:
2600 return AMDGPU::AReg_192RegClassID;
2601 case 224:
2602 return AMDGPU::AReg_224RegClassID;
2603 case 256:
2604 return AMDGPU::AReg_256RegClassID;
2605 case 288:
2606 return AMDGPU::AReg_288RegClassID;
2607 case 320:
2608 return AMDGPU::AReg_320RegClassID;
2609 case 352:
2610 return AMDGPU::AReg_352RegClassID;
2611 case 384:
2612 return AMDGPU::AReg_384RegClassID;
2613 case 512:
2614 return AMDGPU::AReg_512RegClassID;
2615 case 1024:
2616 return AMDGPU::AReg_1024RegClassID;
2617 }
2618 }
2619 return -1;
2620}
2621
2624 .Case("exec", AMDGPU::EXEC)
2625 .Case("vcc", AMDGPU::VCC)
2626 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2627 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2628 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2629 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2630 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2631 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2632 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2633 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2634 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2635 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2636 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2637 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2638 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2639 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2640 .Case("m0", AMDGPU::M0)
2641 .Case("vccz", AMDGPU::SRC_VCCZ)
2642 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2643 .Case("execz", AMDGPU::SRC_EXECZ)
2644 .Case("src_execz", AMDGPU::SRC_EXECZ)
2645 .Case("scc", AMDGPU::SRC_SCC)
2646 .Case("src_scc", AMDGPU::SRC_SCC)
2647 .Case("tba", AMDGPU::TBA)
2648 .Case("tma", AMDGPU::TMA)
2649 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2650 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2651 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2652 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2653 .Case("vcc_lo", AMDGPU::VCC_LO)
2654 .Case("vcc_hi", AMDGPU::VCC_HI)
2655 .Case("exec_lo", AMDGPU::EXEC_LO)
2656 .Case("exec_hi", AMDGPU::EXEC_HI)
2657 .Case("tma_lo", AMDGPU::TMA_LO)
2658 .Case("tma_hi", AMDGPU::TMA_HI)
2659 .Case("tba_lo", AMDGPU::TBA_LO)
2660 .Case("tba_hi", AMDGPU::TBA_HI)
2661 .Case("pc", AMDGPU::PC_REG)
2662 .Case("null", AMDGPU::SGPR_NULL)
2663 .Default(AMDGPU::NoRegister);
2664}
2665
2666bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2667 SMLoc &EndLoc, bool RestoreOnFailure) {
2668 auto R = parseRegister();
2669 if (!R) return true;
2670 assert(R->isReg());
2671 RegNo = R->getReg();
2672 StartLoc = R->getStartLoc();
2673 EndLoc = R->getEndLoc();
2674 return false;
2675}
2676
2677bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2678 SMLoc &EndLoc) {
2679 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2680}
2681
2682ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2683 SMLoc &EndLoc) {
2684 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2685 bool PendingErrors = getParser().hasPendingError();
2686 getParser().clearPendingErrors();
2687 if (PendingErrors)
2688 return ParseStatus::Failure;
2689 if (Result)
2690 return ParseStatus::NoMatch;
2691 return ParseStatus::Success;
2692}
2693
2694bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2695 RegisterKind RegKind, unsigned Reg1,
2696 SMLoc Loc) {
2697 switch (RegKind) {
2698 case IS_SPECIAL:
2699 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2700 Reg = AMDGPU::EXEC;
2701 RegWidth = 64;
2702 return true;
2703 }
2704 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2705 Reg = AMDGPU::FLAT_SCR;
2706 RegWidth = 64;
2707 return true;
2708 }
2709 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2710 Reg = AMDGPU::XNACK_MASK;
2711 RegWidth = 64;
2712 return true;
2713 }
2714 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2715 Reg = AMDGPU::VCC;
2716 RegWidth = 64;
2717 return true;
2718 }
2719 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2720 Reg = AMDGPU::TBA;
2721 RegWidth = 64;
2722 return true;
2723 }
2724 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2725 Reg = AMDGPU::TMA;
2726 RegWidth = 64;
2727 return true;
2728 }
2729 Error(Loc, "register does not fit in the list");
2730 return false;
2731 case IS_VGPR:
2732 case IS_SGPR:
2733 case IS_AGPR:
2734 case IS_TTMP:
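 // Regular registers must have consecutive indices; for example, after
 // parsing [v0, v1] (RegWidth == 64), the next list element must be v2.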
2735 if (Reg1 != Reg + RegWidth / 32) {
2736 Error(Loc, "registers in a list must have consecutive indices");
2737 return false;
2738 }
2739 RegWidth += 32;
2740 return true;
2741 default:
2742 llvm_unreachable("unexpected register kind");
2743 }
2744}
2745
2746 struct RegInfo {
2747 StringLiteral Name;
2748 RegisterKind Kind;
2749};
2750
2751static constexpr RegInfo RegularRegisters[] = {
2752 {{"v"}, IS_VGPR},
2753 {{"s"}, IS_SGPR},
2754 {{"ttmp"}, IS_TTMP},
2755 {{"acc"}, IS_AGPR},
2756 {{"a"}, IS_AGPR},
2757};
2758
2759static bool isRegularReg(RegisterKind Kind) {
2760 return Kind == IS_VGPR ||
2761 Kind == IS_SGPR ||
2762 Kind == IS_TTMP ||
2763 Kind == IS_AGPR;
2764}
2765
2766 static const RegInfo *getRegularRegInfo(StringRef Str) {
2767 for (const RegInfo &Reg : RegularRegisters)
2768 if (Str.starts_with(Reg.Name))
2769 return &Reg;
2770 return nullptr;
2771}
2772
2773static bool getRegNum(StringRef Str, unsigned& Num) {
2774 return !Str.getAsInteger(10, Num);
2775}
2776
2777bool
2778AMDGPUAsmParser::isRegister(const AsmToken &Token,
2779 const AsmToken &NextToken) const {
2780
2781 // A list of consecutive registers: [s0,s1,s2,s3]
2782 if (Token.is(AsmToken::LBrac))
2783 return true;
2784
2785 if (!Token.is(AsmToken::Identifier))
2786 return false;
2787
2788 // A single register like s0 or a range of registers like s[0:1]
2789
2790 StringRef Str = Token.getString();
2791 const RegInfo *Reg = getRegularRegInfo(Str);
2792 if (Reg) {
2793 StringRef RegName = Reg->Name;
2794 StringRef RegSuffix = Str.substr(RegName.size());
2795 if (!RegSuffix.empty()) {
2796 RegSuffix.consume_back(".l");
2797 RegSuffix.consume_back(".h");
2798 unsigned Num;
2799 // A single register with an index: rXX
2800 if (getRegNum(RegSuffix, Num))
2801 return true;
2802 } else {
2803 // A range of registers: r[XX:YY].
2804 if (NextToken.is(AsmToken::LBrac))
2805 return true;
2806 }
2807 }
2808
2809 return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2810}
2811
2812bool
2813AMDGPUAsmParser::isRegister()
2814{
2815 return isRegister(getToken(), peekToken());
2816}
2817
2818unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2819 unsigned SubReg, unsigned RegWidth,
2820 SMLoc Loc) {
2821 assert(isRegularReg(RegKind));
2822
2823 unsigned AlignSize = 1;
2824 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2825 // SGPR and TTMP registers must be aligned.
2826 // Max required alignment is 4 dwords.
2827 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2828 }
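 // For example, a 64-bit SGPR pair must start at an even index: s[2:3] is
 // accepted, while s[1:2] is rejected below with "invalid register alignment".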
2829
2830 if (RegNum % AlignSize != 0) {
2831 Error(Loc, "invalid register alignment");
2832 return AMDGPU::NoRegister;
2833 }
2834
2835 unsigned RegIdx = RegNum / AlignSize;
2836 int RCID = getRegClass(RegKind, RegWidth);
2837 if (RCID == -1) {
2838 Error(Loc, "invalid or unsupported register size");
2839 return AMDGPU::NoRegister;
2840 }
2841
2842 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2843 const MCRegisterClass RC = TRI->getRegClass(RCID);
2844 if (RegIdx >= RC.getNumRegs()) {
2845 Error(Loc, "register index is out of range");
2846 return AMDGPU::NoRegister;
2847 }
2848
2849 unsigned Reg = RC.getRegister(RegIdx);
2850
2851 if (SubReg) {
2852 Reg = TRI->getSubReg(Reg, SubReg);
2853
2854 // Currently all regular registers have their .l and .h subregisters, so
2855 // we should never need to generate an error here.
2856 assert(Reg && "Invalid subregister!");
2857 }
2858
2859 return Reg;
2860}
2861
2862bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2863 int64_t RegLo, RegHi;
2864 if (!skipToken(AsmToken::LBrac, "missing register index"))
2865 return false;
2866
2867 SMLoc FirstIdxLoc = getLoc();
2868 SMLoc SecondIdxLoc;
2869
2870 if (!parseExpr(RegLo))
2871 return false;
2872
2873 if (trySkipToken(AsmToken::Colon)) {
2874 SecondIdxLoc = getLoc();
2875 if (!parseExpr(RegHi))
2876 return false;
2877 } else {
2878 RegHi = RegLo;
2879 }
2880
2881 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2882 return false;
2883
2884 if (!isUInt<32>(RegLo)) {
2885 Error(FirstIdxLoc, "invalid register index");
2886 return false;
2887 }
2888
2889 if (!isUInt<32>(RegHi)) {
2890 Error(SecondIdxLoc, "invalid register index");
2891 return false;
2892 }
2893
2894 if (RegLo > RegHi) {
2895 Error(FirstIdxLoc, "first register index should not exceed second index");
2896 return false;
2897 }
2898
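 // For example, v[4:7] parses to RegLo = 4 and RegHi = 7, giving Num = 4
 // and RegWidth = 128 below.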
2899 Num = static_cast<unsigned>(RegLo);
2900 RegWidth = 32 * ((RegHi - RegLo) + 1);
2901 return true;
2902}
2903
2904unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2905 unsigned &RegNum, unsigned &RegWidth,
2906 SmallVectorImpl<AsmToken> &Tokens) {
2907 assert(isToken(AsmToken::Identifier));
2908 unsigned Reg = getSpecialRegForName(getTokenStr());
2909 if (Reg) {
2910 RegNum = 0;
2911 RegWidth = 32;
2912 RegKind = IS_SPECIAL;
2913 Tokens.push_back(getToken());
2914 lex(); // skip register name
2915 }
2916 return Reg;
2917}
2918
2919unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2920 unsigned &RegNum, unsigned &RegWidth,
2921 SmallVectorImpl<AsmToken> &Tokens) {
2922 assert(isToken(AsmToken::Identifier));
2923 StringRef RegName = getTokenStr();
2924 auto Loc = getLoc();
2925
2926 const RegInfo *RI = getRegularRegInfo(RegName);
2927 if (!RI) {
2928 Error(Loc, "invalid register name");
2929 return AMDGPU::NoRegister;
2930 }
2931
2932 Tokens.push_back(getToken());
2933 lex(); // skip register name
2934
2935 RegKind = RI->Kind;
2936 StringRef RegSuffix = RegName.substr(RI->Name.size());
2937 unsigned SubReg = NoSubRegister;
2938 if (!RegSuffix.empty()) {
2939 if (RegSuffix.consume_back(".l"))
2940 SubReg = AMDGPU::lo16;
2941 else if (RegSuffix.consume_back(".h"))
2942 SubReg = AMDGPU::hi16;
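 // For example, "v5.l" and "v5.h" select the lo16 and hi16 halves of v5.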
2943
2944 // Single 32-bit register: vXX.
2945 if (!getRegNum(RegSuffix, RegNum)) {
2946 Error(Loc, "invalid register index");
2947 return AMDGPU::NoRegister;
2948 }
2949 RegWidth = 32;
2950 } else {
2951 // Range of registers: v[XX:YY]. ":YY" is optional.
2952 if (!ParseRegRange(RegNum, RegWidth))
2953 return AMDGPU::NoRegister;
2954 }
2955
2956 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
2957}
2958
2959unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2960 unsigned &RegWidth,
2961 SmallVectorImpl<AsmToken> &Tokens) {
2962 unsigned Reg = AMDGPU::NoRegister;
2963 auto ListLoc = getLoc();
2964
2965 if (!skipToken(AsmToken::LBrac,
2966 "expected a register or a list of registers")) {
2967 return AMDGPU::NoRegister;
2968 }
2969
2970 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2971
2972 auto Loc = getLoc();
2973 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2974 return AMDGPU::NoRegister;
2975 if (RegWidth != 32) {
2976 Error(Loc, "expected a single 32-bit register");
2977 return AMDGPU::NoRegister;
2978 }
2979
2980 for (; trySkipToken(AsmToken::Comma); ) {
2981 RegisterKind NextRegKind;
2982 unsigned NextReg, NextRegNum, NextRegWidth;
2983 Loc = getLoc();
2984
2985 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2986 NextRegNum, NextRegWidth,
2987 Tokens)) {
2988 return AMDGPU::NoRegister;
2989 }
2990 if (NextRegWidth != 32) {
2991 Error(Loc, "expected a single 32-bit register");
2992 return AMDGPU::NoRegister;
2993 }
2994 if (NextRegKind != RegKind) {
2995 Error(Loc, "registers in a list must be of the same kind");
2996 return AMDGPU::NoRegister;
2997 }
2998 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2999 return AMDGPU::NoRegister;
3000 }
3001
3002 if (!skipToken(AsmToken::RBrac,
3003 "expected a comma or a closing square bracket")) {
3004 return AMDGPU::NoRegister;
3005 }
3006
3007 if (isRegularReg(RegKind))
3008 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3009
3010 return Reg;
3011}
3012
3013bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3014 unsigned &RegNum, unsigned &RegWidth,
3015 SmallVectorImpl<AsmToken> &Tokens) {
3016 auto Loc = getLoc();
3017 Reg = AMDGPU::NoRegister;
3018
3019 if (isToken(AsmToken::Identifier)) {
3020 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3021 if (Reg == AMDGPU::NoRegister)
3022 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3023 } else {
3024 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3025 }
3026
3027 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3028 if (Reg == AMDGPU::NoRegister) {
3029 assert(Parser.hasPendingError());
3030 return false;
3031 }
3032
3033 if (!subtargetHasRegister(*TRI, Reg)) {
3034 if (Reg == AMDGPU::SGPR_NULL) {
3035 Error(Loc, "'null' operand is not supported on this GPU");
3036 } else {
3038 " register not available on this GPU");
3039 }
3040 return false;
3041 }
3042
3043 return true;
3044}
3045
3046bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
3047 unsigned &RegNum, unsigned &RegWidth,
3048 bool RestoreOnFailure /*=false*/) {
3049 Reg = AMDGPU::NoRegister;
3050
3051 SmallVector<AsmToken, 1> Tokens;
3052 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3053 if (RestoreOnFailure) {
3054 while (!Tokens.empty()) {
3055 getLexer().UnLex(Tokens.pop_back_val());
3056 }
3057 }
3058 return true;
3059 }
3060 return false;
3061}
3062
3063std::optional<StringRef>
3064AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3065 switch (RegKind) {
3066 case IS_VGPR:
3067 return StringRef(".amdgcn.next_free_vgpr");
3068 case IS_SGPR:
3069 return StringRef(".amdgcn.next_free_sgpr");
3070 default:
3071 return std::nullopt;
3072 }
3073}
3074
3075void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3076 auto SymbolName = getGprCountSymbolName(RegKind);
3077 assert(SymbolName && "initializing invalid register kind");
3078 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3079 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
3080}
3081
3082bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3083 unsigned DwordRegIndex,
3084 unsigned RegWidth) {
3085 // Symbols are only defined for GCN targets
3086 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3087 return true;
3088
3089 auto SymbolName = getGprCountSymbolName(RegKind);
3090 if (!SymbolName)
3091 return true;
3092 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3093
3094 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
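 // For example, after parsing v[8:11], DwordRegIndex = 8 and RegWidth = 128,
 // so NewMax = 11 and the symbol is bumped to 12 below unless it is already larger.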
3095 int64_t OldCount;
3096
3097 if (!Sym->isVariable())
3098 return !Error(getLoc(),
3099 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3100 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
3101 return !Error(
3102 getLoc(),
3103 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3104
3105 if (OldCount <= NewMax)
3106 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
3107
3108 return true;
3109}
3110
3111std::unique_ptr<AMDGPUOperand>
3112AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3113 const auto &Tok = getToken();
3114 SMLoc StartLoc = Tok.getLoc();
3115 SMLoc EndLoc = Tok.getEndLoc();
3116 RegisterKind RegKind;
3117 unsigned Reg, RegNum, RegWidth;
3118
3119 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3120 return nullptr;
3121 }
3122 if (isHsaAbi(getSTI())) {
3123 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3124 return nullptr;
3125 } else
3126 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3127 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3128}
3129
3130ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3131 bool HasSP3AbsModifier, bool HasLit) {
3132 // TODO: add syntactic sugar for 1/(2*PI)
3133
3134 if (isRegister())
3135 return ParseStatus::NoMatch;
3136 assert(!isModifier());
3137
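 // A "lit" prefix, e.g. "lit(1.0)", requests that the operand be encoded as
 // a literal even if it could be encoded as an inline constant.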
3138 if (!HasLit) {
3139 HasLit = trySkipId("lit");
3140 if (HasLit) {
3141 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3142 return ParseStatus::Failure;
3143 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
3144 if (S.isSuccess() &&
3145 !skipToken(AsmToken::RParen, "expected closing parentheses"))
3146 return ParseStatus::Failure;
3147 return S;
3148 }
3149 }
3150
3151 const auto& Tok = getToken();
3152 const auto& NextTok = peekToken();
3153 bool IsReal = Tok.is(AsmToken::Real);
3154 SMLoc S = getLoc();
3155 bool Negate = false;
3156
3157 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3158 lex();
3159 IsReal = true;
3160 Negate = true;
3161 }
3162
3163 AMDGPUOperand::Modifiers Mods;
3164 Mods.Lit = HasLit;
3165
3166 if (IsReal) {
3167 // Floating-point expressions are not supported.
3168 // Can only allow floating-point literals with an
3169 // optional sign.
3170
3171 StringRef Num = getTokenStr();
3172 lex();
3173
3174 APFloat RealVal(APFloat::IEEEdouble());
3175 auto roundMode = APFloat::rmNearestTiesToEven;
3176 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3177 return ParseStatus::Failure;
3178 if (Negate)
3179 RealVal.changeSign();
3180
3181 Operands.push_back(
3182 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3183 AMDGPUOperand::ImmTyNone, true));
3184 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3185 Op.setModifiers(Mods);
3186
3187 return ParseStatus::Success;
3188
3189 } else {
3190 int64_t IntVal;
3191 const MCExpr *Expr;
3192 SMLoc S = getLoc();
3193
3194 if (HasSP3AbsModifier) {
3195 // This is a workaround for handling expressions
3196 // as arguments of SP3 'abs' modifier, for example:
3197 // |1.0|
3198 // |-1|
3199 // |1+x|
3200 // This syntax is not compatible with syntax of standard
3201 // MC expressions (due to the trailing '|').
3202 SMLoc EndLoc;
3203 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3204 return ParseStatus::Failure;
3205 } else {
3206 if (Parser.parseExpression(Expr))
3207 return ParseStatus::Failure;
3208 }
3209
3210 if (Expr->evaluateAsAbsolute(IntVal)) {
3211 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3212 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3213 Op.setModifiers(Mods);
3214 } else {
3215 if (HasLit)
3216 return ParseStatus::NoMatch;
3217 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3218 }
3219
3220 return ParseStatus::Success;
3221 }
3222
3223 return ParseStatus::NoMatch;
3224}
3225
3226ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3227 if (!isRegister())
3228 return ParseStatus::NoMatch;
3229
3230 if (auto R = parseRegister()) {
3231 assert(R->isReg());
3232 Operands.push_back(std::move(R));
3233 return ParseStatus::Success;
3234 }
3235 return ParseStatus::Failure;
3236}
3237
3238ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3239 bool HasSP3AbsMod, bool HasLit) {
3240 ParseStatus Res = parseReg(Operands);
3241 if (!Res.isNoMatch())
3242 return Res;
3243 if (isModifier())
3244 return ParseStatus::NoMatch;
3245 return parseImm(Operands, HasSP3AbsMod, HasLit);
3246}
3247
3248bool
3249AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3250 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3251 const auto &str = Token.getString();
3252 return str == "abs" || str == "neg" || str == "sext";
3253 }
3254 return false;
3255}
3256
3257bool
3258AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3259 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3260}
3261
3262bool
3263AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3264 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3265}
3266
3267bool
3268AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3269 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3270}
3271
3272// Check if this is an operand modifier or an opcode modifier
3273 // which may look like an expression but is not. We should
3274// avoid parsing these modifiers as expressions. Currently
3275// recognized sequences are:
3276// |...|
3277// abs(...)
3278// neg(...)
3279// sext(...)
3280// -reg
3281// -|...|
3282// -abs(...)
3283// name:...
3284//
3285bool
3286AMDGPUAsmParser::isModifier() {
3287
3288 AsmToken Tok = getToken();
3289 AsmToken NextToken[2];
3290 peekTokens(NextToken);
3291
3292 return isOperandModifier(Tok, NextToken[0]) ||
3293 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3294 isOpcodeModifierWithVal(Tok, NextToken[0]);
3295}
3296
3297// Check if the current token is an SP3 'neg' modifier.
3298// Currently this modifier is allowed in the following context:
3299//
3300// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3301// 2. Before an 'abs' modifier: -abs(...)
3302// 3. Before an SP3 'abs' modifier: -|...|
3303//
3304// In all other cases "-" is handled as a part
3305// of an expression that follows the sign.
3306//
3307// Note: When "-" is followed by an integer literal,
3308// this is interpreted as integer negation rather
3309// than a floating-point NEG modifier applied to N.
3310 // Besides being counter-intuitive, such use of the floating-point
3311 // NEG modifier would have resulted in a different meaning
3312// of integer literals used with VOP1/2/C and VOP3,
3313// for example:
3314// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3315// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3316// Negative fp literals with preceding "-" are
3317// handled likewise for uniformity
3318//
3319bool
3320AMDGPUAsmParser::parseSP3NegModifier() {
3321
3322 AsmToken NextToken[2];
3323 peekTokens(NextToken);
3324
3325 if (isToken(AsmToken::Minus) &&
3326 (isRegister(NextToken[0], NextToken[1]) ||
3327 NextToken[0].is(AsmToken::Pipe) ||
3328 isId(NextToken[0], "abs"))) {
3329 lex();
3330 return true;
3331 }
3332
3333 return false;
3334}
3335
3336 ParseStatus
3337 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3338 bool AllowImm) {
3339 bool Neg, SP3Neg;
3340 bool Abs, SP3Abs;
3341 bool Lit;
3342 SMLoc Loc;
3343
3344 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3345 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3346 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3347
3348 SP3Neg = parseSP3NegModifier();
3349
3350 Loc = getLoc();
3351 Neg = trySkipId("neg");
3352 if (Neg && SP3Neg)
3353 return Error(Loc, "expected register or immediate");
3354 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3355 return ParseStatus::Failure;
3356
3357 Abs = trySkipId("abs");
3358 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3359 return ParseStatus::Failure;
3360
3361 Lit = trySkipId("lit");
3362 if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3363 return ParseStatus::Failure;
3364
3365 Loc = getLoc();
3366 SP3Abs = trySkipToken(AsmToken::Pipe);
3367 if (Abs && SP3Abs)
3368 return Error(Loc, "expected register or immediate");
3369
3370 ParseStatus Res;
3371 if (AllowImm) {
3372 Res = parseRegOrImm(Operands, SP3Abs, Lit);
3373 } else {
3374 Res = parseReg(Operands);
3375 }
3376 if (!Res.isSuccess())
3377 return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
3378
3379 if (Lit && !Operands.back()->isImm())
3380 Error(Loc, "expected immediate with lit modifier");
3381
3382 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3383 return ParseStatus::Failure;
3384 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3385 return ParseStatus::Failure;
3386 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3387 return ParseStatus::Failure;
3388 if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3389 return ParseStatus::Failure;
3390
3391 AMDGPUOperand::Modifiers Mods;
3392 Mods.Abs = Abs || SP3Abs;
3393 Mods.Neg = Neg || SP3Neg;
3394 Mods.Lit = Lit;
3395
3396 if (Mods.hasFPModifiers() || Lit) {
3397 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3398 if (Op.isExpr())
3399 return Error(Op.getStartLoc(), "expected an absolute expression");
3400 Op.setModifiers(Mods);
3401 }
3402 return ParseStatus::Success;
3403}
3404
3405 ParseStatus
3406 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3407 bool AllowImm) {
3408 bool Sext = trySkipId("sext");
3409 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3410 return ParseStatus::Failure;
3411
3412 ParseStatus Res;
3413 if (AllowImm) {
3414 Res = parseRegOrImm(Operands);
3415 } else {
3416 Res = parseReg(Operands);
3417 }
3418 if (!Res.isSuccess())
3419 return Sext ? ParseStatus::Failure : Res;
3420
3421 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3422 return ParseStatus::Failure;
3423
3424 AMDGPUOperand::Modifiers Mods;
3425 Mods.Sext = Sext;
3426
3427 if (Mods.hasIntModifiers()) {
3428 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3429 if (Op.isExpr())
3430 return Error(Op.getStartLoc(), "expected an absolute expression");
3431 Op.setModifiers(Mods);
3432 }
3433
3434 return ParseStatus::Success;
3435}
3436
3437ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3438 return parseRegOrImmWithFPInputMods(Operands, false);
3439}
3440
3441ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3442 return parseRegOrImmWithIntInputMods(Operands, false);
3443}
3444
3445ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3446 auto Loc = getLoc();
3447 if (trySkipId("off")) {
3448 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3449 AMDGPUOperand::ImmTyOff, false));
3450 return ParseStatus::Success;
3451 }
3452
3453 if (!isRegister())
3454 return ParseStatus::NoMatch;
3455
3456 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3457 if (Reg) {
3458 Operands.push_back(std::move(Reg));
3459 return ParseStatus::Success;
3460 }
3461
3462 return ParseStatus::Failure;
3463}
3464
3465unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3466 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3467
3468 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3469 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3470 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3471 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3472 return Match_InvalidOperand;
3473
3474 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3475 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3476 // v_mac_f32/16 allow only dst_sel == DWORD;
3477 auto OpNum =
3478 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3479 const auto &Op = Inst.getOperand(OpNum);
3480 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3481 return Match_InvalidOperand;
3482 }
3483 }
3484
3485 return Match_Success;
3486}
3487
3488 static ArrayRef<unsigned> getAllVariants() {
3489 static const unsigned Variants[] = {
3493 };
3494
3495 return ArrayRef(Variants);
3496}
3497
3498// What asm variants we should check
3499ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3500 if (isForcedDPP() && isForcedVOP3()) {
3501 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3502 return ArrayRef(Variants);
3503 }
3504 if (getForcedEncodingSize() == 32) {
3505 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3506 return ArrayRef(Variants);
3507 }
3508
3509 if (isForcedVOP3()) {
3510 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3511 return ArrayRef(Variants);
3512 }
3513
3514 if (isForcedSDWA()) {
3515 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3516 AMDGPUAsmVariants::SDWA9};
3517 return ArrayRef(Variants);
3518 }
3519
3520 if (isForcedDPP()) {
3521 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3522 return ArrayRef(Variants);
3523 }
3524
3525 return getAllVariants();
3526}
3527
3528StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3529 if (isForcedDPP() && isForcedVOP3())
3530 return "e64_dpp";
3531
3532 if (getForcedEncodingSize() == 32)
3533 return "e32";
3534
3535 if (isForcedVOP3())
3536 return "e64";
3537
3538 if (isForcedSDWA())
3539 return "sdwa";
3540
3541 if (isForcedDPP())
3542 return "dpp";
3543
3544 return "";
3545}
3546
3547unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3548 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3549 for (MCPhysReg Reg : Desc.implicit_uses()) {
3550 switch (Reg) {
3551 case AMDGPU::FLAT_SCR:
3552 case AMDGPU::VCC:
3553 case AMDGPU::VCC_LO:
3554 case AMDGPU::VCC_HI:
3555 case AMDGPU::M0:
3556 return Reg;
3557 default:
3558 break;
3559 }
3560 }
3561 return AMDGPU::NoRegister;
3562}
3563
3564// NB: This code is correct only when used to check constant
3565 // bus limitations because GFX7 supports no f16 inline constants.
3566// Note that there are no cases when a GFX7 opcode violates
3567// constant bus limitations due to the use of an f16 constant.
3568bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3569 unsigned OpIdx) const {
3570 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3571
3572 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3573 AMDGPU::isKImmOperand(Desc, OpIdx)) {
3574 return false;
3575 }
3576
3577 const MCOperand &MO = Inst.getOperand(OpIdx);
3578
3579 int64_t Val = MO.getImm();
3580 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3581
3582 switch (OpSize) { // expected operand size
3583 case 8:
3584 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3585 case 4:
3586 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3587 case 2: {
3588 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3592 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3593
3598
3603
3608
3613 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3614
3619 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3620
3621 llvm_unreachable("invalid operand type");
3622 }
3623 default:
3624 llvm_unreachable("invalid operand size");
3625 }
3626}
3627
3628unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3629 if (!isGFX10Plus())
3630 return 1;
3631
3632 switch (Opcode) {
3633 // 64-bit shift instructions can use only one scalar value input
3634 case AMDGPU::V_LSHLREV_B64_e64:
3635 case AMDGPU::V_LSHLREV_B64_gfx10:
3636 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3637 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3638 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3639 case AMDGPU::V_LSHRREV_B64_e64:
3640 case AMDGPU::V_LSHRREV_B64_gfx10:
3641 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3642 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3643 case AMDGPU::V_ASHRREV_I64_e64:
3644 case AMDGPU::V_ASHRREV_I64_gfx10:
3645 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3646 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3647 case AMDGPU::V_LSHL_B64_e64:
3648 case AMDGPU::V_LSHR_B64_e64:
3649 case AMDGPU::V_ASHR_I64_e64:
3650 return 1;
3651 default:
3652 return 2;
3653 }
3654}
3655
3656 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3657 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3658
3659// Get regular operand indices in the same order as specified
3660 // in the instruction (but append mandatory literals to the end).
3661 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3662 bool AddMandatoryLiterals = false) {
3663
3664 int16_t ImmIdx =
3665 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3666
3667 if (isVOPD(Opcode)) {
3668 int16_t ImmDeferredIdx =
3669 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3670 : -1;
3671
3672 return {getNamedOperandIdx(Opcode, OpName::src0X),
3673 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3674 getNamedOperandIdx(Opcode, OpName::src0Y),
3675 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3676 ImmDeferredIdx,
3677 ImmIdx};
3678 }
3679
3680 return {getNamedOperandIdx(Opcode, OpName::src0),
3681 getNamedOperandIdx(Opcode, OpName::src1),
3682 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3683}
3684
3685bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3686 const MCOperand &MO = Inst.getOperand(OpIdx);
3687 if (MO.isImm())
3688 return !isInlineConstant(Inst, OpIdx);
3689 if (MO.isReg()) {
3690 auto Reg = MO.getReg();
3691 if (!Reg)
3692 return false;
3693 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3694 auto PReg = mc2PseudoReg(Reg);
3695 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3696 }
3697 return true;
3698}
3699
3700// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3701// Writelane is special in that it can use SGPR and M0 (which would normally
3702// count as using the constant bus twice - but in this case it is allowed since
3703// the lane selector doesn't count as a use of the constant bus). However, it is
3704// still required to abide by the 1 SGPR rule.
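 // For example, "v_writelane_b32 v1, s13, m0" reads both s13 and m0, but m0
 // as the lane selector is exempt from the constant bus count.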
3705static bool checkWriteLane(const MCInst &Inst) {
3706 const unsigned Opcode = Inst.getOpcode();
3707 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3708 return false;
3709 const MCOperand &LaneSelOp = Inst.getOperand(2);
3710 if (!LaneSelOp.isReg())
3711 return false;
3712 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3713 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3714}
3715
3716bool AMDGPUAsmParser::validateConstantBusLimitations(
3717 const MCInst &Inst, const OperandVector &Operands) {
3718 const unsigned Opcode = Inst.getOpcode();
3719 const MCInstrDesc &Desc = MII.get(Opcode);
3720 unsigned LastSGPR = AMDGPU::NoRegister;
3721 unsigned ConstantBusUseCount = 0;
3722 unsigned NumLiterals = 0;
3723 unsigned LiteralSize;
3724
3725 if (!(Desc.TSFlags &
3728 !isVOPD(Opcode))
3729 return true;
3730
3731 if (checkWriteLane(Inst))
3732 return true;
3733
3734 // Check special imm operands (used by madmk, etc)
3735 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3736 ++NumLiterals;
3737 LiteralSize = 4;
3738 }
3739
3740 SmallDenseSet<unsigned> SGPRsUsed;
3741 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3742 if (SGPRUsed != AMDGPU::NoRegister) {
3743 SGPRsUsed.insert(SGPRUsed);
3744 ++ConstantBusUseCount;
3745 }
3746
3747 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3748
3749 for (int OpIdx : OpIndices) {
3750 if (OpIdx == -1)
3751 continue;
3752
3753 const MCOperand &MO = Inst.getOperand(OpIdx);
3754 if (usesConstantBus(Inst, OpIdx)) {
3755 if (MO.isReg()) {
3756 LastSGPR = mc2PseudoReg(MO.getReg());
3757 // Pairs of registers with a partial intersection like these
3758 // s0, s[0:1]
3759 // flat_scratch_lo, flat_scratch
3760 // flat_scratch_lo, flat_scratch_hi
3761 // are theoretically valid but they are disabled anyway.
3762 // Note that this code mimics SIInstrInfo::verifyInstruction
3763 if (SGPRsUsed.insert(LastSGPR).second) {
3764 ++ConstantBusUseCount;
3765 }
3766 } else { // Expression or a literal
3767
3768 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3769 continue; // special operand like VINTERP attr_chan
3770
3771 // An instruction may use only one literal.
3772 // This has been validated on the previous step.
3773 // See validateVOPLiteral.
3774 // This literal may be used as more than one operand.
3775 // If all these operands are of the same size,
3776 // this literal counts as one scalar value.
3777 // Otherwise it counts as 2 scalar values.
3778 // See "GFX10 Shader Programming", section 3.6.2.3.
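 // For example, if src0 and src2 encode the same 32-bit literal, NumLiterals
 // stays 1; if the same literal is also used by a 64-bit (f64) operand, the
 // sizes differ and it counts as 2 scalar values.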
3779
3780 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3781 if (Size < 4)
3782 Size = 4;
3783
3784 if (NumLiterals == 0) {
3785 NumLiterals = 1;
3786 LiteralSize = Size;
3787 } else if (LiteralSize != Size) {
3788 NumLiterals = 2;
3789 }
3790 }
3791 }
3792 }
3793 ConstantBusUseCount += NumLiterals;
3794
3795 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3796 return true;
3797
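 // Point the diagnostic at whichever offending operand (the SGPR or the
 // literal) appears later on the source line.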
3798 SMLoc LitLoc = getLitLoc(Operands);
3799 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3800 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3801 Error(Loc, "invalid operand (violates constant bus restrictions)");
3802 return false;
3803}
3804
3805bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3806 const MCInst &Inst, const OperandVector &Operands) {
3807
3808 const unsigned Opcode = Inst.getOpcode();
3809 if (!isVOPD(Opcode))
3810 return true;
3811
3812 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3813
3814 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3815 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3816 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3817 ? Opr.getReg()
3818 : MCRegister();
3819 };
3820
3821 // On GFX12 if both OpX and OpY are V_MOV_B32 then OpY uses SRC2 source-cache.
3822 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3823
3824 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3825 auto InvalidCompOprIdx =
3826 InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3827 if (!InvalidCompOprIdx)
3828 return true;
3829
3830 auto CompOprIdx = *InvalidCompOprIdx;
3831 auto ParsedIdx =
3832 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3833 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3834 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3835
3836 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3837 if (CompOprIdx == VOPD::Component::DST) {
3838 Error(Loc, "one dst register must be even and the other odd");
3839 } else {
3840 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3841 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3842 " operands must use different VGPR banks");
3843 }
3844
3845 return false;
3846}
3847
3848bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3849
3850 const unsigned Opc = Inst.getOpcode();
3851 const MCInstrDesc &Desc = MII.get(Opc);
3852
3853 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3854 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3855 assert(ClampIdx != -1);
3856 return Inst.getOperand(ClampIdx).getImm() == 0;
3857 }
3858
3859 return true;
3860}
3861
3862constexpr uint64_t MIMGFlags =
3863 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
3864
3865bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3866 const SMLoc &IDLoc) {
3867
3868 const unsigned Opc = Inst.getOpcode();
3869 const MCInstrDesc &Desc = MII.get(Opc);
3870
3871 if ((Desc.TSFlags & MIMGFlags) == 0)
3872 return true;
3873
3874 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3875 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3876 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3877
3878 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
3879 return true;
3880
3881 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3882 return true;
3883
3884 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3885 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3886 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3887 if (DMask == 0)
3888 DMask = 1;
3889
3890 bool IsPackedD16 = false;
3891 unsigned DataSize =
3892 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3893 if (hasPackedD16()) {
3894 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3895 IsPackedD16 = D16Idx >= 0;
3896 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3897 DataSize = (DataSize + 1) / 2;
3898 }
3899
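 // The data operand must supply one dword per enabled dmask channel (halved and
 // rounded up for packed d16), plus one extra dword when tfe is set. For
 // example, dmask:0x7 with tfe set expects a 4-dword vdata tuple.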
3900 if ((VDataSize / 4) == DataSize + TFESize)
3901 return true;
3902
3903 StringRef Modifiers;
3904 if (isGFX90A())
3905 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3906 else
3907 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3908
3909 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3910 return false;
3911}
3912
3913bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3914 const SMLoc &IDLoc) {
3915 const unsigned Opc = Inst.getOpcode();
3916 const MCInstrDesc &Desc = MII.get(Opc);
3917
3918 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
3919 return true;
3920
3921 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3922
3923 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3924 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3925 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3926 int RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc
3927 : AMDGPU::OpName::rsrc;
3928 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
3929 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3930 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3931
3932 assert(VAddr0Idx != -1);
3933 assert(SrsrcIdx != -1);
3934 assert(SrsrcIdx > VAddr0Idx);
3935
3936 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3937 if (BaseOpcode->BVH) {
3938 if (IsA16 == BaseOpcode->A16)
3939 return true;
3940 Error(IDLoc, "image address size does not match a16");
3941 return false;
3942 }
3943
3944 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3945 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3946 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3947 unsigned ActualAddrSize =
3948 IsNSA ? SrsrcIdx - VAddr0Idx
3949 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3950
3951 unsigned ExpectedAddrSize =
3952 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3953
3954 if (IsNSA) {
3955 if (hasPartialNSAEncoding() &&
3956 ExpectedAddrSize >
3957 getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
3958 int VAddrLastIdx = SrsrcIdx - 1;
3959 unsigned VAddrLastSize =
3960 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3961
3962 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3963 }
3964 } else {
3965 if (ExpectedAddrSize > 12)
3966 ExpectedAddrSize = 16;
3967
3968 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3969 // This provides backward compatibility for assembly created
3970 // before 160b/192b/224b types were directly supported.
3971 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3972 return true;
3973 }
3974
3975 if (ActualAddrSize == ExpectedAddrSize)
3976 return true;
3977
3978 Error(IDLoc, "image address size does not match dim and a16");
3979 return false;
3980}
3981
3982bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3983
3984 const unsigned Opc = Inst.getOpcode();
3985 const MCInstrDesc &Desc = MII.get(Opc);
3986
3987 if ((Desc.TSFlags & MIMGFlags) == 0)
3988 return true;
3989 if (!Desc.mayLoad() || !Desc.mayStore())
3990 return true; // Not atomic
3991
3992 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3993 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3994
3995 // This is an incomplete check because image_atomic_cmpswap
3996 // may only use 0x3 and 0xf while other atomic operations
3997 // may use 0x1 and 0x3. However these limitations are
3998 // verified when we check that dmask matches dst size.
3999 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4000}
4001
4002bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4003
4004 const unsigned Opc = Inst.getOpcode();
4005 const MCInstrDesc &Desc = MII.get(Opc);
4006
4007 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4008 return true;
4009
4010 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4011 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4012
4013 // GATHER4 instructions use dmask in a different fashion compared to
4014 // other MIMG instructions. The only useful DMASK values are
4015 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4016 // (red,red,red,red) etc.) The ISA document doesn't mention
4017 // this.
4018 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4019}
4020
4021bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4022 const OperandVector &Operands) {
4023 if (!isGFX10Plus())
4024 return true;
4025
4026 const unsigned Opc = Inst.getOpcode();
4027 const MCInstrDesc &Desc = MII.get(Opc);
4028
4029 if ((Desc.TSFlags & MIMGFlags) == 0)
4030 return true;
4031
4032 // image_bvh_intersect_ray instructions do not have dim
4033 if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH)
4034 return true;
4035
4036 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4037 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4038 if (Op.isDim())
4039 return true;
4040 }
4041 return false;
4042}
4043
4044bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4045 const unsigned Opc = Inst.getOpcode();
4046 const MCInstrDesc &Desc = MII.get(Opc);
4047
4048 if ((Desc.TSFlags & MIMGFlags) == 0)
4049 return true;
4050
4051 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4052 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4053 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4054
4055 if (!BaseOpcode->MSAA)
4056 return true;
4057
4058 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4059 assert(DimIdx != -1);
4060
4061 unsigned Dim = Inst.getOperand(DimIdx).getImm();
4062 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4063
4064 return DimInfo->MSAA;
4065}
4066
4067static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4068{
4069 switch (Opcode) {
4070 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4071 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4072 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4073 return true;
4074 default:
4075 return false;
4076 }
4077}
4078
4079// movrels* opcodes should only allow VGPRs as src0.
4080// This is specified in .td description for vop1/vop3,
4081// but sdwa is handled differently. See isSDWAOperand.
4082bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4083 const OperandVector &Operands) {
4084
4085 const unsigned Opc = Inst.getOpcode();
4086 const MCInstrDesc &Desc = MII.get(Opc);
4087
4088 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4089 return true;
4090
4091 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4092 assert(Src0Idx != -1);
4093
4094 SMLoc ErrLoc;
4095 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4096 if (Src0.isReg()) {
4097 auto Reg = mc2PseudoReg(Src0.getReg());
4098 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4099 if (!isSGPR(Reg, TRI))
4100 return true;
4101 ErrLoc = getRegLoc(Reg, Operands);
4102 } else {
4103 ErrLoc = getConstLoc(Operands);
4104 }
4105
4106 Error(ErrLoc, "source operand must be a VGPR");
4107 return false;
4108}
4109
4110bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4111 const OperandVector &Operands) {
4112
4113 const unsigned Opc = Inst.getOpcode();
4114
4115 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4116 return true;
4117
4118 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4119 assert(Src0Idx != -1);
4120
4121 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4122 if (!Src0.isReg())
4123 return true;
4124
4125 auto Reg = mc2PseudoReg(Src0.getReg());
4126 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4127 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4128 Error(getRegLoc(Reg, Operands),
4129 "source operand must be either a VGPR or an inline constant");
4130 return false;
4131 }
4132
4133 return true;
4134}
4135
4136bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4137 const OperandVector &Operands) {
4138 unsigned Opcode = Inst.getOpcode();
4139 const MCInstrDesc &Desc = MII.get(Opcode);
4140
4141 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4142 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4143 return true;
4144
4145 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4146 if (Src2Idx == -1)
4147 return true;
4148
4149 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4150 Error(getConstLoc(Operands),
4151 "inline constants are not allowed for this operand");
4152 return false;
4153 }
4154
4155 return true;
4156}
4157
4158bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4159 const OperandVector &Operands) {
4160 const unsigned Opc = Inst.getOpcode();
4161 const MCInstrDesc &Desc = MII.get(Opc);
4162
4163 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4164 return true;
4165
4166 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4167 if (Src2Idx == -1)
4168 return true;
4169
4170 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4171 if (!Src2.isReg())
4172 return true;
4173
4174 MCRegister Src2Reg = Src2.getReg();
4175 MCRegister DstReg = Inst.getOperand(0).getReg();
4176 if (Src2Reg == DstReg)
4177 return true;
4178
4179 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4180 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4181 return true;
4182
4183 if (TRI->regsOverlap(Src2Reg, DstReg)) {
4184 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
4185 "source 2 operand must not partially overlap with dst");
4186 return false;
4187 }
4188
4189 return true;
4190}
4191
4192bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4193 switch (Inst.getOpcode()) {
4194 default:
4195 return true;
4196 case V_DIV_SCALE_F32_gfx6_gfx7:
4197 case V_DIV_SCALE_F32_vi:
4198 case V_DIV_SCALE_F32_gfx10:
4199 case V_DIV_SCALE_F64_gfx6_gfx7:
4200 case V_DIV_SCALE_F64_vi:
4201 case V_DIV_SCALE_F64_gfx10:
4202 break;
4203 }
4204
4205 // TODO: Check that src0 = src1 or src2.
4206
4207 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4208 AMDGPU::OpName::src2_modifiers,
4209 AMDGPU::OpName::src2_modifiers}) {
4210 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4211 .getImm() &
4212 SISrcMods::ABS)
4213 return false;
4214 }
4215 }
4216
4217 return true;
4218}
4219
4220bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4221
4222 const unsigned Opc = Inst.getOpcode();
4223 const MCInstrDesc &Desc = MII.get(Opc);
4224
4225 if ((Desc.TSFlags & MIMGFlags) == 0)
4226 return true;
4227
4228 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4229 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4230 if (isCI() || isSI())
4231 return false;
4232 }
4233
4234 return true;
4235}
4236
4237static bool IsRevOpcode(const unsigned Opcode)
4238{
4239 switch (Opcode) {
4240 case AMDGPU::V_SUBREV_F32_e32:
4241 case AMDGPU::V_SUBREV_F32_e64:
4242 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4243 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4244 case AMDGPU::V_SUBREV_F32_e32_vi:
4245 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4246 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4247 case AMDGPU::V_SUBREV_F32_e64_vi:
4248
4249 case AMDGPU::V_SUBREV_CO_U32_e32:
4250 case AMDGPU::V_SUBREV_CO_U32_e64:
4251 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4252 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4253
4254 case AMDGPU::V_SUBBREV_U32_e32:
4255 case AMDGPU::V_SUBBREV_U32_e64:
4256 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4257 case AMDGPU::V_SUBBREV_U32_e32_vi:
4258 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4259 case AMDGPU::V_SUBBREV_U32_e64_vi:
4260
4261 case AMDGPU::V_SUBREV_U32_e32:
4262 case AMDGPU::V_SUBREV_U32_e64:
4263 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4264 case AMDGPU::V_SUBREV_U32_e32_vi:
4265 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4266 case AMDGPU::V_SUBREV_U32_e64_vi:
4267
4268 case AMDGPU::V_SUBREV_F16_e32:
4269 case AMDGPU::V_SUBREV_F16_e64:
4270 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4271 case AMDGPU::V_SUBREV_F16_e32_vi:
4272 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4273 case AMDGPU::V_SUBREV_F16_e64_vi:
4274
4275 case AMDGPU::V_SUBREV_U16_e32:
4276 case AMDGPU::V_SUBREV_U16_e64:
4277 case AMDGPU::V_SUBREV_U16_e32_vi:
4278 case AMDGPU::V_SUBREV_U16_e64_vi:
4279
4280 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4281 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4282 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4283
4284 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4285 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4286
4287 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4288 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4289
4290 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4291 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4292
4293 case AMDGPU::V_LSHRREV_B32_e32:
4294 case AMDGPU::V_LSHRREV_B32_e64:
4295 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4296 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4297 case AMDGPU::V_LSHRREV_B32_e32_vi:
4298 case AMDGPU::V_LSHRREV_B32_e64_vi:
4299 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4300 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4301
4302 case AMDGPU::V_ASHRREV_I32_e32:
4303 case AMDGPU::V_ASHRREV_I32_e64:
4304 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4305 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4306 case AMDGPU::V_ASHRREV_I32_e32_vi:
4307 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4308 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4309 case AMDGPU::V_ASHRREV_I32_e64_vi:
4310
4311 case AMDGPU::V_LSHLREV_B32_e32:
4312 case AMDGPU::V_LSHLREV_B32_e64:
4313 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4314 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4315 case AMDGPU::V_LSHLREV_B32_e32_vi:
4316 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4317 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4318 case AMDGPU::V_LSHLREV_B32_e64_vi:
4319
4320 case AMDGPU::V_LSHLREV_B16_e32:
4321 case AMDGPU::V_LSHLREV_B16_e64:
4322 case AMDGPU::V_LSHLREV_B16_e32_vi:
4323 case AMDGPU::V_LSHLREV_B16_e64_vi:
4324 case AMDGPU::V_LSHLREV_B16_gfx10:
4325
4326 case AMDGPU::V_LSHRREV_B16_e32:
4327 case AMDGPU::V_LSHRREV_B16_e64:
4328 case AMDGPU::V_LSHRREV_B16_e32_vi:
4329 case AMDGPU::V_LSHRREV_B16_e64_vi:
4330 case AMDGPU::V_LSHRREV_B16_gfx10:
4331
4332 case AMDGPU::V_ASHRREV_I16_e32:
4333 case AMDGPU::V_ASHRREV_I16_e64:
4334 case AMDGPU::V_ASHRREV_I16_e32_vi:
4335 case AMDGPU::V_ASHRREV_I16_e64_vi:
4336 case AMDGPU::V_ASHRREV_I16_gfx10:
4337
4338 case AMDGPU::V_LSHLREV_B64_e64:
4339 case AMDGPU::V_LSHLREV_B64_gfx10:
4340 case AMDGPU::V_LSHLREV_B64_vi:
4341
4342 case AMDGPU::V_LSHRREV_B64_e64:
4343 case AMDGPU::V_LSHRREV_B64_gfx10:
4344 case AMDGPU::V_LSHRREV_B64_vi:
4345
4346 case AMDGPU::V_ASHRREV_I64_e64:
4347 case AMDGPU::V_ASHRREV_I64_gfx10:
4348 case AMDGPU::V_ASHRREV_I64_vi:
4349
4350 case AMDGPU::V_PK_LSHLREV_B16:
4351 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4352 case AMDGPU::V_PK_LSHLREV_B16_vi:
4353
4354 case AMDGPU::V_PK_LSHRREV_B16:
4355 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4356 case AMDGPU::V_PK_LSHRREV_B16_vi:
4357 case AMDGPU::V_PK_ASHRREV_I16:
4358 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4359 case AMDGPU::V_PK_ASHRREV_I16_vi:
4360 return true;
4361 default:
4362 return false;
4363 }
4364}
4365
4366std::optional<StringRef>
4367AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4368
4369 using namespace SIInstrFlags;
4370 const unsigned Opcode = Inst.getOpcode();
4371 const MCInstrDesc &Desc = MII.get(Opcode);
4372
4373 // lds_direct register is defined so that it can be used
4374 // with 9-bit operands only. Ignore encodings which do not accept these.
4375 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4376 if ((Desc.TSFlags & Enc) == 0)
4377 return std::nullopt;
4378
4379 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4380 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4381 if (SrcIdx == -1)
4382 break;
4383 const auto &Src = Inst.getOperand(SrcIdx);
4384 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4385
4386 if (isGFX90A() || isGFX11Plus())
4387 return StringRef("lds_direct is not supported on this GPU");
4388
4389 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4390 return StringRef("lds_direct cannot be used with this instruction");
4391
4392 if (SrcName != OpName::src0)
4393 return StringRef("lds_direct may be used as src0 only");
4394 }
4395 }
4396
4397 return std::nullopt;
4398}
4399
4400SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4401 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4402 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4403 if (Op.isFlatOffset())
4404 return Op.getStartLoc();
4405 }
4406 return getLoc();
4407}
4408
4409bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4410 const OperandVector &Operands) {
4411 auto Opcode = Inst.getOpcode();
4412 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4413 if (OpNum == -1)
4414 return true;
4415
4416 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4417 if ((TSFlags & SIInstrFlags::FLAT))
4418 return validateFlatOffset(Inst, Operands);
4419
4420 if ((TSFlags & SIInstrFlags::SMRD))
4421 return validateSMEMOffset(Inst, Operands);
4422
4423 const auto &Op = Inst.getOperand(OpNum);
4424 if (isGFX12Plus() &&
4425 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4426 const unsigned OffsetSize = 24;
4427 if (!isIntN(OffsetSize, Op.getImm())) {
4428 Error(getFlatOffsetLoc(Operands),
4429 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4430 return false;
4431 }
4432 } else {
4433 const unsigned OffsetSize = 16;
4434 if (!isUIntN(OffsetSize, Op.getImm())) {
4435 Error(getFlatOffsetLoc(Operands),
4436 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4437 return false;
4438 }
4439 }
4440 return true;
4441}
4442
4443bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4444 const OperandVector &Operands) {
4445 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4446 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4447 return true;
4448
4449 auto Opcode = Inst.getOpcode();
4450 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4451 assert(OpNum != -1);
4452
4453 const auto &Op = Inst.getOperand(OpNum);
4454 if (!hasFlatOffsets() && Op.getImm() != 0) {
4455 Error(getFlatOffsetLoc(Operands),
4456 "flat offset modifier is not supported on this GPU");
4457 return false;
4458 }
4459
4460 // For pre-GFX12 FLAT instructions the offset must be positive;
4461 // MSB is ignored and forced to zero.
4462 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4463 bool AllowNegative =
4464 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4465 isGFX12Plus();
4466 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4467 Error(getFlatOffsetLoc(Operands),
4468 Twine("expected a ") +
4469 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4470 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4471 return false;
4472 }
4473
4474 return true;
4475}
4476
4477SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4478 // Start with second operand because SMEM Offset cannot be dst or src0.
4479 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4480 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4481 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4482 return Op.getStartLoc();
4483 }
4484 return getLoc();
4485}
4486
4487bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4488 const OperandVector &Operands) {
4489 if (isCI() || isSI())
4490 return true;
4491
4492 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4493 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4494 return true;
4495
4496 auto Opcode = Inst.getOpcode();
4497 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4498 if (OpNum == -1)
4499 return true;
4500
4501 const auto &Op = Inst.getOperand(OpNum);
4502 if (!Op.isImm())
4503 return true;
4504
4505 uint64_t Offset = Op.getImm();
4506 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4507 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4508 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4509 return true;
4510
4511 Error(getSMEMOffsetLoc(Operands),
4512 isGFX12Plus() ? "expected a 24-bit signed offset"
4513 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4514 : "expected a 21-bit signed offset");
4515
4516 return false;
4517}
4518
4519bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4520 unsigned Opcode = Inst.getOpcode();
4521 const MCInstrDesc &Desc = MII.get(Opcode);
4522 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4523 return true;
4524
4525 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4526 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4527
4528 const int OpIndices[] = { Src0Idx, Src1Idx };
4529
4530 unsigned NumExprs = 0;
4531 unsigned NumLiterals = 0;
4532 uint32_t LiteralValue;
4533
4534 for (int OpIdx : OpIndices) {
4535 if (OpIdx == -1) break;
4536
4537 const MCOperand &MO = Inst.getOperand(OpIdx);
4538 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4539 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4540 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4541 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4542 if (NumLiterals == 0 || LiteralValue != Value) {
4543 LiteralValue = Value;
4544 ++NumLiterals;
4545 }
4546 } else if (MO.isExpr()) {
4547 ++NumExprs;
4548 }
4549 }
4550 }
4551
4552 return NumLiterals + NumExprs <= 1;
4553}
4554
4555bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4556 const unsigned Opc = Inst.getOpcode();
4557 if (isPermlane16(Opc)) {
4558 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4559 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4560
4561 if (OpSel & ~3)
4562 return false;
4563 }
4564
4565 uint64_t TSFlags = MII.get(Opc).TSFlags;
4566
4567 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4568 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4569 if (OpSelIdx != -1) {
4570 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4571 return false;
4572 }
4573 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4574 if (OpSelHiIdx != -1) {
4575 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4576 return false;
4577 }
4578 }
4579
4580 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4581 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4582 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4583 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4584 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4585 if (OpSel & 3)
4586 return false;
4587 }
4588
4589 return true;
4590}
4591
4592bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) {
4593 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4594
4595 const unsigned Opc = Inst.getOpcode();
4596 uint64_t TSFlags = MII.get(Opc).TSFlags;
4597
4598 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4599 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4600 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4601 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4602 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4603 !(TSFlags & SIInstrFlags::IsSWMMAC))
4604 return true;
4605
4606 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4607 if (NegIdx == -1)
4608 return true;
4609
4610 unsigned Neg = Inst.getOperand(NegIdx).getImm();
4611
4612 // Some instructions have a neg_lo or neg_hi operand, but the neg modifier is
4613 // only allowed on certain src operands. Conveniently, such instructions do not
4614 // have a src_modifiers operand for the src operands that do not allow neg,
4615 // because those operands do not allow opsel either.
4616
4617 int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4618 AMDGPU::OpName::src1_modifiers,
4619 AMDGPU::OpName::src2_modifiers};
4620
4621 for (unsigned i = 0; i < 3; ++i) {
4622 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4623 if (Neg & (1 << i))
4624 return false;
4625 }
4626 }
4627
4628 return true;
4629}
4630
4631bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4632 const OperandVector &Operands) {
4633 const unsigned Opc = Inst.getOpcode();
4634 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4635 if (DppCtrlIdx >= 0) {
4636 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4637
4638 if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
4639 AMDGPU::isDPALU_DPP(MII.get(Opc))) {
4640 // DP ALU DPP is supported for row_newbcast only on GFX9*
4641 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4642 Error(S, "DP ALU dpp only supports row_newbcast");
4643 return false;
4644 }
4645 }
4646
4647 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
4648 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4649
4650 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
4651 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4652 if (Src1Idx >= 0) {
4653 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4654 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4655 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
4656 auto Reg = mc2PseudoReg(Inst.getOperand(Src1Idx).getReg());
4657 SMLoc S = getRegLoc(Reg, Operands);
4658 Error(S, "invalid operand for instruction");
4659 return false;
4660 }
4661 if (Src1.isImm()) {
4662 Error(getInstLoc(Operands),
4663 "src1 immediate operand invalid for instruction");
4664 return false;
4665 }
4666 }
4667 }
4668
4669 return true;
4670}
4671
4672// Check if VCC register matches wavefront size
4673bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4674 auto FB = getFeatureBits();
4675 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4676 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4677}
4678
4679// Only one unique literal can be used. A VOP3 literal is only allowed on GFX10+.
4680bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4681 const OperandVector &Operands) {
4682 unsigned Opcode = Inst.getOpcode();
4683 const MCInstrDesc &Desc = MII.get(Opcode);
4684 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4685 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4686 !HasMandatoryLiteral && !isVOPD(Opcode))
4687 return true;
4688
4689 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4690
4691 unsigned NumExprs = 0;
4692 unsigned NumLiterals = 0;
4693 uint64_t LiteralValue;
4694
4695 for (int OpIdx : OpIndices) {
4696 if (OpIdx == -1)
4697 continue;
4698
4699 const MCOperand &MO = Inst.getOperand(OpIdx);
4700 if (!MO.isImm() && !MO.isExpr())
4701 continue;
4702 if (!isSISrcOperand(Desc, OpIdx))
4703 continue;
4704
4705 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4706 uint64_t Value = static_cast<uint64_t>(MO.getImm());
4707 bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
4708 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
4709 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
4710
4711 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
4712 Error(getLitLoc(Operands), "invalid operand for instruction");
4713 return false;
4714 }
4715
4716 if (IsFP64 && IsValid32Op)
4717 Value = Hi_32(Value);
4718
4719 if (NumLiterals == 0 || LiteralValue != Value) {
4720 LiteralValue = Value;
4721 ++NumLiterals;
4722 }
4723 } else if (MO.isExpr()) {
4724 ++NumExprs;
4725 }
4726 }
4727 NumLiterals += NumExprs;
4728
4729 if (!NumLiterals)
4730 return true;
4731
4732 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4733 Error(getLitLoc(Operands), "literal operands are not supported");
4734 return false;
4735 }
4736
4737 if (NumLiterals > 1) {
4738 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4739 return false;
4740 }
4741
4742 return true;
4743}
4744
4745// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4746static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4747 const MCRegisterInfo *MRI) {
4748 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4749 if (OpIdx < 0)
4750 return -1;
4751
4752 const MCOperand &Op = Inst.getOperand(OpIdx);
4753 if (!Op.isReg())
4754 return -1;
4755
4756 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4757 auto Reg = Sub ? Sub : Op.getReg();
4758 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4759 return AGPR32.contains(Reg) ? 1 : 0;
4760}
4761
4762bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4763 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4764 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4765 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4766 SIInstrFlags::DS)) == 0)
4767 return true;
4768
4769 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4770 : AMDGPU::OpName::vdata;
4771
4772 const MCRegisterInfo *MRI = getMRI();
4773 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4774 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4775
4776 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4777 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4778 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4779 return false;
4780 }
4781
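 // On gfx90a+ the dst and data operands must agree (both AGPR or both VGPR);
 // on targets without gfx90a instructions, AGPR data operands are not allowed.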
4782 auto FB = getFeatureBits();
4783 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4784 if (DataAreg < 0 || DstAreg < 0)
4785 return true;
4786 return DstAreg == DataAreg;
4787 }
4788
4789 return DstAreg < 1 && DataAreg < 1;
4790}
4791
4792bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4793 auto FB = getFeatureBits();
4794 if (!FB[AMDGPU::FeatureGFX90AInsts])
4795 return true;
4796
4797 const MCRegisterInfo *MRI = getMRI();
4798 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4799 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
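 // Register tuples (operands with a sub0 subregister) must start at an
 // even-numbered VGPR or AGPR, e.g. v[2:3] is accepted but v[1:2] is not.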
4800 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4801 const MCOperand &Op = Inst.getOperand(I);
4802 if (!Op.isReg())
4803 continue;
4804
4805 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4806 if (!Sub)
4807 continue;
4808
4809 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4810 return false;
4811 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4812 return false;
4813 }
4814
4815 return true;
4816}
4817
4818SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4819 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4820 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4821 if (Op.isBLGP())
4822 return Op.getStartLoc();
4823 }
4824 return SMLoc();
4825}
4826
4827bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4828 const OperandVector &Operands) {
4829 unsigned Opc = Inst.getOpcode();
4830 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4831 if (BlgpIdx == -1)
4832 return true;
4833 SMLoc BLGPLoc = getBLGPLoc(Operands);
4834 if (!BLGPLoc.isValid())
4835 return true;
4836 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
4837 auto FB = getFeatureBits();
4838 bool UsesNeg = false;
4839 if (FB[AMDGPU::FeatureGFX940Insts]) {
4840 switch (Opc) {
4841 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4842 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4843 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4844 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4845 UsesNeg = true;
4846 }
4847 }
4848
4849 if (IsNeg == UsesNeg)
4850 return true;
4851
4852 Error(BLGPLoc,
4853 UsesNeg ? "invalid modifier: blgp is not supported"
4854 : "invalid modifier: neg is not supported");
4855
4856 return false;
4857}
4858
4859bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4860 const OperandVector &Operands) {
4861 if (!isGFX11Plus())
4862 return true;
4863
4864 unsigned Opc = Inst.getOpcode();
4865 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4866 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4867 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4868 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4869 return true;
4870
4871 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4872 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4873 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4874 if (Reg == AMDGPU::SGPR_NULL)
4875 return true;
4876
4877 SMLoc RegLoc = getRegLoc(Reg, Operands);
4878 Error(RegLoc, "src0 must be null");
4879 return false;
4880}
4881
4882bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
4883 const OperandVector &Operands) {
4884 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4885 if ((TSFlags & SIInstrFlags::DS) == 0)
4886 return true;
4887 if (TSFlags & SIInstrFlags::GWS)
4888 return validateGWS(Inst, Operands);
4889 // Only validate GDS for non-GWS instructions.
4890 if (hasGDS())
4891 return true;
4892 int GDSIdx =
4893 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
4894 if (GDSIdx < 0)
4895 return true;
4896 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
4897 if (GDS) {
4898 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
4899 Error(S, "gds modifier is not supported on this GPU");
4900 return false;
4901 }
4902 return true;
4903}
4904
4905// gfx90a has an undocumented limitation:
4906// DS_GWS opcodes must use even aligned registers.
4907bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4908 const OperandVector &Operands) {
4909 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4910 return true;
4911
4912 int Opc = Inst.getOpcode();
4913 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4914 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4915 return true;
4916
4917 const MCRegisterInfo *MRI = getMRI();
4918 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4919 int Data0Pos =
4920 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4921 assert(Data0Pos != -1);
4922 auto Reg = Inst.getOperand(Data0Pos).getReg();
4923 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4924 if (RegIdx & 1) {
4925 SMLoc RegLoc = getRegLoc(Reg, Operands);
4926 Error(RegLoc, "vgpr must be even aligned");
4927 return false;
4928 }
4929
4930 return true;
4931}
4932
4933bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4934 const OperandVector &Operands,
4935 const SMLoc &IDLoc) {
4936 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4937 AMDGPU::OpName::cpol);
4938 if (CPolPos == -1)
4939 return true;
4940
4941 unsigned CPol = Inst.getOperand(CPolPos).getImm();
4942
4943 if (isGFX12Plus())
4944 return validateTHAndScopeBits(Inst, Operands, CPol);
4945
4946 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4947 if (TSFlags & SIInstrFlags::SMRD) {
4948 if (CPol && (isSI() || isCI())) {
4949 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4950 Error(S, "cache policy is not supported for SMRD instructions");
4951 return false;
4952 }
4953 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4954 Error(IDLoc, "invalid cache policy for SMEM instruction");
4955 return false;
4956 }
4957 }
4958
4959 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4960 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
4961 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4962 SIInstrFlags::FLAT;
4963 if (!(TSFlags & AllowSCCModifier)) {
4964 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4965 StringRef CStr(S.getPointer());
4966 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4967 Error(S,
4968 "scc modifier is not supported for this instruction on this GPU");
4969 return false;
4970 }
4971 }
4972
4973 if (!(TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet)))
4974 return true;
4975
4976 if (TSFlags & SIInstrFlags::IsAtomicRet) {
4977 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4978 Error(IDLoc, isGFX940() ? "instruction must use sc0"
4979 : "instruction must use glc");
4980 return false;
4981 }
4982 } else {
4983 if (CPol & CPol::GLC) {
4984 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4985 StringRef CStr(S.getPointer());
4986 S = SMLoc::getFromPointer(
4987 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4988 Error(S, isGFX940() ? "instruction must not use sc0"
4989 : "instruction must not use glc");
4990 return false;
4991 }
4992 }
4993
4994 return true;
4995}
4996
4997bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
4998 const OperandVector &Operands,
4999 const unsigned CPol) {
5000 const unsigned TH = CPol & AMDGPU::CPol::TH;
5001 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
5002
5003 const unsigned Opcode = Inst.getOpcode();
5004 const MCInstrDesc &TID = MII.get(Opcode);
5005
5006 auto PrintError = [&](StringRef Msg) {
5007 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5008 Error(S, Msg);
5009 return false;
5010 };
5011
5012 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5013 (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) &&
5014 !(TH & AMDGPU::CPol::TH_ATOMIC_RETURN))
5015 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5016
5017 if (TH == 0)
5018 return true;
5019
5020 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5021 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5022 (TH == AMDGPU::CPol::TH_NT_HT)))
5023 return PrintError("invalid th value for SMEM instruction");
5024
5025 if (TH == AMDGPU::CPol::TH_BYPASS) {
5026 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5027 CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
5028 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5029 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
5030 return PrintError("scope and th combination is not valid");
5031 }
5032
5033 bool IsStore = TID.mayStore();
5034 bool IsAtomic =
5035 TID.TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet);
5036
5037 if (IsAtomic) {
5038 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5039 return PrintError("invalid th value for atomic instructions");
5040 } else if (IsStore) {
5041 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5042 return PrintError("invalid th value for store instructions");
5043 } else {
5044 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5045 return PrintError("invalid th value for load instructions");
5046 }
5047
5048 return true;
5049}
5050
5051bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5052 const OperandVector &Operands) {
5053 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5054 if (Desc.mayStore() &&
5055 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5056 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5057 if (Loc != getInstLoc(Operands)) {
5058 Error(Loc, "TFE modifier has no meaning for store instructions");
5059 return false;
5060 }
5061 }
5062
5063 return true;
5064}
5065
5066bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
5067 const SMLoc &IDLoc,
5068 const OperandVector &Operands) {
5069 if (auto ErrMsg = validateLdsDirect(Inst)) {
5070 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
5071 return false;
5072 }
5073 if (!validateSOPLiteral(Inst)) {
5074 Error(getLitLoc(Operands),
5075 "only one unique literal operand is allowed");
5076 return false;
5077 }
5078 if (!validateVOPLiteral(Inst, Operands)) {
5079 return false;
5080 }
5081 if (!validateConstantBusLimitations(Inst, Operands)) {
5082 return false;
5083 }
5084 if (!validateVOPDRegBankConstraints(Inst, Operands)) {
5085 return false;
5086 }
5087 if (!validateIntClampSupported(Inst)) {
5088 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5089 "integer clamping is not supported on this GPU");
5090 return false;
5091 }
5092 if (!validateOpSel(Inst)) {
5093 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5094 "invalid op_sel operand");
5095 return false;
5096 }
5097 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5098 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5099 "invalid neg_lo operand");
5100 return false;
5101 }
5102 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5103 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5104 "invalid neg_hi operand");
5105 return false;
5106 }
5107 if (!validateDPP(Inst, Operands)) {
5108 return false;
5109 }
5110 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5111 if (!validateMIMGD16(Inst)) {
5112 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5113 "d16 modifier is not supported on this GPU");
5114 return false;
5115 }
5116 if (!validateMIMGDim(Inst, Operands)) {
5117 Error(IDLoc, "missing dim operand");
5118 return false;
5119 }
5120 if (!validateMIMGMSAA(Inst)) {
5121 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5122 "invalid dim; must be MSAA type");
5123 return false;
5124 }
5125 if (!validateMIMGDataSize(Inst, IDLoc)) {
5126 return false;
5127 }
5128 if (!validateMIMGAddrSize(Inst, IDLoc))
5129 return false;
5130 if (!validateMIMGAtomicDMask(Inst)) {
5131 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5132 "invalid atomic image dmask");
5133 return false;
5134 }
5135 if (!validateMIMGGatherDMask(Inst)) {
5136 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5137 "invalid image_gather dmask: only one bit must be set");
5138 return false;
5139 }
5140 if (!validateMovrels(Inst, Operands)) {
5141 return false;
5142 }
5143 if (!validateOffset(Inst, Operands)) {
5144 return false;
5145 }
5146 if (!validateMAIAccWrite(Inst, Operands)) {
5147 return false;
5148 }
5149 if (!validateMAISrc2(Inst, Operands)) {
5150 return false;
5151 }
5152 if (!validateMFMA(Inst, Operands)) {
5153 return false;
5154 }
5155 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5156 return false;
5157 }
5158
5159 if (!validateAGPRLdSt(Inst)) {
5160 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5161 ? "invalid register class: data and dst should be all VGPR or AGPR"
5162 : "invalid register class: agpr loads and stores not supported on this GPU"
5163 );
5164 return false;
5165 }
5166 if (!validateVGPRAlign(Inst)) {
5167 Error(IDLoc,
5168 "invalid register class: vgpr tuples must be 64 bit aligned");
5169 return false;
5170 }
5171 if (!validateDS(Inst, Operands)) {
5172 return false;
5173 }
5174
5175 if (!validateBLGP(Inst, Operands)) {
5176 return false;
5177 }
5178
5179 if (!validateDivScale(Inst)) {
5180 Error(IDLoc, "ABS not allowed in VOP3B instructions");
5181 return false;
5182 }
5183 if (!validateWaitCnt(Inst, Operands)) {
5184 return false;
5185 }
5186 if (!validateTFE(Inst, Operands)) {
5187 return false;
5188 }
5189
5190 return true;
5191}
5192
5193static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5194 const FeatureBitset &FBS,
5195 unsigned VariantID = 0);
5196
5197static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5198 const FeatureBitset &AvailableFeatures,
5199 unsigned VariantID);
5200
5201bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5202 const FeatureBitset &FBS) {
5203 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5204}
5205
5206bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5207 const FeatureBitset &FBS,
5208 ArrayRef<unsigned> Variants) {
5209 for (auto Variant : Variants) {
5210 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5211 return true;
5212 }
5213
5214 return false;
5215}
5216
5217bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5218 const SMLoc &IDLoc) {
5219 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5220
5221 // Check if requested instruction variant is supported.
5222 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5223 return false;
5224
5225 // This instruction is not supported.
5226 // Clear any other pending errors because they are no longer relevant.
5227 getParser().clearPendingErrors();
5228
5229 // Requested instruction variant is not supported.
5230 // Check if any other variants are supported.
5231 StringRef VariantName = getMatchedVariantName();
5232 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5233 return Error(IDLoc,
5234 Twine(VariantName,
5235 " variant of this instruction is not supported"));
5236 }
5237
5238 // Check if this instruction may be used with a different wavesize.
5239 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5240 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5241
5242 FeatureBitset FeaturesWS32 = getFeatureBits();
5243 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5244 .flip(AMDGPU::FeatureWavefrontSize32);
5245 FeatureBitset AvailableFeaturesWS32 =
5246 ComputeAvailableFeatures(FeaturesWS32);
5247
5248 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5249 return Error(IDLoc, "instruction requires wavesize=32");
5250 }
5251
5252 // Finally check if this instruction is supported on any other GPU.
5253 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5254 return Error(IDLoc, "instruction not supported on this GPU");
5255 }
5256
5257 // Instruction not supported on any GPU. Probably a typo.
5258 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5259 return Error(IDLoc, "invalid instruction" + Suggestion);
5260}
5261
5262static bool isInvalidVOPDY(const OperandVector &Operands,
5263 uint64_t InvalidOprIdx) {
5264 assert(InvalidOprIdx < Operands.size());
5265 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5266 if (Op.isToken() && InvalidOprIdx > 1) {
5267 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5268 return PrevOp.isToken() && PrevOp.getToken() == "::";
5269 }
5270 return false;
5271}
5272
5273bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5274 OperandVector &Operands,
5275 MCStreamer &Out,
5276 uint64_t &ErrorInfo,
5277 bool MatchingInlineAsm) {
5278 MCInst Inst;
5279 unsigned Result = Match_Success;
5280 for (auto Variant : getMatchedVariants()) {
5281 uint64_t EI;
5282 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5283 Variant);
5284 // We order match statuses from least to most specific and use the most
5285 // specific status as the result:
5286 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5287 if (R == Match_Success || R == Match_MissingFeature ||
5288 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5289 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5290 Result != Match_MissingFeature)) {
5291 Result = R;
5292 ErrorInfo = EI;
5293 }
5294 if (R == Match_Success)
5295 break;
5296 }
5297
5298 if (Result == Match_Success) {
5299 if (!validateInstruction(Inst, IDLoc, Operands)) {
5300 return true;
5301 }
5302 Inst.setLoc(IDLoc);
5303 Out.emitInstruction(Inst, getSTI());
5304 return false;
5305 }
5306
5307 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5308 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5309 return true;
5310 }
5311
5312 switch (Result) {
5313 default: break;
5314 case Match_MissingFeature:
5315 // It has been verified that the specified instruction
5316 // mnemonic is valid. A match was found but it requires
5317 // features which are not supported on this GPU.
5318 return Error(IDLoc, "operands are not valid for this GPU or mode");
5319
5320 case Match_InvalidOperand: {
5321 SMLoc ErrorLoc = IDLoc;
5322 if (ErrorInfo != ~0ULL) {
5323 if (ErrorInfo >= Operands.size()) {
5324 return Error(IDLoc, "too few operands for instruction");
5325 }
5326 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5327 if (ErrorLoc == SMLoc())
5328 ErrorLoc = IDLoc;
5329
5330 if (isInvalidVOPDY(Operands, ErrorInfo))
5331 return Error(ErrorLoc, "invalid VOPDY instruction");
5332 }
5333 return Error(ErrorLoc, "invalid operand for instruction");
5334 }
5335
5336 case Match_MnemonicFail:
5337 llvm_unreachable("Invalid instructions should have been handled already");
5338 }
5339 llvm_unreachable("Implement any new match types added!");
5340}
5341
5342bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5343 int64_t Tmp = -1;
5344 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5345 return true;
5346 }
5347 if (getParser().parseAbsoluteExpression(Tmp)) {
5348 return true;
5349 }
5350 Ret = static_cast<uint32_t>(Tmp);
5351 return false;
5352}
5353
5354bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5355 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5356 return TokError("directive only supported for amdgcn architecture");
5357
5358 std::string TargetIDDirective;
5359 SMLoc TargetStart = getTok().getLoc();
5360 if (getParser().parseEscapedString(TargetIDDirective))
5361 return true;
5362
5363 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5364 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5365 return getParser().Error(TargetRange.Start,
5366 (Twine(".amdgcn_target directive's target id ") +
5367 Twine(TargetIDDirective) +
5368 Twine(" does not match the specified target id ") +
5369 Twine(getTargetStreamer().getTargetID()->toString())).str());
5370
5371 return false;
5372}
5373
5374bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5375 return Error(Range.Start, "value out of range", Range);
5376}
5377
5378bool AMDGPUAsmParser::calculateGPRBlocks(
5379 const FeatureBitset &Features, const MCExpr *VCCUsed,
5380 const MCExpr *FlatScrUsed, bool XNACKUsed,
5381 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
5382 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5383 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
5384 // TODO(scott.linder): These calculations are duplicated from
5385 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5386 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5387 MCContext &Ctx = getContext();
5388
5389 const MCExpr *NumSGPRs = NextFreeSGPR;
5390 int64_t EvaluatedSGPRs;
5391
5392 if (Version.Major >= 10)
5393 NumSGPRs = MCConstantExpr::create(0, Ctx);
5394 else {
5395 unsigned MaxAddressableNumSGPRs =
5396 IsaInfo::getAddressableNumSGPRs(&getSTI());
5397
5398 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
5399 !Features.test(FeatureSGPRInitBug) &&
5400 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5401 return OutOfRangeError(SGPRRange);
5402
5403 const MCExpr *ExtraSGPRs =
5404 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
5405 NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
5406
5407 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5408 (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5409 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5410 return OutOfRangeError(SGPRRange);
5411
5412 if (Features.test(FeatureSGPRInitBug))
5413 NumSGPRs =
5414 MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
5415 }
5416
5417 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
5418 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
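 // For example, with NumGPR = 65 and a granule of 8:
 // alignTo(max(1, 65), 8) / 8 - 1 = 72 / 8 - 1 = 8.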
5419 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
5420 unsigned Granule) -> const MCExpr * {
5421 const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
5422 const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
5423 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
5424 const MCExpr *AlignToGPR =
5425 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
5426 const MCExpr *DivGPR =
5427 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
5428 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
5429 return SubGPR;
5430 };
5431
5432 VGPRBlocks = GetNumGPRBlocks(
5433 NextFreeVGPR,
5434 IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
5435 SGPRBlocks =
5436 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));
5437
5438 return false;
5439}
5440
5441bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5442 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5443 return TokError("directive only supported for amdgcn architecture");
5444
5445 if (!isHsaAbi(getSTI()))
5446 return TokError("directive only supported for amdhsa OS");
5447
5448 StringRef KernelName;
5449 if (getParser().parseIdentifier(KernelName))
5450 return true;
5451
5452 AMDGPU::MCKernelDescriptor KD =
5453 AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
5454 &getSTI(), getContext());
5455
5456 StringSet<> Seen;
5457
5458 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5459
5460 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
5461 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
5462
5463 SMRange VGPRRange;
5464 const MCExpr *NextFreeVGPR = ZeroExpr;
5465 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
5466 uint64_t SharedVGPRCount = 0;
5467 uint64_t PreloadLength = 0;
5468 uint64_t PreloadOffset = 0;
5469 SMRange SGPRRange;
5470 const MCExpr *NextFreeSGPR = ZeroExpr;
5471
5472 // Count the number of user SGPRs implied from the enabled feature bits.
5473 unsigned ImpliedUserSGPRCount = 0;
5474
5475 // Track if the asm explicitly contains the directive for the user SGPR
5476 // count.
5477 std::optional<unsigned> ExplicitUserSGPRCount;
5478 const MCExpr *ReserveVCC = OneExpr;
5479 const MCExpr *ReserveFlatScr = OneExpr;
5480 std::optional<bool> EnableWavefrontSize32;
5481
5482 while (true) {
5483 while (trySkipToken(AsmToken::EndOfStatement));
5484
5485 StringRef ID;
5486 SMRange IDRange = getTok().getLocRange();
5487 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5488 return true;
5489
5490 if (ID == ".end_amdhsa_kernel")
5491 break;
5492
5493 if (!Seen.insert(ID).second)
5494 return TokError(".amdhsa_ directives cannot be repeated");
5495
5496 SMLoc ValStart = getLoc();
5497 const MCExpr *ExprVal;
5498 if (getParser().parseExpression(ExprVal))
5499 return true;
5500 SMLoc ValEnd = getLoc();
5501 SMRange ValRange = SMRange(ValStart, ValEnd);
5502
5503 int64_t IVal = 0;
5504 uint64_t Val = IVal;
5505 bool EvaluatableExpr;
5506 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
5507 if (IVal < 0)
5508 return OutOfRangeError(ValRange);
5509 Val = IVal;
5510 }
5511
5512#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5513 if (!isUInt<ENTRY##_WIDTH>(Val)) \
5514 return OutOfRangeError(RANGE); \
5515 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
5516 getContext());
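// PARSE_BITS_ENTRY range-checks the folded value (when the expression is not
// absolute, Val is 0 and the check is a no-op) and then splices the expression
// into FIELD at ENTRY's shift and width via MCKernelDescriptor::bits_set.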
5517
5518// Some fields use the parsed value immediately which requires the expression to
5519// be solvable.
5520#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
5521 if (!(RESOLVED)) \
5522 return Error(IDRange.Start, "directive should have resolvable expression", \
5523 IDRange);
5524
5525 if (ID == ".amdhsa_group_segment_fixed_size") {
5526 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
5527 CHAR_BIT>(Val))
5528 return OutOfRangeError(ValRange);
5529 KD.group_segment_fixed_size = ExprVal;
5530 } else if (ID == ".amdhsa_private_segment_fixed_size") {
5531 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
5532 CHAR_BIT>(Val))
5533 return OutOfRangeError(ValRange);
5534 KD.private_segment_fixed_size = ExprVal;
5535 } else if (ID == ".amdhsa_kernarg_size") {
5536 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
5537 return OutOfRangeError(ValRange);
5538 KD.kernarg_size = ExprVal;
5539 } else if (ID == ".amdhsa_user_sgpr_count") {
5540 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5541 ExplicitUserSGPRCount = Val;
5542 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5543 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5544 if (hasArchitectedFlatScratch())
5545 return Error(IDRange.Start,
5546 "directive is not supported with architected flat scratch",
5547 IDRange);
5548 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5549 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5550 ExprVal, ValRange);
5551 if (Val)
5552 ImpliedUserSGPRCount += 4;
5553 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
5554 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5555 if (!hasKernargPreload())
5556 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5557
5558 if (Val > getMaxNumUserSGPRs())
5559 return OutOfRangeError(ValRange);
5560 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
5561 ValRange);
5562 if (Val) {
5563 ImpliedUserSGPRCount += Val;
5564 PreloadLength = Val;
5565 }
5566 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
5567 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5568 if (!hasKernargPreload())
5569 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5570
5571 if (Val >= 1024)
5572 return OutOfRangeError(ValRange);
5573 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
5574 ValRange);
5575 if (Val)
5576 PreloadOffset = Val;
5577 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5578 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5579 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5580 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
5581 ValRange);
5582 if (Val)
5583 ImpliedUserSGPRCount += 2;
5584 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5585 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5586 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5587 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
5588 ValRange);
5589 if (Val)
5590 ImpliedUserSGPRCount += 2;
5591 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5592 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5593 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5594 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5595 ExprVal, ValRange);
5596 if (Val)
5597 ImpliedUserSGPRCount += 2;
5598 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5599 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5600 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5601 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
5602 ValRange);
5603 if (Val)
5604 ImpliedUserSGPRCount += 2;
5605 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5606 if (hasArchitectedFlatScratch())
5607 return Error(IDRange.Start,
5608 "directive is not supported with architected flat scratch",
5609 IDRange);
5610 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5611 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5612 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
5613 ExprVal, ValRange);
5614 if (Val)
5615 ImpliedUserSGPRCount += 2;
5616 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5617 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5618 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5619 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5620 ExprVal, ValRange);
5621 if (Val)
5622 ImpliedUserSGPRCount += 1;
5623 } else if (ID == ".amdhsa_wavefront_size32") {
5624 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5625 if (IVersion.Major < 10)
5626 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5627 EnableWavefrontSize32 = Val;
5628 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5629 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
5630 ValRange);
5631 } else if (ID == ".amdhsa_uses_dynamic_stack") {
5632 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5633 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
5634 ValRange);
5635 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5636 if (hasArchitectedFlatScratch())
5637 return Error(IDRange.Start,
5638 "directive is not supported with architected flat scratch",
5639 IDRange);
5640 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5641 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5642 ValRange);
5643 } else if (ID == ".amdhsa_enable_private_segment") {
5644 if (!hasArchitectedFlatScratch())
5645 return Error(
5646 IDRange.Start,
5647 "directive is not supported without architected flat scratch",
5648 IDRange);
5649 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5650 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5651 ValRange);
5652 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5653 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5654 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
5655 ValRange);
5656 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5657 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5658 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
5659 ValRange);
5660 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5661 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5662 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
5663 ValRange);
5664 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5665 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5666 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
5667 ValRange);
5668 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5669 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5670 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
5671 ValRange);
5672 } else if (ID == ".amdhsa_next_free_vgpr") {
5673 VGPRRange = ValRange;
5674 NextFreeVGPR = ExprVal;
5675 } else if (ID == ".amdhsa_next_free_sgpr") {
5676 SGPRRange = ValRange;
5677 NextFreeSGPR = ExprVal;
5678 } else if (ID == ".amdhsa_accum_offset") {
5679 if (!isGFX90A())
5680 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5681 AccumOffset = ExprVal;
5682 } else if (ID == ".amdhsa_reserve_vcc") {
5683 if (EvaluatableExpr && !isUInt<1>(Val))
5684 return OutOfRangeError(ValRange);
5685 ReserveVCC = ExprVal;
5686 } else if (ID == ".amdhsa_reserve_flat_scratch") {
5687 if (IVersion.Major < 7)
5688 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5689 if (hasArchitectedFlatScratch())
5690 return Error(IDRange.Start,
5691 "directive is not supported with architected flat scratch",
5692 IDRange);
5693 if (EvaluatableExpr && !isUInt<1>(Val))
5694 return OutOfRangeError(ValRange);
5695 ReserveFlatScr = ExprVal;
5696 } else if (ID == ".amdhsa_reserve_xnack_mask") {
5697 if (IVersion.Major < 8)
5698 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5699 if (!isUInt<1>(Val))
5700 return OutOfRangeError(ValRange);
5701 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5702 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5703 IDRange);
5704 } else if (ID == ".amdhsa_float_round_mode_32") {
5705 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5706 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
5707 ValRange);
5708 } else if (ID == ".amdhsa_float_round_mode_16_64") {
5709 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5710 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
5711 ValRange);
5712 } else if (ID == ".amdhsa_float_denorm_mode_32") {
5713 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5714 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
5715 ValRange);
5716 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5717 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5718 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
5719 ValRange);
5720 } else if (ID == ".amdhsa_dx10_clamp") {
5721 if (IVersion.Major >= 12)
5722 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5723 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5724 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
5725 ValRange);
5726 } else if (ID == ".amdhsa_ieee_mode") {
5727 if (IVersion.Major >= 12)
5728 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5729 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5730 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
5731 ValRange);
5732 } else if (ID == ".amdhsa_fp16_overflow") {
5733 if (IVersion.Major < 9)
5734 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5735 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5736 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
5737 ValRange);
5738 } else if (ID == ".amdhsa_tg_split") {
5739 if (!isGFX90A())
5740 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5741 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
5742 ExprVal, ValRange);
5743 } else if (ID == ".amdhsa_workgroup_processor_mode") {
5744 if (IVersion.Major < 10)
5745 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5746 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5747 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
5748 ValRange);
5749 } else if (ID == ".amdhsa_memory_ordered") {
5750 if (IVersion.Major < 10)
5751 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5752 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5753 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
5754 ValRange);
5755 } else if (ID == ".amdhsa_forward_progress") {
5756 if (IVersion.Major < 10)
5757 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5758 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5759 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
5760 ValRange);
5761 } else if (ID == ".amdhsa_shared_vgpr_count") {
5762 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5763 if (IVersion.Major < 10 || IVersion.Major >= 12)
5764 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
5765 IDRange);
5766 SharedVGPRCount = Val;
5767 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5768 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
5769 ValRange);
5770 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5771 PARSE_BITS_ENTRY(
5772 KD.compute_pgm_rsrc2,
5773 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
5774 ExprVal, ValRange);
5775 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5776 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5777 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5778 ExprVal, ValRange);
5779 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5780 PARSE_BITS_ENTRY(
5781 KD.compute_pgm_rsrc2,
5782 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
5783 ExprVal, ValRange);
5784 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5785 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5786 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5787 ExprVal, ValRange);
5788 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5789 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5790 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5791 ExprVal, ValRange);
5792 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5793 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5794 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5795 ExprVal, ValRange);
5796 } else if (ID == ".amdhsa_exception_int_div_zero") {
5797 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5798 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5799 ExprVal, ValRange);
5800 } else if (ID == ".amdhsa_round_robin_scheduling") {
5801 if (IVersion.Major < 12)
5802 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
5803 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5804 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
5805 ValRange);
5806 } else {
5807 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5808 }
5809
5810#undef PARSE_BITS_ENTRY
5811 }
5812
5813 if (!Seen.contains(".amdhsa_next_free_vgpr"))
5814 return TokError(".amdhsa_next_free_vgpr directive is required");
5815
5816 if (!Seen.contains(".amdhsa_next_free_sgpr"))
5817 return TokError(".amdhsa_next_free_sgpr directive is required");
5818
5819 const MCExpr *VGPRBlocks;
5820 const MCExpr *SGPRBlocks;
5821 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5822 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5823 EnableWavefrontSize32, NextFreeVGPR,
5824 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5825 SGPRBlocks))
5826 return true;
5827
5828 int64_t EvaluatedVGPRBlocks;
5829 bool VGPRBlocksEvaluatable =
5830 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
5831 if (VGPRBlocksEvaluatable &&
5832 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5833 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
5834 return OutOfRangeError(VGPRRange);
5835 }
5836 MCKernelDescriptor::bits_set(
5837 KD.compute_pgm_rsrc1, VGPRBlocks,
5838 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
5839 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
5840
5841 int64_t EvaluatedSGPRBlocks;
5842 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
5843 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5844 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
5845 return OutOfRangeError(SGPRRange);
5846 MCKernelDescriptor::bits_set(
5847 KD.compute_pgm_rsrc1, SGPRBlocks,
5848 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
5849 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
5850
5851 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5852 return TokError("amdgpu_user_sgpr_count smaller than implied by "
5853 "enabled user SGPRs");
5854
5855 unsigned UserSGPRCount =
5856 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5857
5858 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5859 return TokError("too many user SGPRs enabled");
5860 MCKernelDescriptor::bits_set(
5861 KD.compute_pgm_rsrc2, MCConstantExpr::create(UserSGPRCount, getContext()),
5862 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
5863 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
5864
5865 int64_t IVal = 0;
5866 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
5867 return TokError("Kernarg size should be resolvable");
5868 uint64_t kernarg_size = IVal;
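// The preload length and offset are counted in dwords (one SGPR each), so both
// are scaled by 4 to compare against the kernarg segment size in bytes.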
5869 if (PreloadLength && kernarg_size &&
5870 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
5871 return TokError("Kernarg preload length + offset is larger than the "
5872 "kernarg segment size");
5873
5874 if (isGFX90A()) {
5875 if (!Seen.contains(".amdhsa_accum_offset"))
5876 return TokError(".amdhsa_accum_offset directive is required");
5877 int64_t EvaluatedAccum;
5878 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
5879 uint64_t UEvaluatedAccum = EvaluatedAccum;
5880 if (AccumEvaluatable &&
5881 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
5882 return TokError("accum_offset should be in range [4..256] in "
5883 "increments of 4");
5884
5885 int64_t EvaluatedNumVGPR;
5886 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
5887 AccumEvaluatable &&
5888 UEvaluatedAccum >
5889 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
5890 return TokError("accum_offset exceeds total VGPR allocation");
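// COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET holds (accum_offset / 4) - 1, hence the
// div/sub rescaling of the expression below.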
5891 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
5892 MCBinaryExpr::createDiv(
5893 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
5894 MCConstantExpr::create(1, getContext()), getContext());
5895 MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, AdjustedAccum,
5896 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
5897 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5898 getContext());
5899 }
5900
5901 if (IVersion.Major >= 10 && IVersion.Major < 12) {
5902 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5903 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5904 return TokError("shared_vgpr_count directive not valid on "
5905 "wavefront size 32");
5906 }
5907
5908 if (VGPRBlocksEvaluatable &&
5909 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
5910 63)) {
5911 return TokError("shared_vgpr_count*2 + "
5912 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5913 "exceed 63\n");
5914 }
5915 }
5916
5917 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
5918 NextFreeVGPR, NextFreeSGPR,
5919 ReserveVCC, ReserveFlatScr);
5920 return false;
5921}
5922
5923bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
5924 uint32_t Version;
5925 if (ParseAsAbsoluteExpression(Version))
5926 return true;
5927
5928 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
5929 return false;
5930}
5931
5932 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5933 AMDGPUMCKernelCodeT &C) {
5934 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5935 // assembly for backwards compatibility.
5936 if (ID == "max_scratch_backing_memory_byte_size") {
5937 Parser.eatToEndOfStatement();
5938 return false;
5939 }
5940
5941 SmallString<40> ErrStr;
5942 raw_svector_ostream Err(ErrStr);
5943 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
5944 return TokError(Err.str());
5945 }
5946 Lex();
5947
5948 if (ID == "enable_wavefront_size32") {
5949 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5950 if (!isGFX10Plus())
5951 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5952 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5953 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5954 } else {
5955 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5956 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5957 }
5958 }
5959
5960 if (ID == "wavefront_size") {
5961 if (C.wavefront_size == 5) {
5962 if (!isGFX10Plus())
5963 return TokError("wavefront_size=5 is only allowed on GFX10+");
5964 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5965 return TokError("wavefront_size=5 requires +WavefrontSize32");
5966 } else if (C.wavefront_size == 6) {
5967 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5968 return TokError("wavefront_size=6 requires +WavefrontSize64");
5969 }
5970 }
5971
5972 return false;
5973}
5974
5975bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5976 AMDGPUMCKernelCodeT KernelCode;
5977 KernelCode.initDefault(&getSTI(), getContext());
5978
5979 while (true) {
5980 // Lex EndOfStatement. This is in a while loop, because lexing a comment
5981 // will set the current token to EndOfStatement.
5982 while(trySkipToken(AsmToken::EndOfStatement));
5983
5984 StringRef ID;
5985 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5986 return true;
5987
5988 if (ID == ".end_amd_kernel_code_t")
5989 break;
5990
5991 if (ParseAMDKernelCodeTValue(ID, KernelCode))
5992 return true;
5993 }
5994
5995 KernelCode.validate(&getSTI(), getContext());
5996 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
5997
5998 return false;
5999}
6000
6001bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6002 StringRef KernelName;
6003 if (!parseId(KernelName, "expected symbol name"))
6004 return true;
6005
6006 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6007 ELF::STT_AMDGPU_HSA_KERNEL);
6008
6009 KernelScope.initialize(getContext());
6010 return false;
6011}
6012
6013bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6014 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
6015 return Error(getLoc(),
6016 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6017 "architectures");
6018 }
6019
6020 auto TargetIDDirective = getLexer().getTok().getStringContents();
6021 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6022 return Error(getParser().getTok().getLoc(), "target id must match options");
6023
6024 getTargetStreamer().EmitISAVersion();
6025 Lex();
6026
6027 return false;
6028}
6029
6030bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6031 assert(isHsaAbi(getSTI()));
6032
6033 std::string HSAMetadataString;
6034 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6035 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6036 return true;
6037
6038 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6039 return Error(getLoc(), "invalid HSA metadata");
6040
6041 return false;
6042}
6043
6044/// Common code to parse out a block of text (typically YAML) between start and
6045/// end directives.
6046bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6047 const char *AssemblerDirectiveEnd,
6048 std::string &CollectString) {
6049
6050 raw_string_ostream CollectStream(CollectString);
6051
6052 getLexer().setSkipSpace(false);
6053
6054 bool FoundEnd = false;
6055 while (!isToken(AsmToken::Eof)) {
6056 while (isToken(AsmToken::Space)) {
6057 CollectStream << getTokenStr();
6058 Lex();
6059 }
6060
6061 if (trySkipId(AssemblerDirectiveEnd)) {
6062 FoundEnd = true;
6063 break;
6064 }
6065
6066 CollectStream << Parser.parseStringToEndOfStatement()
6067 << getContext().getAsmInfo()->getSeparatorString();
6068
6069 Parser.eatToEndOfStatement();
6070 }
6071
6072 getLexer().setSkipSpace(true);
6073
6074 if (isToken(AsmToken::Eof) && !FoundEnd) {
6075 return TokError(Twine("expected directive ") +
6076 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6077 }
6078
6079 CollectStream.flush();
6080 return false;
6081}
6082
6083/// Parse the assembler directive for new MsgPack-format PAL metadata.
6084bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6085 std::string String;
6086 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6087 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
6088 return true;
6089
6090 auto PALMetadata = getTargetStreamer().getPALMetadata();
6091 if (!PALMetadata->setFromString(String))
6092 return Error(getLoc(), "invalid PAL metadata");
6093 return false;
6094}
6095
6096/// Parse the assembler directive for old linear-format PAL metadata.
6097bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6098 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6099 return Error(getLoc(),
6100 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6101 "not available on non-amdpal OSes")).str());
6102 }
6103
6104 auto PALMetadata = getTargetStreamer().getPALMetadata();
6105 PALMetadata->setLegacy();
6106 for (;;) {
6107 uint32_t Key, Value;
6108 if (ParseAsAbsoluteExpression(Key)) {
6109 return TokError(Twine("invalid value in ") +
6110 Twine(PALMD::AssemblerDirective));
6111 }
6112 if (!trySkipToken(AsmToken::Comma)) {
6113 return TokError(Twine("expected an even number of values in ") +
6114 Twine(PALMD::AssemblerDirective));
6115 }
6116 if (ParseAsAbsoluteExpression(Value)) {
6117 return TokError(Twine("invalid value in ") +
6118 Twine(PALMD::AssemblerDirective));
6119 }
6120 PALMetadata->setRegister(Key, Value);
6121 if (!trySkipToken(AsmToken::Comma))
6122 break;
6123 }
6124 return false;
6125}
6126
6127/// ParseDirectiveAMDGPULDS
6128/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
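/// e.g. ".amdgpu_lds lds_buffer, 16384, 16" (illustrative values; the size is in
/// bytes and the alignment, defaulting to 4, must be a power of two).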
6129bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6130 if (getParser().checkForValidSection())
6131 return true;
6132
6133 StringRef Name;
6134 SMLoc NameLoc = getLoc();
6135 if (getParser().parseIdentifier(Name))
6136 return TokError("expected identifier in directive");
6137
6138 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6139 if (getParser().parseComma())
6140 return true;
6141
6142 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6143
6144 int64_t Size;
6145 SMLoc SizeLoc = getLoc();
6146 if (getParser().parseAbsoluteExpression(Size))
6147 return true;
6148 if (Size < 0)
6149 return Error(SizeLoc, "size must be non-negative");
6150 if (Size > LocalMemorySize)
6151 return Error(SizeLoc, "size is too large");
6152
6153 int64_t Alignment = 4;
6154 if (trySkipToken(AsmToken::Comma)) {
6155 SMLoc AlignLoc = getLoc();
6156 if (getParser().parseAbsoluteExpression(Alignment))
6157 return true;
6158 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6159 return Error(AlignLoc, "alignment must be a power of two");
6160
6161 // Alignment larger than the size of LDS is possible in theory, as long
6162 // as the linker manages to place the symbol at address 0, but we do want
6163 // to make sure the alignment fits nicely into a 32-bit integer.
6164 if (Alignment >= 1u << 31)
6165 return Error(AlignLoc, "alignment is too large");
6166 }
6167
6168 if (parseEOL())
6169 return true;
6170
6171 Symbol->redefineIfPossible();
6172 if (!Symbol->isUndefined())
6173 return Error(NameLoc, "invalid symbol redefinition");
6174
6175 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6176 return false;
6177}
6178
6179bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6180 StringRef IDVal = DirectiveID.getString();
6181
6182 if (isHsaAbi(getSTI())) {
6183 if (IDVal == ".amdhsa_kernel")
6184 return ParseDirectiveAMDHSAKernel();
6185
6186 if (IDVal == ".amdhsa_code_object_version")
6187 return ParseDirectiveAMDHSACodeObjectVersion();
6188
6189 // TODO: Restructure/combine with PAL metadata directive.
6190 if (IDVal == HSAMD::V3::AssemblerDirectiveBegin)
6191 return ParseDirectiveHSAMetadata();
6192 } else {
6193 if (IDVal == ".amd_kernel_code_t")
6194 return ParseDirectiveAMDKernelCodeT();
6195
6196 if (IDVal == ".amdgpu_hsa_kernel")
6197 return ParseDirectiveAMDGPUHsaKernel();
6198
6199 if (IDVal == ".amd_amdgpu_isa")
6200 return ParseDirectiveISAVersion();
6201
6202 if (IDVal == HSAMD::AssemblerDirectiveBegin) {
6203 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6204 Twine(" directive is "
6205 "not available on non-amdhsa OSes"))
6206 .str());
6207 }
6208 }
6209
6210 if (IDVal == ".amdgcn_target")
6211 return ParseDirectiveAMDGCNTarget();
6212
6213 if (IDVal == ".amdgpu_lds")
6214 return ParseDirectiveAMDGPULDS();
6215
6216 if (IDVal == PALMD::AssemblerDirectiveBegin)
6217 return ParseDirectivePALMetadataBegin();
6218
6219 if (IDVal == PALMD::AssemblerDirective)
6220 return ParseDirectivePALMetadata();
6221
6222 return true;
6223}
6224
6225bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6226 unsigned RegNo) {
6227 if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
6228 return isGFX9Plus();
6229
6230 // GFX10+ has 2 more SGPRs 104 and 105.
6231 if (MRI.regsOverlap(SGPR104_SGPR105, RegNo))
6232 return hasSGPR104_SGPR105();
6233
6234 switch (RegNo) {
6235 case SRC_SHARED_BASE_LO:
6236 case SRC_SHARED_BASE:
6237 case SRC_SHARED_LIMIT_LO:
6238 case SRC_SHARED_LIMIT:
6239 case SRC_PRIVATE_BASE_LO:
6240 case SRC_PRIVATE_BASE:
6241 case SRC_PRIVATE_LIMIT_LO:
6242 case SRC_PRIVATE_LIMIT:
6243 return isGFX9Plus();
6244 case SRC_POPS_EXITING_WAVE_ID:
6245 return isGFX9Plus() && !isGFX11Plus();
6246 case TBA:
6247 case TBA_LO:
6248 case TBA_HI:
6249 case TMA:
6250 case TMA_LO:
6251 case TMA_HI:
6252 return !isGFX9Plus();
6253 case XNACK_MASK:
6254 case XNACK_MASK_LO:
6255 case XNACK_MASK_HI:
6256 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6257 case SGPR_NULL:
6258 return isGFX10Plus();
6259 case SRC_EXECZ:
6260 case SRC_VCCZ:
6261 return !isGFX11Plus();
6262 default:
6263 break;
6264 }
6265
6266 if (isCI())
6267 return true;
6268
6269 if (isSI() || isGFX10Plus()) {
6270 // No flat_scr on SI.
6271 // On GFX10Plus flat scratch is not a valid register operand and can only be
6272 // accessed with s_setreg/s_getreg.
6273 switch (RegNo) {
6274 case FLAT_SCR:
6275 case FLAT_SCR_LO:
6276 case FLAT_SCR_HI:
6277 return false;
6278 default:
6279 return true;
6280 }
6281 }
6282
6283 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6284 // SI/CI have.
6285 if (MRI.regsOverlap(SGPR102_SGPR103, RegNo))
6286 return hasSGPR102_SGPR103();
6287
6288 return true;
6289}
6290
6291ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6292 StringRef Mnemonic,
6293 OperandMode Mode) {
6294 ParseStatus Res = parseVOPD(Operands);
6295 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6296 return Res;
6297
6298 // Try to parse with a custom parser
6299 Res = MatchOperandParserImpl(Operands, Mnemonic);
6300
6301 // If we successfully parsed the operand or if there was an error parsing,
6302 // we are done.
6303 //
6304 // If we are parsing after we reach EndOfStatement then this means we
6305 // are appending default values to the Operands list. This is only done
6306 // by custom parser, so we shouldn't continue on to the generic parsing.
6307 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6308 return Res;
6309
6310 SMLoc RBraceLoc;
6311 SMLoc LBraceLoc = getLoc();
6312 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6313 unsigned Prefix = Operands.size();
6314
6315 for (;;) {
6316 auto Loc = getLoc();
6317 Res = parseReg(Operands);
6318 if (Res.isNoMatch())
6319 Error(Loc, "expected a register");
6320 if (!Res.isSuccess())
6321 return ParseStatus::Failure;
6322
6323 RBraceLoc = getLoc();
6324 if (trySkipToken(AsmToken::RBrac))
6325 break;
6326
6327 if (!skipToken(AsmToken::Comma,
6328 "expected a comma or a closing square bracket"))
6329 return ParseStatus::Failure;
6330 }
6331
6332 if (Operands.size() - Prefix > 1) {
6333 Operands.insert(Operands.begin() + Prefix,
6334 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6335 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6336 }
6337
6338 return ParseStatus::Success;
6339 }
6340
6341 return parseRegOrImm(Operands);
6342}
6343
6344StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6345 // Clear any forced encodings from the previous instruction.
6346 setForcedEncodingSize(0);
6347 setForcedDPP(false);
6348 setForcedSDWA(false);
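// e.g. "v_add_f32_e64" yields the mnemonic "v_add_f32" with a forced 64-bit
// encoding; the "_dpp" and "_sdwa" suffixes force the DPP and SDWA variants.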
6349
6350 if (Name.ends_with("_e64_dpp")) {
6351 setForcedDPP(true);
6352 setForcedEncodingSize(64);
6353 return Name.substr(0, Name.size() - 8);
6354 }
6355 if (Name.ends_with("_e64")) {
6356 setForcedEncodingSize(64);
6357 return Name.substr(0, Name.size() - 4);
6358 }
6359 if (Name.ends_with("_e32")) {
6360 setForcedEncodingSize(32);
6361 return Name.substr(0, Name.size() - 4);
6362 }
6363 if (Name.ends_with("_dpp")) {
6364 setForcedDPP(true);
6365 return Name.substr(0, Name.size() - 4);
6366 }
6367 if (Name.ends_with("_sdwa")) {
6368 setForcedSDWA(true);
6369 return Name.substr(0, Name.size() - 5);
6370 }
6371 return Name;
6372}
6373
6374static void applyMnemonicAliases(StringRef &Mnemonic,
6375 const FeatureBitset &Features,
6376 unsigned VariantID);
6377
6378bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
6379 StringRef Name,
6380 SMLoc NameLoc, OperandVector &Operands) {
6381 // Add the instruction mnemonic
6382 Name = parseMnemonicSuffix(Name);
6383
6384 // If the target architecture uses MnemonicAlias, call it here to parse
6385 // operands correctly.
6386 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6387
6388 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6389
6390 bool IsMIMG = Name.starts_with("image_");
6391
6392 while (!trySkipToken(AsmToken::EndOfStatement)) {
6393 OperandMode Mode = OperandMode_Default;
6394 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6395 Mode = OperandMode_NSA;
6396 ParseStatus Res = parseOperand(Operands, Name, Mode);
6397
6398 if (!Res.isSuccess()) {
6399 checkUnsupportedInstruction(Name, NameLoc);
6400 if (!Parser.hasPendingError()) {
6401 // FIXME: use real operand location rather than the current location.
6402 StringRef Msg = Res.isFailure() ? "failed parsing operand."
6403 : "not a valid operand.";
6404 Error(getLoc(), Msg);
6405 }
6406 while (!trySkipToken(AsmToken::EndOfStatement)) {
6407 lex();
6408 }
6409 return true;
6410 }
6411
6412 // Eat the comma or space if there is one.
6413 trySkipToken(AsmToken::Comma);
6414 }
6415
6416 return false;
6417}
6418
6419//===----------------------------------------------------------------------===//
6420// Utility functions
6421//===----------------------------------------------------------------------===//
6422
6423ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6424 OperandVector &Operands) {
6425 SMLoc S = getLoc();
6426 if (!trySkipId(Name))
6427 return ParseStatus::NoMatch;
6428
6429 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6430 return ParseStatus::Success;
6431}
6432
6433ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6434 int64_t &IntVal) {
6435
6436 if (!trySkipId(Prefix, AsmToken::Colon))
6437 return ParseStatus::NoMatch;
6438
6439 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6440}
6441
6442ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6443 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6444 std::function<bool(int64_t &)> ConvertResult) {
6445 SMLoc S = getLoc();
6446 int64_t Value = 0;
6447
6448 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6449 if (!Res.isSuccess())
6450 return Res;
6451
6452 if (ConvertResult && !ConvertResult(Value)) {
6453 Error(S, "invalid " + StringRef(Prefix) + " value.");
6454 }
6455
6456 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6457 return ParseStatus::Success;
6458}
6459
6460ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6461 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6462 bool (*ConvertResult)(int64_t &)) {
6463 SMLoc S = getLoc();
6464 if (!trySkipId(Prefix, AsmToken::Colon))
6465 return ParseStatus::NoMatch;
6466
6467 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
6468 return ParseStatus::Failure;
6469
6470 unsigned Val = 0;
6471 const unsigned MaxSize = 4;
6472
6473 // FIXME: How to verify the number of elements matches the number of src
6474 // operands?
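// e.g. neg:[0,1,1] - each element must be 0 or 1 and is OR'd into Val at its
// position; at most MaxSize (4) elements are accepted.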
6475 for (int I = 0; ; ++I) {
6476 int64_t Op;
6477 SMLoc Loc = getLoc();
6478 if (!parseExpr(Op))
6479 return ParseStatus::Failure;
6480
6481 if (Op != 0 && Op != 1)
6482 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
6483
6484 Val |= (Op << I);
6485
6486 if (trySkipToken(AsmToken::RBrac))
6487 break;
6488
6489 if (I + 1 == MaxSize)
6490 return Error(getLoc(), "expected a closing square bracket");
6491
6492 if (!skipToken(AsmToken::Comma, "expected a comma"))
6493 return ParseStatus::Failure;
6494 }
6495
6496 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
6497 return ParseStatus::Success;
6498}
6499
6500ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
6501 OperandVector &Operands,
6502 AMDGPUOperand::ImmTy ImmTy) {
6503 int64_t Bit;
6504 SMLoc S = getLoc();
6505
6506 if (trySkipId(Name)) {
6507 Bit = 1;
6508 } else if (trySkipId("no", Name)) {
6509 Bit = 0;
6510 } else {
6511 return ParseStatus::NoMatch;
6512 }
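// e.g. "r128" sets the bit; the "no" prefix ("nor128") explicitly clears it.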
6513
6514 if (Name == "r128" && !hasMIMG_R128())
6515 return Error(S, "r128 modifier is not supported on this GPU");
6516 if (Name == "a16" && !hasA16())
6517 return Error(S, "a16 modifier is not supported on this GPU");
6518
6519 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6520 ImmTy = AMDGPUOperand::ImmTyR128A16;
6521
6522 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
6523 return ParseStatus::Success;
6524}
6525
6526unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
6527 bool &Disabling) const {
6528 Disabling = Id.consume_front("no");
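// A leading "no" (e.g. "noglc") selects the disabling form of the modifier.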
6529
6530 if (isGFX940() && !Mnemo.starts_with("s_")) {
6531 return StringSwitch<unsigned>(Id)
6532 .Case("nt", AMDGPU::CPol::NT)
6533 .Case("sc0", AMDGPU::CPol::SC0)
6534 .Case("sc1", AMDGPU::CPol::SC1)
6535 .Default(0);
6536 }
6537
6538 return StringSwitch<unsigned>(Id)
6539 .Case("dlc", AMDGPU::CPol::DLC)
6540 .Case("glc", AMDGPU::CPol::GLC)
6541 .Case("scc", AMDGPU::CPol::SCC)
6542 .Case("slc", AMDGPU::CPol::SLC)
6543 .Default(0);
6544}
6545
6546ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
6547 if (isGFX12Plus()) {
6548 SMLoc StringLoc = getLoc();
6549
6550 int64_t CPolVal = 0;
6551 ParseStatus ResTH = ParseStatus::NoMatch;
6552 ParseStatus ResScope = ParseStatus::NoMatch;
6553
6554 for (;;) {
6555 if (ResTH.isNoMatch()) {
6556 int64_t TH;
6557 ResTH = parseTH(Operands, TH);
6558 if (ResTH.isFailure())
6559 return ResTH;
6560 if (ResTH.isSuccess()) {
6561 CPolVal |= TH;
6562 continue;
6563 }
6564 }
6565
6566 if (ResScope.isNoMatch()) {
6567 int64_t Scope;
6568 ResScope = parseScope(Operands, Scope);
6569 if (ResScope.isFailure())
6570 return ResScope;
6571 if (ResScope.isSuccess()) {
6572 CPolVal |= Scope;
6573 continue;
6574 }
6575 }
6576
6577 break;
6578 }
6579
6580 if (ResTH.isNoMatch() && ResScope.isNoMatch())
6581 return ParseStatus::NoMatch;
6582
6583 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
6584 AMDGPUOperand::ImmTyCPol));
6585 return ParseStatus::Success;
6586 }
6587
6588 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
6589 SMLoc OpLoc = getLoc();
6590 unsigned Enabled = 0, Seen = 0;
6591 for (;;) {
6592 SMLoc S = getLoc();
6593 bool Disabling;
6594 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
6595 if (!CPol)
6596 break;
6597
6598 lex();
6599
6600 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
6601 return Error(S, "dlc modifier is not supported on this GPU");
6602
6603 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
6604 return Error(S, "scc modifier is not supported on this GPU");
6605
6606 if (Seen & CPol)
6607 return Error(S, "duplicate cache policy modifier");
6608
6609 if (!Disabling)
6610 Enabled |= CPol;
6611
6612 Seen |= CPol;
6613 }
6614
6615 if (!Seen)
6616 return ParseStatus::NoMatch;
6617
6618 Operands.push_back(
6619 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
6620 return ParseStatus::Success;
6621}
6622
6623ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
6624 int64_t &Scope) {
6625 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
6626 CPol::SCOPE_DEV, CPol::SCOPE_SYS};
6627
6628 ParseStatus Res = parseStringOrIntWithPrefix(
6629 Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
6630 Scope);
6631
6632 if (Res.isSuccess())
6633 Scope = Scopes[Scope];
6634
6635 return Res;
6636}
6637
6638ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
6639 TH = AMDGPU::CPol::TH_RT; // default
6640
6641 StringRef Value;
6642 SMLoc StringLoc;
6643 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
6644 if (!Res.isSuccess())
6645 return Res;
6646
6647 if (Value == "TH_DEFAULT")
6648 TH = AMDGPU::CPol::TH_RT;
6649 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" ||
6650 Value == "TH_LOAD_NT_WB") {
6651 return Error(StringLoc, "invalid th value");
6652 } else if (Value.consume_front("TH_ATOMIC_")) {
6653 TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
6654 } else if (Value.consume_front("TH_LOAD_")) {
6655 TH = AMDGPU::CPol::TH_TYPE_LOAD;
6656 } else if (Value.consume_front("TH_STORE_")) {
6657 TH = AMDGPU::CPol::TH_TYPE_STORE;
6658 } else {
6659 return Error(StringLoc, "invalid th value");
6660 }
6661
6662 if (Value == "BYPASS")
6663 TH |= AMDGPU::CPol::TH_REAL_BYPASS;
6664
6665 if (TH != 0) {
6666 if (TH == AMDGPU::CPol::TH_TYPE_ATOMIC)
6667 TH |= StringSwitch<int64_t>(Value)
6668 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
6669 .Case("RT", AMDGPU::CPol::TH_RT)
6670 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
6671 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
6672 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
6673 AMDGPU::CPol::TH_ATOMIC_RETURN)
6674 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
6675 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
6676 AMDGPU::CPol::TH_ATOMIC_NT)
6677 .Default(0xffffffff);
6678 else
6679 TH |= StringSwitch<int64_t>(Value)
6680 .Case("RT", AMDGPU::CPol::TH_RT)
6681 .Case("NT", AMDGPU::CPol::TH_NT)
6682 .Case("HT", AMDGPU::CPol::TH_HT)
6683 .Case("LU", AMDGPU::CPol::TH_LU)
6684 .Case("RT_WB", AMDGPU::CPol::TH_RT_WB)
6685 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
6686 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
6687 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
6688 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
6689 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
6690 .Default(0xffffffff);
6691 }
6692
6693 if (TH == 0xffffffff)
6694 return Error(StringLoc, "invalid th value");
6695
6696 return ParseStatus::Success;
6697}
6698
6699 static void addOptionalImmOperand(
6700 MCInst& Inst, const OperandVector& Operands,
6701 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6702 AMDGPUOperand::ImmTy ImmT,
6703 int64_t Default = 0) {
6704 auto i = OptionalIdx.find(ImmT);
6705 if (i != OptionalIdx.end()) {
6706 unsigned Idx = i->second;
6707 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
6708 } else {
6709 Inst.addOperand(MCOperand::createImm(Default));
6710 }
6711}
6712
6713ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6714 StringRef &Value,
6715 SMLoc &StringLoc) {
6716 if (!trySkipId(Prefix, AsmToken::Colon))
6717 return ParseStatus::NoMatch;
6718
6719 StringLoc = getLoc();
6720 return parseId(Value, "expected an identifier") ? ParseStatus::Success
6721 : ParseStatus::Failure;
6722}
6723
6724ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
6725 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
6726 int64_t &IntVal) {
6727 if (!trySkipId(Name, AsmToken::Colon))
6728 return ParseStatus::NoMatch;
6729
6730 SMLoc StringLoc = getLoc();
6731
6732 StringRef Value;
6733 if (isToken(AsmToken::Identifier)) {
6734 Value = getTokenStr();
6735 lex();
6736
6737 for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
6738 if (Value == Ids[IntVal])
6739 break;
6740 } else if (!parseExpr(IntVal))
6741 return ParseStatus::Failure;
6742
6743 if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
6744 return Error(StringLoc, "invalid " + Twine(Name) + " value");
6745
6746 return ParseStatus::Success;
6747}
6748
6749ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
6750 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
6751 AMDGPUOperand::ImmTy Type) {
6752 SMLoc S = getLoc();
6753 int64_t IntVal;
6754
6755 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
6756 if (Res.isSuccess())
6757 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
6758
6759 return Res;
6760}
6761
6762//===----------------------------------------------------------------------===//
6763// MTBUF format
6764//===----------------------------------------------------------------------===//
6765
6766bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6767 int64_t MaxVal,
6768 int64_t &Fmt) {
6769 int64_t Val;
6770 SMLoc Loc = getLoc();
6771
6772 auto Res = parseIntWithPrefix(Pref, Val);
6773 if (Res.isFailure())
6774 return false;
6775 if (Res.isNoMatch())
6776 return true;
6777
6778 if (Val < 0 || Val > MaxVal) {
6779 Error(Loc, Twine("out of range ", StringRef(Pref)));
6780 return false;
6781 }
6782
6783 Fmt = Val;
6784 return true;
6785}
6786
6787ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
6788 AMDGPUOperand::ImmTy ImmTy) {
6789 const char *Pref = "index_key";
6790 int64_t ImmVal = 0;
6791 SMLoc Loc = getLoc();
6792 auto Res = parseIntWithPrefix(Pref, ImmVal);
6793 if (!Res.isSuccess())
6794 return Res;
6795
6796 if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
6797 return Error(Loc, Twine("out of range ", StringRef(Pref)));
6798
6799 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
6800 return Error(Loc, Twine("out of range ", StringRef(Pref)));
6801
6802 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
6803 return ParseStatus::Success;
6804}
6805
6806ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
6807 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
6808}
6809
6810ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
6811 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
6812}
6813
6814// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6815// values to live in a joint format operand in the MCInst encoding.
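// e.g. "dfmt:4, nfmt:2" - the two fields may appear in either order, each is
// optional, and an omitted field takes its default value.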
6816ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6817 using namespace llvm::AMDGPU::MTBUFFormat;
6818
6819 int64_t Dfmt = DFMT_UNDEF;
6820 int64_t Nfmt = NFMT_UNDEF;
6821
6822 // dfmt and nfmt can appear in either order, and each is optional.
6823 for (int I = 0; I < 2; ++I) {
6824 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6825 return ParseStatus::Failure;
6826
6827 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
6828 return ParseStatus::Failure;
6829
6830 // Skip optional comma between dfmt/nfmt
6831 // but guard against 2 commas following each other.
6832 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6833 !peekToken().is(AsmToken::Comma)) {
6834 trySkipToken(AsmToken::Comma);
6835 }
6836 }
6837
6838 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6839 return ParseStatus::NoMatch;
6840
6841 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6842 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6843
6844 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6845 return ParseStatus::Success;
6846}
6847
6848ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6849 using namespace llvm::AMDGPU::MTBUFFormat;
6850
6851 int64_t Fmt = UFMT_UNDEF;
6852
6853 if (!tryParseFmt("format", UFMT_MAX, Fmt))
6854 return ParseStatus::Failure;
6855
6856 if (Fmt == UFMT_UNDEF)
6857 return ParseStatus::NoMatch;
6858
6859 Format = Fmt;
6860 return ParseStatus::Success;
6861}
6862
6863bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6864 int64_t &Nfmt,
6865 StringRef FormatStr,
6866 SMLoc Loc) {
6867 using namespace llvm::AMDGPU::MTBUFFormat;
6868 int64_t Format;
6869
6870 Format = getDfmt(FormatStr);
6871 if (Format != DFMT_UNDEF) {
6872 Dfmt = Format;
6873 return true;
6874 }
6875
6876 Format = getNfmt(FormatStr, getSTI());
6877 if (Format != NFMT_UNDEF) {
6878 Nfmt = Format;
6879 return true;
6880 }
6881
6882 Error(Loc, "unsupported format");
6883 return false;
6884}
6885
6886ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6887 SMLoc FormatLoc,
6888 int64_t &Format) {
6889 using namespace llvm::AMDGPU::MTBUFFormat;
6890
6891 int64_t Dfmt = DFMT_UNDEF;
6892 int64_t Nfmt = NFMT_UNDEF;
6893 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6894 return ParseStatus::Failure;
6895
6896 if (trySkipToken(AsmToken::Comma)) {
6897 StringRef Str;
6898 SMLoc Loc = getLoc();
6899 if (!parseId(Str, "expected a format string") ||
6900 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
6901 return ParseStatus::Failure;
6902 if (Dfmt == DFMT_UNDEF)
6903 return Error(Loc, "duplicate numeric format");
6904 if (Nfmt == NFMT_UNDEF)
6905 return Error(Loc, "duplicate data format");
6906 }
6907
6908 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6909 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6910
6911 if (isGFX10Plus()) {
6912 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6913 if (Ufmt == UFMT_UNDEF)
6914 return Error(FormatLoc, "unsupported format");
6915 Format = Ufmt;
6916 } else {
6917 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6918 }
6919
6920 return ParseStatus::Success;
6921}
6922
6923ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6924 SMLoc Loc,
6925 int64_t &Format) {
6926 using namespace llvm::AMDGPU::MTBUFFormat;
6927
6928 auto Id = getUnifiedFormat(FormatStr, getSTI());
6929 if (Id == UFMT_UNDEF)
6930 return ParseStatus::NoMatch;
6931
6932 if (!isGFX10Plus())
6933 return Error(Loc, "unified format is not supported on this GPU");
6934
6935 Format = Id;
6936 return ParseStatus::Success;
6937}
6938
6939ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6940 using namespace llvm::AMDGPU::MTBUFFormat;
6941 SMLoc Loc = getLoc();
6942
6943 if (!parseExpr(Format))
6944 return ParseStatus::Failure;
6945 if (!isValidFormatEncoding(Format, getSTI()))
6946 return Error(Loc, "out of range format");
6947
6948 return ParseStatus::Success;
6949}
6950
6951ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6952 using namespace llvm::AMDGPU::MTBUFFormat;
6953
6954 if (!trySkipId("format", AsmToken::Colon))
6955 return ParseStatus::NoMatch;
6956
6957 if (trySkipToken(AsmToken::LBrac)) {
6958 StringRef FormatStr;
6959 SMLoc Loc = getLoc();
6960 if (!parseId(FormatStr, "expected a format string"))
6961 return ParseStatus::Failure;
6962
6963 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6964 if (Res.isNoMatch())
6965 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6966 if (!Res.isSuccess())
6967 return Res;
6968
6969 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6970 return ParseStatus::Failure;
6971
6972 return ParseStatus::Success;
6973 }
6974
6975 return parseNumericFormat(Format);
6976}
6977
6978ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6979 using namespace llvm::AMDGPU::MTBUFFormat;
6980
6981 int64_t Format = getDefaultFormatEncoding(getSTI());
6982 ParseStatus Res;
6983 SMLoc Loc = getLoc();
6984
6985 // Parse legacy format syntax.
6986 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6987 if (Res.isFailure())
6988 return Res;
6989
6990 bool FormatFound = Res.isSuccess();
6991
6992 Operands.push_back(
6993 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6994
6995 if (FormatFound)
6996 trySkipToken(AsmToken::Comma);
6997
6998 if (isToken(AsmToken::EndOfStatement)) {
6999 // We are expecting an soffset operand,
7000 // but let matcher handle the error.
7001 return ParseStatus::Success;
7002 }
7003
7004 // Parse soffset.
7005 Res = parseRegOrImm(Operands);
7006 if (!Res.isSuccess())
7007 return Res;
7008
7009 trySkipToken(AsmToken::Comma);
7010
7011 if (!FormatFound) {
7012 Res = parseSymbolicOrNumericFormat(Format);
7013 if (Res.isFailure())
7014 return Res;
7015 if (Res.isSuccess()) {
7016 auto Size = Operands.size();
7017 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
7018 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7019 Op.setImm(Format);
7020 }
7021 return ParseStatus::Success;
7022 }
7023
7024 if (isId("format") && peekToken().is(AsmToken::Colon))
7025 return Error(getLoc(), "duplicate format");
7026 return ParseStatus::Success;
7027}
7028
7029ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7030 ParseStatus Res =
7031 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7032 if (Res.isNoMatch()) {
7033 Res = parseIntWithPrefix("inst_offset", Operands,
7034 AMDGPUOperand::ImmTyInstOffset);
7035 }
7036 return Res;
7037}
7038
7039ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7040 ParseStatus Res =
7041 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7042 if (Res.isNoMatch())
7043 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7044 return Res;
7045}
7046
7047ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7048 ParseStatus Res =
7049 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7050 if (Res.isNoMatch()) {
7051 Res =
7052 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7053 }
7054 return Res;
7055}
7056
7057//===----------------------------------------------------------------------===//
7058// Exp
7059//===----------------------------------------------------------------------===//
7060
7061void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7062 OptionalImmIndexMap OptionalIdx;
7063
7064 unsigned OperandIdx[4];
7065 unsigned EnMask = 0;
7066 int SrcIdx = 0;
7067
7068 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7069 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7070
7071 // Add the register arguments
7072 if (Op.isReg()) {
7073 assert(SrcIdx < 4);
7074 OperandIdx[SrcIdx] = Inst.size();
7075 Op.addRegOperands(Inst, 1);
7076 ++SrcIdx;
7077 continue;
7078 }
7079
7080 if (Op.isOff()) {
7081 assert(SrcIdx < 4);
7082 OperandIdx[SrcIdx] = Inst.size();
7083 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
7084 ++SrcIdx;
7085 continue;
7086 }
7087
7088 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7089 Op.addImmOperands(Inst, 1);
7090 continue;
7091 }
7092
7093 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7094 continue;
7095
7096 // Handle optional arguments
7097 OptionalIdx[Op.getImmTy()] = i;
7098 }
7099
7100 assert(SrcIdx == 4);
7101
7102 bool Compr = false;
7103 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7104 Compr = true;
7105 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7106 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
7107 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
7108 }
7109
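// With compressed exports each enabled source covers two channels, hence the
// two-bit mask per source below.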
7110 for (auto i = 0; i < SrcIdx; ++i) {
7111 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
7112 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7113 }
7114 }
7115
7116 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7117 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7118
7119 Inst.addOperand(MCOperand::createImm(EnMask));
7120}
7121
7122//===----------------------------------------------------------------------===//
7123// s_waitcnt
7124//===----------------------------------------------------------------------===//
7125
7126static bool
7127 encodeCnt(
7128 const AMDGPU::IsaVersion ISA,
7129 int64_t &IntVal,
7130 int64_t CntVal,
7131 bool Saturate,
7132 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7133 unsigned (*decode)(const IsaVersion &Version, unsigned))
7134{
7135 bool Failed = false;
7136
7137 IntVal = encode(ISA, IntVal, CntVal);
7138 if (CntVal != decode(ISA, IntVal)) {
7139 if (Saturate) {
7140 IntVal = encode(ISA, IntVal, -1);
7141 } else {
7142 Failed = true;
7143 }
7144 }
7145 return Failed;
7146}
7147
7148bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7149
7150 SMLoc CntLoc = getLoc();
7151 StringRef CntName = getTokenStr();
7152
7153 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7154 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7155 return false;
7156
7157 int64_t CntVal;
7158 SMLoc ValLoc = getLoc();
7159 if (!parseExpr(CntVal))
7160 return false;
7161
7162 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7163
7164 bool Failed = true;
7165 bool Sat = CntName.ends_with("_sat");
7166
7167 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7168 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7169 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7170 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7171 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7172 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7173 } else {
7174 Error(CntLoc, "invalid counter name " + CntName);
7175 return false;
7176 }
7177
7178 if (Failed) {
7179 Error(ValLoc, "too large value for " + CntName);
7180 return false;
7181 }
7182
7183 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7184 return false;
7185
7186 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7187 if (isToken(AsmToken::EndOfStatement)) {
7188 Error(getLoc(), "expected a counter name");
7189 return false;
7190 }
7191 }
7192
7193 return true;
7194}
7195
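// Accepts either the symbolic form, e.g. "vmcnt(0) expcnt(0) lgkmcnt(0)" (the
// counters may also be separated by '&' or ','), or a single absolute expression.
// A "_sat" suffix clamps an out-of-range count instead of reporting an error.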
7196ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7197 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7198 int64_t Waitcnt = getWaitcntBitMask(ISA);
7199 SMLoc S = getLoc();
7200
7201 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7202 while (!isToken(AsmToken::EndOfStatement)) {
7203 if (!parseCnt(Waitcnt))
7204 return ParseStatus::Failure;
7205 }
7206 } else {
7207 if (!parseExpr(Waitcnt))
7208 return ParseStatus::Failure;
7209 }
7210
7211 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7212 return ParseStatus::Success;
7213}
7214
7215bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7216 SMLoc FieldLoc = getLoc();
7217 StringRef FieldName = getTokenStr();
7218 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7219 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7220 return false;
7221
7222 SMLoc ValueLoc = getLoc();
7223 StringRef ValueName = getTokenStr();
7224 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7225 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7226 return false;
7227
7228 unsigned Shift;
7229 if (FieldName == "instid0") {
7230 Shift = 0;
7231 } else if (FieldName == "instskip") {
7232 Shift = 4;
7233 } else if (FieldName == "instid1") {
7234 Shift = 7;
7235 } else {
7236 Error(FieldLoc, "invalid field name " + FieldName);
7237 return false;
7238 }
7239
7240 int Value;
7241 if (Shift == 4) {
7242 // Parse values for instskip.
7243 Value = StringSwitch<int>(ValueName)
7244 .Case("SAME", 0)
7245 .Case("NEXT", 1)
7246 .Case("SKIP_1", 2)
7247 .Case("SKIP_2", 3)
7248 .Case("SKIP_3", 4)
7249 .Case("SKIP_4", 5)
7250 .Default(-1);
7251 } else {
7252 // Parse values for instid0 and instid1.
7253 Value = StringSwitch<int>(ValueName)
7254 .Case("NO_DEP", 0)
7255 .Case("VALU_DEP_1", 1)
7256 .Case("VALU_DEP_2", 2)
7257 .Case("VALU_DEP_3", 3)
7258 .Case("VALU_DEP_4", 4)
7259 .Case("TRANS32_DEP_1", 5)
7260 .Case("TRANS32_DEP_2", 6)
7261 .Case("TRANS32_DEP_3", 7)
7262 .Case("FMA_ACCUM_CYCLE_1", 8)
7263 .Case("SALU_CYCLE_1", 9)
7264 .Case("SALU_CYCLE_2", 10)
7265 .Case("SALU_CYCLE_3", 11)
7266 .Default(-1);
7267 }
7268 if (Value < 0) {
7269 Error(ValueLoc, "invalid value name " + ValueName);
7270 return false;
7271 }
7272
7273 Delay |= Value << Shift;
7274 return true;
7275}
7276
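// Accepts either a symbolic form such as
// "instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)" or a plain
// absolute expression.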
7277ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7278 int64_t Delay = 0;
7279 SMLoc S = getLoc();
7280
7281 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7282 do {
7283 if (!parseDelay(Delay))
7284 return ParseStatus::Failure;
7285 } while (trySkipToken(AsmToken::Pipe));
7286 } else {
7287 if (!parseExpr(Delay))
7288 return ParseStatus::Failure;
7289 }
7290
7291 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7292 return ParseStatus::Success;
7293}
7294
7295bool
7296AMDGPUOperand::isSWaitCnt() const {
7297 return isImm();
7298}
7299
7300bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7301
7302//===----------------------------------------------------------------------===//
7303// DepCtr
7304//===----------------------------------------------------------------------===//
7305
7306void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7307 StringRef DepCtrName) {
7308 switch (ErrorId) {
7309 case OPR_ID_UNKNOWN:
7310 Error(Loc, Twine("invalid counter name ", DepCtrName));
7311 return;
7312 case OPR_ID_UNSUPPORTED:
7313 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7314 return;
7315 case OPR_ID_DUPLICATE:
7316 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7317 return;
7318 case OPR_VAL_INVALID:
7319 Error(Loc, Twine("invalid value for ", DepCtrName));
7320 return;
7321 default:
7322 assert(false);
7323 }
7324}
7325
7326bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7327
7328 using namespace llvm::AMDGPU::DepCtr;
7329
7330 SMLoc DepCtrLoc = getLoc();
7331 StringRef DepCtrName = getTokenStr();
7332
7333 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7334 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7335 return false;
7336
7337 int64_t ExprVal;
7338 if (!parseExpr(ExprVal))
7339 return false;
7340
7341 unsigned PrevOprMask = UsedOprMask;
7342 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7343
7344 if (CntVal < 0) {
7345 depCtrError(DepCtrLoc, CntVal, DepCtrName);
7346 return false;
7347 }
7348
7349 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7350 return false;
7351
7352 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7353 if (isToken(AsmToken::EndOfStatement)) {
7354 Error(getLoc(), "expected a counter name");
7355 return false;
7356 }
7357 }
7358
7359 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7360 DepCtr = (DepCtr & ~CntValMask) | CntVal;
7361 return true;
7362}
7363
7364ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7365 using namespace llvm::AMDGPU::DepCtr;
7366
7367 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
7368 SMLoc Loc = getLoc();
7369
7370 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7371 unsigned UsedOprMask = 0;
7372 while (!isToken(AsmToken::EndOfStatement)) {
7373 if (!parseDepCtr(DepCtr, UsedOprMask))
7374 return ParseStatus::Failure;
7375 }
7376 } else {
7377 if (!parseExpr(DepCtr))
7378 return ParseStatus::Failure;
7379 }
7380
7381 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
7382 return ParseStatus::Success;
7383}
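// Illustrative syntax accepted by parseDepCtr() above: one or more counters
// separated by '&', ',' or whitespace, or a raw immediate. Counter names such
// as depctr_va_vdst are target dependent, e.g.:
//   s_waitcnt_depctr depctr_va_vdst(0) & depctr_sa_sdst(0)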
7384
7385bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
7386
7387//===----------------------------------------------------------------------===//
7388// hwreg
7389//===----------------------------------------------------------------------===//
7390
7391ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
7392 OperandInfoTy &Offset,
7393 OperandInfoTy &Width) {
7394 using namespace llvm::AMDGPU::Hwreg;
7395
7396 if (!trySkipId("hwreg", AsmToken::LParen))
7397 return ParseStatus::NoMatch;
7398
7399 // The register may be specified by name or using a numeric code
7400 HwReg.Loc = getLoc();
7401 if (isToken(AsmToken::Identifier) &&
7402 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7403 HwReg.IsSymbolic = true;
7404 lex(); // skip register name
7405 } else if (!parseExpr(HwReg.Val, "a register name")) {
7406 return ParseStatus::Failure;
7407 }
7408
7409 if (trySkipToken(AsmToken::RParen))
7410 return ParseStatus::Success;
7411
7412 // parse optional params
7413 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
7414 return ParseStatus::Failure;
7415
7416 Offset.Loc = getLoc();
7417 if (!parseExpr(Offset.Val))
7418 return ParseStatus::Failure;
7419
7420 if (!skipToken(AsmToken::Comma, "expected a comma"))
7421 return ParseStatus::Failure;
7422
7423 Width.Loc = getLoc();
7424 if (!parseExpr(Width.Val) ||
7425 !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7426 return ParseStatus::Failure;
7427
7428 return ParseStatus::Success;
7429}
7430
7431ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
7432 using namespace llvm::AMDGPU::Hwreg;
7433
7434 int64_t ImmVal = 0;
7435 SMLoc Loc = getLoc();
7436
7437 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
7438 HwregId::Default);
7439 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
7440 HwregOffset::Default);
7441 struct : StructuredOpField {
7442 using StructuredOpField::StructuredOpField;
7443 bool validate(AMDGPUAsmParser &Parser) const override {
7444 if (!isUIntN(Width, Val - 1))
7445 return Error(Parser, "only values from 1 to 32 are legal");
7446 return true;
7447 }
7448 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
7449 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
7450
7451 if (Res.isNoMatch())
7452 Res = parseHwregFunc(HwReg, Offset, Width);
7453
7454 if (Res.isSuccess()) {
7455 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
7456 return ParseStatus::Failure;
7457 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
7458 }
7459
7460 if (Res.isNoMatch() &&
7461 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
7462 Res = ParseStatus::Success;
7463
7464 if (!Res.isSuccess())
7465 return ParseStatus::Failure;
7466
7467 if (!isUInt<16>(ImmVal))
7468 return Error(Loc, "invalid immediate: only 16-bit values are legal");
7469 Operands.push_back(
7470 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
7471 return ParseStatus::Success;
7472}
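// Illustrative forms accepted by parseHwreg() above (register names and
// numeric ids are target dependent; shown for illustration):
//   s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 32)     // symbolic hwreg() macro
//   s_getreg_b32 s2, {id: 1, offset: 0, size: 32}  // structured fields
//   s_getreg_b32 s2, 0x1                           // raw 16-bit immediate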
7473
7474bool AMDGPUOperand::isHwreg() const {
7475 return isImmTy(ImmTyHwreg);
7476}
7477
7478//===----------------------------------------------------------------------===//
7479// sendmsg
7480//===----------------------------------------------------------------------===//
7481
7482bool
7483AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
7484 OperandInfoTy &Op,
7485 OperandInfoTy &Stream) {
7486 using namespace llvm::AMDGPU::SendMsg;
7487
7488 Msg.Loc = getLoc();
7489 if (isToken(AsmToken::Identifier) &&
7490 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7491 Msg.IsSymbolic = true;
7492 lex(); // skip message name
7493 } else if (!parseExpr(Msg.Val, "a message name")) {
7494 return false;
7495 }
7496
7497 if (trySkipToken(AsmToken::Comma)) {
7498 Op.IsDefined = true;
7499 Op.Loc = getLoc();
7500 if (isToken(AsmToken::Identifier) &&
7501 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
7502 OPR_ID_UNKNOWN) {
7503 lex(); // skip operation name
7504 } else if (!parseExpr(Op.Val, "an operation name")) {
7505 return false;
7506 }
7507
7508 if (trySkipToken(AsmToken::Comma)) {
7509 Stream.IsDefined = true;
7510 Stream.Loc = getLoc();
7511 if (!parseExpr(Stream.Val))
7512 return false;
7513 }
7514 }
7515
7516 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
7517}
7518
7519bool
7520AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
7521 const OperandInfoTy &Op,
7522 const OperandInfoTy &Stream) {
7523 using namespace llvm::AMDGPU::SendMsg;
7524
7525 // Validation strictness depends on whether the message is specified
7526 // in a symbolic or in a numeric form. In the latter case
7527 // only the encoding possibility is checked.
7528 bool Strict = Msg.IsSymbolic;
7529
7530 if (Strict) {
7531 if (Msg.Val == OPR_ID_UNSUPPORTED) {
7532 Error(Msg.Loc, "specified message id is not supported on this GPU");
7533 return false;
7534 }
7535 } else {
7536 if (!isValidMsgId(Msg.Val, getSTI())) {
7537 Error(Msg.Loc, "invalid message id");
7538 return false;
7539 }
7540 }
7541 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
7542 if (Op.IsDefined) {
7543 Error(Op.Loc, "message does not support operations");
7544 } else {
7545 Error(Msg.Loc, "missing message operation");
7546 }
7547 return false;
7548 }
7549 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
7550 if (Op.Val == OPR_ID_UNSUPPORTED)
7551 Error(Op.Loc, "specified operation id is not supported on this GPU");
7552 else
7553 Error(Op.Loc, "invalid operation id");
7554 return false;
7555 }
7556 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
7557 Stream.IsDefined) {
7558 Error(Stream.Loc, "message operation does not support streams");
7559 return false;
7560 }
7561 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
7562 Error(Stream.Loc, "invalid message stream id");
7563 return false;
7564 }
7565 return true;
7566}
7567
7568ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
7569 using namespace llvm::AMDGPU::SendMsg;
7570
7571 int64_t ImmVal = 0;
7572 SMLoc Loc = getLoc();
7573
7574 if (trySkipId("sendmsg", AsmToken::LParen)) {
7575 OperandInfoTy Msg(OPR_ID_UNKNOWN);
7576 OperandInfoTy Op(OP_NONE_);
7577 OperandInfoTy Stream(STREAM_ID_NONE_);
7578 if (parseSendMsgBody(Msg, Op, Stream) &&
7579 validateSendMsg(Msg, Op, Stream)) {
7580 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
7581 } else {
7582 return ParseStatus::Failure;
7583 }
7584 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
7585 if (ImmVal < 0 || !isUInt<16>(ImmVal))
7586 return Error(Loc, "invalid immediate: only 16-bit values are legal");
7587 } else {
7588 return ParseStatus::Failure;
7589 }
7590
7591 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
7592 return ParseStatus::Success;
7593}
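// Illustrative forms accepted by parseSendMsg() above (message and operation
// names are target dependent; shown for illustration):
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)   // symbolic message, op, stream
//   s_sendmsg 0x22                             // raw 16-bit immediate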
7594
7595bool AMDGPUOperand::isSendMsg() const {
7596 return isImmTy(ImmTySendMsg);
7597}
7598
7599//===----------------------------------------------------------------------===//
7600// v_interp
7601//===----------------------------------------------------------------------===//
7602
7603ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
7604 StringRef Str;
7605 SMLoc S = getLoc();
7606
7607 if (!parseId(Str))
7608 return ParseStatus::NoMatch;
7609
7610 int Slot = StringSwitch<int>(Str)
7611 .Case("p10", 0)
7612 .Case("p20", 1)
7613 .Case("p0", 2)
7614 .Default(-1);
7615
7616 if (Slot == -1)
7617 return Error(S, "invalid interpolation slot");
7618
7619 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
7620 AMDGPUOperand::ImmTyInterpSlot));
7621 return ParseStatus::Success;
7622}
7623
7624ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
7625 StringRef Str;
7626 SMLoc S = getLoc();
7627
7628 if (!parseId(Str))
7629 return ParseStatus::NoMatch;
7630
7631 if (!Str.starts_with("attr"))
7632 return Error(S, "invalid interpolation attribute");
7633
7634 StringRef Chan = Str.take_back(2);
7635 int AttrChan = StringSwitch<int>(Chan)
7636 .Case(".x", 0)
7637 .Case(".y", 1)
7638 .Case(".z", 2)
7639 .Case(".w", 3)
7640 .Default(-1);
7641 if (AttrChan == -1)
7642 return Error(S, "invalid or missing interpolation attribute channel");
7643
7644 Str = Str.drop_back(2).drop_front(4);
7645
7646 uint8_t Attr;
7647 if (Str.getAsInteger(10, Attr))
7648 return Error(S, "invalid or missing interpolation attribute number");
7649
7650 if (Attr > 32)
7651 return Error(S, "out of bounds interpolation attribute number");
7652
7653 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
7654
7655 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
7656 AMDGPUOperand::ImmTyInterpAttr));
7657 Operands.push_back(AMDGPUOperand::CreateImm(
7658 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
7659 return ParseStatus::Success;
7660}
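// Illustrative syntax for the interpolation operands parsed above (mnemonics
// shown for illustration):
//   v_interp_mov_f32 v0, p10, attr0.x   // slot: p10, p20 or p0
//   v_interp_p1_f32 v0, v1, attr1.y     // attribute: attr<N>.<x|y|z|w>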
7661
7662//===----------------------------------------------------------------------===//
7663// exp
7664//===----------------------------------------------------------------------===//
7665
7666ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
7667 using namespace llvm::AMDGPU::Exp;
7668
7669 StringRef Str;
7670 SMLoc S = getLoc();
7671
7672 if (!parseId(Str))
7673 return ParseStatus::NoMatch;
7674
7675 unsigned Id = getTgtId(Str);
7676 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
7677 return Error(S, (Id == ET_INVALID)
7678 ? "invalid exp target"
7679 : "exp target is not supported on this GPU");
7680
7681 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
7682 AMDGPUOperand::ImmTyExpTgt));
7683 return ParseStatus::Success;
7684}
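// Illustrative export targets accepted by parseExpTgt() above (availability
// is target dependent):
//   exp mrt0 v0, v0, v0, v0 done vm
//   exp pos0 v1, v2, v3, v4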
7685
7686//===----------------------------------------------------------------------===//
7687// parser helpers
7688//===----------------------------------------------------------------------===//
7689
7690bool
7691AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
7692 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
7693}
7694
7695bool
7696AMDGPUAsmParser::isId(const StringRef Id) const {
7697 return isId(getToken(), Id);
7698}
7699
7700bool
7701AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
7702 return getTokenKind() == Kind;
7703}
7704
7705StringRef AMDGPUAsmParser::getId() const {
7706 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
7707}
7708
7709bool
7710AMDGPUAsmParser::trySkipId(const StringRef Id) {
7711 if (isId(Id)) {
7712 lex();
7713 return true;
7714 }
7715 return false;
7716}
7717
7718bool
7719AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7720 if (isToken(AsmToken::Identifier)) {
7721 StringRef Tok = getTokenStr();
7722 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
7723 lex();
7724 return true;
7725 }
7726 }
7727 return false;
7728}
7729
7730bool
7731AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7732 if (isId(Id) && peekToken().is(Kind)) {
7733 lex();
7734 lex();
7735 return true;
7736 }
7737 return false;
7738}
7739
7740bool
7741AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7742 if (isToken(Kind)) {
7743 lex();
7744 return true;
7745 }
7746 return false;
7747}
7748
7749bool
7750AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7751 const StringRef ErrMsg) {
7752 if (!trySkipToken(Kind)) {
7753 Error(getLoc(), ErrMsg);
7754 return false;
7755 }
7756 return true;
7757}
7758
7759bool
7760AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7761 SMLoc S = getLoc();
7762
7763 const MCExpr *Expr;
7764 if (Parser.parseExpression(Expr))
7765 return false;
7766
7767 if (Expr->evaluateAsAbsolute(Imm))
7768 return true;
7769
7770 if (Expected.empty()) {
7771 Error(S, "expected absolute expression");
7772 } else {
7773 Error(S, Twine("expected ", Expected) +
7774 Twine(" or an absolute expression"));
7775 }
7776 return false;
7777}
7778
7779bool
7780AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7781 SMLoc S = getLoc();
7782
7783 const MCExpr *Expr;
7784 if (Parser.parseExpression(Expr))
7785 return false;
7786
7787 int64_t IntVal;
7788 if (Expr->evaluateAsAbsolute(IntVal)) {
7789 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7790 } else {
7791 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7792 }
7793 return true;
7794}
7795
7796bool
7797AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7798 if (isToken(AsmToken::String)) {
7799 Val = getToken().getStringContents();
7800 lex();
7801 return true;
7802 }
7803 Error(getLoc(), ErrMsg);
7804 return false;
7805}
7806
7807bool
7808AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7809 if (isToken(AsmToken::Identifier)) {
7810 Val = getTokenStr();
7811 lex();
7812 return true;
7813 }
7814 if (!ErrMsg.empty())
7815 Error(getLoc(), ErrMsg);
7816 return false;
7817}
7818
7819AsmToken
7820AMDGPUAsmParser::getToken() const {
7821 return Parser.getTok();
7822}
7823
7824AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7825 return isToken(AsmToken::EndOfStatement)
7826 ? getToken()
7827 : getLexer().peekTok(ShouldSkipSpace);
7828}
7829
7830void
7831AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7832 auto TokCount = getLexer().peekTokens(Tokens);
7833
7834 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7835 Tokens[Idx] = AsmToken(AsmToken::Error, "");
7836}
7837
7838AsmToken::TokenKind
7839AMDGPUAsmParser::getTokenKind() const {
7840 return getLexer().getKind();
7841}
7842
7843SMLoc
7844AMDGPUAsmParser::getLoc() const {
7845 return getToken().getLoc();
7846}
7847
7848StringRef
7849AMDGPUAsmParser::getTokenStr() const {
7850 return getToken().getString();
7851}
7852
7853void
7854AMDGPUAsmParser::lex() {
7855 Parser.Lex();
7856}
7857
7858SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
7859 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7860}
7861
7862SMLoc
7863AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7864 const OperandVector &Operands) const {
7865 for (unsigned i = Operands.size() - 1; i > 0; --i) {
7866 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7867 if (Test(Op))
7868 return Op.getStartLoc();
7869 }
7870 return getInstLoc(Operands);
7871}
7872
7873SMLoc
7874AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7875 const OperandVector &Operands) const {
7876 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7877 return getOperandLoc(Test, Operands);
7878}
7879
7880SMLoc
7881AMDGPUAsmParser::getRegLoc(unsigned Reg,
7882 const OperandVector &Operands) const {
7883 auto Test = [=](const AMDGPUOperand& Op) {
7884 return Op.isRegKind() && Op.getReg() == Reg;
7885 };
7886 return getOperandLoc(Test, Operands);
7887}
7888
7889SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
7890 bool SearchMandatoryLiterals) const {
7891 auto Test = [](const AMDGPUOperand& Op) {
7892 return Op.IsImmKindLiteral() || Op.isExpr();
7893 };
7894 SMLoc Loc = getOperandLoc(Test, Operands);
7895 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
7896 Loc = getMandatoryLitLoc(Operands);
7897 return Loc;
7898}
7899
7900SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
7901 auto Test = [](const AMDGPUOperand &Op) {
7902 return Op.IsImmKindMandatoryLiteral();
7903 };
7904 return getOperandLoc(Test, Operands);
7905}
7906
7907SMLoc
7908AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7909 auto Test = [](const AMDGPUOperand& Op) {
7910 return Op.isImmKindConst();
7911 };
7912 return getOperandLoc(Test, Operands);
7913}
7914
7915ParseStatus
7916AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
7917 if (!trySkipToken(AsmToken::LCurly))
7918 return ParseStatus::NoMatch;
7919
7920 bool First = true;
7921 while (!trySkipToken(AsmToken::RCurly)) {
7922 if (!First &&
7923 !skipToken(AsmToken::Comma, "comma or closing brace expected"))
7924 return ParseStatus::Failure;
7925
7926 StringRef Id = getTokenStr();
7927 SMLoc IdLoc = getLoc();
7928 if (!skipToken(AsmToken::Identifier, "field name expected") ||
7929 !skipToken(AsmToken::Colon, "colon expected"))
7930 return ParseStatus::Failure;
7931
7932 auto I =
7933 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
7934 if (I == Fields.end())
7935 return Error(IdLoc, "unknown field");
7936 if ((*I)->IsDefined)
7937 return Error(IdLoc, "duplicate field");
7938
7939 // TODO: Support symbolic values.
7940 (*I)->Loc = getLoc();
7941 if (!parseExpr((*I)->Val))
7942 return ParseStatus::Failure;
7943 (*I)->IsDefined = true;
7944
7945 First = false;
7946 }
7947 return ParseStatus::Success;
7948}
7949
7950bool AMDGPUAsmParser::validateStructuredOpFields(
7951 ArrayRef<const StructuredOpField *> Fields) {
7952 return all_of(Fields, [this](const StructuredOpField *F) {
7953 return F->validate(*this);
7954 });
7955}
7956
7957//===----------------------------------------------------------------------===//
7958// swizzle
7959//===----------------------------------------------------------------------===//
7960
7961LLVM_READNONE
7962static unsigned
7963encodeBitmaskPerm(const unsigned AndMask,
7964 const unsigned OrMask,
7965 const unsigned XorMask) {
7966 using namespace llvm::AMDGPU::Swizzle;
7967
7968 return BITMASK_PERM_ENC |
7969 (AndMask << BITMASK_AND_SHIFT) |
7970 (OrMask << BITMASK_OR_SHIFT) |
7971 (XorMask << BITMASK_XOR_SHIFT);
7972}
7973
7974bool
7975AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7976 const unsigned MinVal,
7977 const unsigned MaxVal,
7978 const StringRef ErrMsg,
7979 SMLoc &Loc) {
7980 if (!skipToken(AsmToken::Comma, "expected a comma")) {
7981 return false;
7982 }
7983 Loc = getLoc();
7984 if (!parseExpr(Op)) {
7985 return false;
7986 }
7987 if (Op < MinVal || Op > MaxVal) {
7988 Error(Loc, ErrMsg);
7989 return false;
7990 }
7991
7992 return true;
7993}
7994
7995bool
7996AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7997 const unsigned MinVal,
7998 const unsigned MaxVal,
7999 const StringRef ErrMsg) {
8000 SMLoc Loc;
8001 for (unsigned i = 0; i < OpNum; ++i) {
8002 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
8003 return false;
8004 }
8005
8006 return true;
8007}
8008
8009bool
8010AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8011 using namespace llvm::AMDGPU::Swizzle;
8012
8013 int64_t Lane[LANE_NUM];
8014 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8015 "expected a 2-bit lane id")) {
8016 Imm = QUAD_PERM_ENC;
8017 for (unsigned I = 0; I < LANE_NUM; ++I) {
8018 Imm |= Lane[I] << (LANE_SHIFT * I);
8019 }
8020 return true;
8021 }
8022 return false;
8023}
8024
8025bool
8026AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
8027 using namespace llvm::AMDGPU::Swizzle;
8028
8029 SMLoc Loc;
8030 int64_t GroupSize;
8031 int64_t LaneIdx;
8032
8033 if (!parseSwizzleOperand(GroupSize,
8034 2, 32,
8035 "group size must be in the interval [2,32]",
8036 Loc)) {
8037 return false;
8038 }
8039 if (!isPowerOf2_64(GroupSize)) {
8040 Error(Loc, "group size must be a power of two");
8041 return false;
8042 }
8043 if (parseSwizzleOperand(LaneIdx,
8044 0, GroupSize - 1,
8045 "lane id must be in the interval [0,group size - 1]",
8046 Loc)) {
8047 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8048 return true;
8049 }
8050 return false;
8051}
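// Worked example for the broadcast encoding above: swizzle(BROADCAST, 4, 1)
// yields AndMask = BITMASK_MAX - 4 + 1 = 0x1C, OrMask = 1, XorMask = 0, i.e.
// every lane reads lane 1 of its 4-lane group.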
8052
8053bool
8054AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8055 using namespace llvm::AMDGPU::Swizzle;
8056
8057 SMLoc Loc;
8058 int64_t GroupSize;
8059
8060 if (!parseSwizzleOperand(GroupSize,
8061 2, 32,
8062 "group size must be in the interval [2,32]",
8063 Loc)) {
8064 return false;
8065 }
8066 if (!isPowerOf2_64(GroupSize)) {
8067 Error(Loc, "group size must be a power of two");
8068 return false;
8069 }
8070
8071 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8072 return true;
8073}
8074
8075bool
8076AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8077 using namespace llvm::AMDGPU::Swizzle;
8078
8079 SMLoc Loc;
8080 int64_t GroupSize;
8081
8082 if (!parseSwizzleOperand(GroupSize,
8083 1, 16,
8084 "group size must be in the interval [1,16]",
8085 Loc)) {
8086 return false;
8087 }
8088 if (!isPowerOf2_64(GroupSize)) {
8089 Error(Loc, "group size must be a power of two");
8090 return false;
8091 }
8092
8093 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8094 return true;
8095}
8096
8097bool
8098AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8099 using namespace llvm::AMDGPU::Swizzle;
8100
8101 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8102 return false;
8103 }
8104
8105 StringRef Ctl;
8106 SMLoc StrLoc = getLoc();
8107 if (!parseString(Ctl)) {
8108 return false;
8109 }
8110 if (Ctl.size() != BITMASK_WIDTH) {
8111 Error(StrLoc, "expected a 5-character mask");
8112 return false;
8113 }
8114
8115 unsigned AndMask = 0;
8116 unsigned OrMask = 0;
8117 unsigned XorMask = 0;
8118
8119 for (size_t i = 0; i < Ctl.size(); ++i) {
8120 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8121 switch(Ctl[i]) {
8122 default:
8123 Error(StrLoc, "invalid mask");
8124 return false;
8125 case '0':
8126 break;
8127 case '1':
8128 OrMask |= Mask;
8129 break;
8130 case 'p':
8131 AndMask |= Mask;
8132 break;
8133 case 'i':
8134 AndMask |= Mask;
8135 XorMask |= Mask;
8136 break;
8137 }
8138 }
8139
8140 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8141 return true;
8142}
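// Worked example for the mask parsing above: the control string "01pip" is
// processed MSB first, so bit 4 <- '0' (ignored), bit 3 <- '1' (OrMask |= 8),
// bit 2 <- 'p' (AndMask |= 4), bit 1 <- 'i' (AndMask |= 2, XorMask |= 2) and
// bit 0 <- 'p' (AndMask |= 1), giving AndMask = 0x7, OrMask = 0x8, XorMask = 0x2.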
8143
8144bool
8145AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8146
8147 SMLoc OffsetLoc = getLoc();
8148
8149 if (!parseExpr(Imm, "a swizzle macro")) {
8150 return false;
8151 }
8152 if (!isUInt<16>(Imm)) {
8153 Error(OffsetLoc, "expected a 16-bit offset");
8154 return false;
8155 }
8156 return true;
8157}
8158
8159bool
8160AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8161 using namespace llvm::AMDGPU::Swizzle;
8162
8163 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
8164
8165 SMLoc ModeLoc = getLoc();
8166 bool Ok = false;
8167
8168 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8169 Ok = parseSwizzleQuadPerm(Imm);
8170 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8171 Ok = parseSwizzleBitmaskPerm(Imm);
8172 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8173 Ok = parseSwizzleBroadcast(Imm);
8174 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8175 Ok = parseSwizzleSwap(Imm);
8176 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8177 Ok = parseSwizzleReverse(Imm);
8178 } else {
8179 Error(ModeLoc, "expected a swizzle mode");
8180 }
8181
8182 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
8183 }
8184
8185 return false;
8186}
8187
8188ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8189 SMLoc S = getLoc();
8190 int64_t Imm = 0;
8191
8192 if (trySkipId("offset")) {
8193
8194 bool Ok = false;
8195 if (skipToken(AsmToken::Colon, "expected a colon")) {
8196 if (trySkipId("swizzle")) {
8197 Ok = parseSwizzleMacro(Imm);
8198 } else {
8199 Ok = parseSwizzleOffset(Imm);
8200 }
8201 }
8202
8203 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8204
8205 return Ok ? ParseStatus::Success : ParseStatus::Failure;
8206 }
8207 return ParseStatus::NoMatch;
8208}
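// Illustrative forms accepted by parseSwizzle() above (mnemonic shown for
// illustration):
//   ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "01pip")
//   ds_swizzle_b32 v8, v2 offset:0x8000   // raw 16-bit offset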
8209
8210bool
8211AMDGPUOperand::isSwizzle() const {
8212 return isImmTy(ImmTySwizzle);
8213}
8214
8215//===----------------------------------------------------------------------===//
8216// VGPR Index Mode
8217//===----------------------------------------------------------------------===//
8218
8219int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8220
8221 using namespace llvm::AMDGPU::VGPRIndexMode;
8222
8223 if (trySkipToken(AsmToken::RParen)) {
8224 return OFF;
8225 }
8226
8227 int64_t Imm = 0;
8228
8229 while (true) {
8230 unsigned Mode = 0;
8231 SMLoc S = getLoc();
8232
8233 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8234 if (trySkipId(IdSymbolic[ModeId])) {
8235 Mode = 1 << ModeId;
8236 break;
8237 }
8238 }
8239
8240 if (Mode == 0) {
8241 Error(S, (Imm == 0)?
8242 "expected a VGPR index mode or a closing parenthesis" :
8243 "expected a VGPR index mode");
8244 return UNDEF;
8245 }
8246
8247 if (Imm & Mode) {
8248 Error(S, "duplicate VGPR index mode");
8249 return UNDEF;
8250 }
8251 Imm |= Mode;
8252
8253 if (trySkipToken(AsmToken::RParen))
8254 break;
8255 if (!skipToken(AsmToken::Comma,
8256 "expected a comma or a closing parenthesis"))
8257 return UNDEF;
8258 }
8259
8260 return Imm;
8261}
8262
8263ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8264
8265 using namespace llvm::AMDGPU::VGPRIndexMode;
8266
8267 int64_t Imm = 0;
8268 SMLoc S = getLoc();
8269
8270 if (trySkipId("gpr_idx", AsmToken::LParen)) {
8271 Imm = parseGPRIdxMacro();
8272 if (Imm == UNDEF)
8273 return ParseStatus::Failure;
8274 } else {
8275 if (getParser().parseAbsoluteExpression(Imm))
8276 return ParseStatus::Failure;
8277 if (Imm < 0 || !isUInt<4>(Imm))
8278 return Error(S, "invalid immediate: only 4-bit values are legal");
8279 }
8280
8281 Operands.push_back(
8282 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8283 return ParseStatus::Success;
8284}
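// Illustrative forms accepted by parseGPRIdxMode() above (mnemonic shown for
// illustration):
//   s_set_gpr_idx_on s0, gpr_idx(SRC0,DST)   // symbolic mode list
//   s_set_gpr_idx_on s0, 3                   // raw 4-bit immediate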
8285
8286bool AMDGPUOperand::isGPRIdxMode() const {
8287 return isImmTy(ImmTyGprIdxMode);
8288}
8289
8290//===----------------------------------------------------------------------===//
8291// sopp branch targets
8292//===----------------------------------------------------------------------===//
8293
8294ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
8295
8296 // Make sure we are not parsing something
8297 // that looks like a label or an expression but is not.
8298 // This will improve error messages.
8299 if (isRegister() || isModifier())
8300 return ParseStatus::NoMatch;
8301
8302 if (!parseExpr(Operands))
8303 return ParseStatus::Failure;
8304
8305 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
8306 assert(Opr.isImm() || Opr.isExpr());
8307 SMLoc Loc = Opr.getStartLoc();
8308
8309 // Currently we do not support arbitrary expressions as branch targets.
8310 // Only labels and absolute expressions are accepted.
8311 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8312 Error(Loc, "expected an absolute expression or a label");
8313 } else if (Opr.isImm() && !Opr.isS16Imm()) {
8314 Error(Loc, "expected a 16-bit signed jump offset");
8315 }
8316
8317 return ParseStatus::Success;
8318}
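// Illustrative branch targets accepted above: a label or a 16-bit signed
// offset, e.g. (label name is hypothetical):
//   s_branch loop_header
//   s_cbranch_scc0 4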
8319
8320//===----------------------------------------------------------------------===//
8321// Boolean holding registers
8322//===----------------------------------------------------------------------===//
8323
8324ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
8325 return parseReg(Operands);
8326}
8327
8328//===----------------------------------------------------------------------===//
8329// mubuf
8330//===----------------------------------------------------------------------===//
8331
8332void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
8333 const OperandVector &Operands,
8334 bool IsAtomic) {
8335 OptionalImmIndexMap OptionalIdx;
8336 unsigned FirstOperandIdx = 1;
8337 bool IsAtomicReturn = false;
8338
8339 if (IsAtomic) {
8340 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
8341 SIInstrFlags::IsAtomicRet;
8342 }
8343
8344 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
8345 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8346
8347 // Add the register arguments
8348 if (Op.isReg()) {
8349 Op.addRegOperands(Inst, 1);
8350 // Insert a tied src for atomic return dst.
8351 // This cannot be postponed as subsequent calls to
8352 // addImmOperands rely on correct number of MC operands.
8353 if (IsAtomicReturn && i == FirstOperandIdx)
8354 Op.addRegOperands(Inst, 1);
8355 continue;
8356 }
8357
8358 // Handle the case where soffset is an immediate
8359 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
8360 Op.addImmOperands(Inst, 1);
8361 continue;
8362 }
8363
8364 // Handle tokens like 'offen' which are sometimes hard-coded into the
8365 // asm string. There are no MCInst operands for these.
8366 if (Op.isToken()) {
8367 continue;
8368 }
8369 assert(Op.isImm());
8370
8371 // Handle optional arguments
8372 OptionalIdx[Op.getImmTy()] = i;
8373 }
8374
8375 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
8376 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
8377}
8378
8379//===----------------------------------------------------------------------===//
8380// smrd
8381//===----------------------------------------------------------------------===//
8382
8383bool AMDGPUOperand::isSMRDOffset8() const {
8384 return isImmLiteral() && isUInt<8>(getImm());
8385}
8386
8387bool AMDGPUOperand::isSMEMOffset() const {
8388 // Offset range is checked later by validator.
8389 return isImmLiteral();
8390}
8391
8392bool AMDGPUOperand::isSMRDLiteralOffset() const {
8393 // 32-bit literals are only supported on CI and we only want to use them
8394 // when the offset does not fit in 8 bits.
8395 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
8396}
8397
8398//===----------------------------------------------------------------------===//
8399// vop3
8400//===----------------------------------------------------------------------===//
8401
8402static bool ConvertOmodMul(int64_t &Mul) {
8403 if (Mul != 1 && Mul != 2 && Mul != 4)
8404 return false;
8405
8406 Mul >>= 1;
8407 return true;
8408}
8409
8410static bool ConvertOmodDiv(int64_t &Div) {
8411 if (Div == 1) {
8412 Div = 0;
8413 return true;
8414 }
8415
8416 if (Div == 2) {
8417 Div = 3;
8418 return true;
8419 }
8420
8421 return false;
8422}
8423
8424// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
8425// This is intentional and ensures compatibility with sp3.
8426// See bug 35397 for details.
8427bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
8428 if (BoundCtrl == 0 || BoundCtrl == 1) {
8429 if (!isGFX11Plus())
8430 BoundCtrl = 1;
8431 return true;
8432 }
8433 return false;
8434}
8435
8436void AMDGPUAsmParser::onBeginOfFile() {
8437 if (!getParser().getStreamer().getTargetStreamer() ||
8438 getSTI().getTargetTriple().getArch() == Triple::r600)
8439 return;
8440
8441 if (!getTargetStreamer().getTargetID())
8442 getTargetStreamer().initializeTargetID(getSTI(),
8443 getSTI().getFeatureString());
8444
8445 if (isHsaAbi(getSTI()))
8446 getTargetStreamer().EmitDirectiveAMDGCNTarget();
8447}
8448
8449/// Parse AMDGPU specific expressions.
8450///
8451/// expr ::= or(expr, ...) |
8452/// max(expr, ...)
8453///
8454bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
8455 using AGVK = AMDGPUMCExpr::VariantKind;
8456
8457 if (isToken(AsmToken::Identifier)) {
8458 StringRef TokenId = getTokenStr();
8459 AGVK VK = StringSwitch<AGVK>(TokenId)
8460 .Case("max", AGVK::AGVK_Max)
8461 .Case("or", AGVK::AGVK_Or)
8462 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
8463 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
8464 .Case("alignto", AGVK::AGVK_AlignTo)
8465 .Case("occupancy", AGVK::AGVK_Occupancy)
8466 .Default(AGVK::AGVK_None);
8467
8468 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
8469 SmallVector<const MCExpr *, 4> Exprs;
8470 uint64_t CommaCount = 0;
8471 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
8472 lex(); // Eat '('
8473 while (true) {
8474 if (trySkipToken(AsmToken::RParen)) {
8475 if (Exprs.empty()) {
8476 Error(getToken().getLoc(),
8477 "empty " + Twine(TokenId) + " expression");
8478 return true;
8479 }
8480 if (CommaCount + 1 != Exprs.size()) {
8481 Error(getToken().getLoc(),
8482 "mismatch of commas in " + Twine(TokenId) + " expression");
8483 return true;
8484 }
8485 Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
8486 return false;
8487 }
8488 const MCExpr *Expr;
8489 if (getParser().parseExpression(Expr, EndLoc))
8490 return true;
8491 Exprs.push_back(Expr);
8492 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
8493 if (LastTokenWasComma)
8494 CommaCount++;
8495 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
8496 Error(getToken().getLoc(),
8497 "unexpected token in " + Twine(TokenId) + " expression");
8498 return true;
8499 }
8500 }
8501 }
8502 }
8503 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
8504}
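// Illustrative use of the AMDGPU-specific expressions parsed above (symbol
// names are hypothetical):
//   .set total_vgprs, max(kernel_a.num_vgpr, kernel_b.num_vgpr)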
8505
8506ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
8507 StringRef Name = getTokenStr();
8508 if (Name == "mul") {
8509 return parseIntWithPrefix("mul", Operands,
8510 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8511 }
8512
8513 if (Name == "div") {
8514 return parseIntWithPrefix("div", Operands,
8515 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8516 }
8517
8518 return ParseStatus::NoMatch;
8519}
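// Illustrative output-modifier syntax handled by parseOModSI() above; per the
// conversions above, mul:2 encodes omod 1, mul:4 encodes 2 and div:2 encodes 3:
//   v_add_f32_e64 v0, v1, v2 mul:2
//   v_add_f32_e64 v0, v1, v2 div:2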
8520
8521// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8522// the number of src operands present, then copies that bit into src0_modifiers.
8523static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
8524 int Opc = Inst.getOpcode();
8525 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8526 if (OpSelIdx == -1)
8527 return;
8528
8529 int SrcNum;
8530 const int Ops[] = { AMDGPU::OpName::src0,
8531 AMDGPU::OpName::src1,
8532 AMDGPU::OpName::src2 };
8533 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
8534 ++SrcNum)
8535 ;
8536 assert(SrcNum > 0);
8537
8538 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8539
8540 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
8541 if (DstIdx == -1)
8542 return;
8543
8544 const MCOperand &DstOp = Inst.getOperand(DstIdx);
8545 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8546 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8547 if (DstOp.isReg() &&
8548 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
8549 if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
8550 ModVal |= SISrcMods::DST_OP_SEL;
8551 } else {
8552 if ((OpSel & (1 << SrcNum)) != 0)
8553 ModVal |= SISrcMods::DST_OP_SEL;
8554 }
8555 Inst.getOperand(ModIdx).setImm(ModVal);
8556}
8557
8558void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
8559 const OperandVector &Operands) {
8560 cvtVOP3P(Inst, Operands);
8561 cvtVOP3DstOpSelOnly(Inst, *getMRI());
8562}
8563
8564void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
8565 OptionalImmIndexMap &OptionalIdx) {
8566 cvtVOP3P(Inst, Operands, OptionalIdx);
8567 cvtVOP3DstOpSelOnly(Inst, *getMRI());
8568}
8569
8570static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8571 return
8572 // 1. This operand is input modifiers
8573 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
8574 // 2. This is not last operand
8575 && Desc.NumOperands > (OpNum + 1)
8576 // 3. Next operand is register class
8577 && Desc.operands()[OpNum + 1].RegClass != -1
8578 // 4. Next register is not tied to any other operand
8579 && Desc.getOperandConstraint(OpNum + 1,
8580 MCOI::OperandConstraint::TIED_TO) == -1;
8581}
8582
8583void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8584{
8585 OptionalImmIndexMap OptionalIdx;
8586 unsigned Opc = Inst.getOpcode();
8587
8588 unsigned I = 1;
8589 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8590 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8591 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8592 }
8593
8594 for (unsigned E = Operands.size(); I != E; ++I) {
8595 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8596 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8597 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8598 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
8599 Op.isInterpAttrChan()) {
8600 Inst.addOperand(MCOperand::createImm(Op.getImm()));
8601 } else if (Op.isImmModifier()) {
8602 OptionalIdx[Op.getImmTy()] = I;
8603 } else {
8604 llvm_unreachable("unhandled operand type");
8605 }
8606 }
8607
8608 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
8609 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8610 AMDGPUOperand::ImmTyHigh);
8611
8612 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8613 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8614 AMDGPUOperand::ImmTyClamp);
8615
8616 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8617 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8618 AMDGPUOperand::ImmTyOModSI);
8619}
8620
8621void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8622{
8623 OptionalImmIndexMap OptionalIdx;
8624 unsigned Opc = Inst.getOpcode();
8625
8626 unsigned I = 1;
8627 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8628 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8629 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8630 }
8631
8632 for (unsigned E = Operands.size(); I != E; ++I) {
8633 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8634 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8635 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8636 } else if (Op.isImmModifier()) {
8637 OptionalIdx[Op.getImmTy()] = I;
8638 } else {
8639 llvm_unreachable("unhandled operand type");
8640 }
8641 }
8642
8643 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
8644
8645 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8646 if (OpSelIdx != -1)
8647 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8648
8649 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8650
8651 if (OpSelIdx == -1)
8652 return;
8653
8654 const int Ops[] = { AMDGPU::OpName::src0,
8655 AMDGPU::OpName::src1,
8656 AMDGPU::OpName::src2 };
8657 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8658 AMDGPU::OpName::src1_modifiers,
8659 AMDGPU::OpName::src2_modifiers };
8660
8661 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8662
8663 for (int J = 0; J < 3; ++J) {
8664 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8665 if (OpIdx == -1)
8666 break;
8667
8668 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8669 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8670
8671 if ((OpSel & (1 << J)) != 0)
8672 ModVal |= SISrcMods::OP_SEL_0;
8673 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8674 (OpSel & (1 << 3)) != 0)
8675 ModVal |= SISrcMods::DST_OP_SEL;
8676
8677 Inst.getOperand(ModIdx).setImm(ModVal);
8678 }
8679}
8680
8681void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8682 OptionalImmIndexMap &OptionalIdx) {
8683 unsigned Opc = Inst.getOpcode();
8684
8685 unsigned I = 1;
8686 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8687 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8688 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8689 }
8690
8691 for (unsigned E = Operands.size(); I != E; ++I) {
8692 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8693 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8694 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8695 } else if (Op.isImmModifier()) {
8696 OptionalIdx[Op.getImmTy()] = I;
8697 } else {
8698 Op.addRegOrImmOperands(Inst, 1);
8699 }
8700 }
8701
8702 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
8703 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
8704 Inst.addOperand(Inst.getOperand(0));
8705 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8706 AMDGPUOperand::ImmTyByteSel);
8707 }
8708
8709 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8710 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8711 AMDGPUOperand::ImmTyClamp);
8712
8713 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8714 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8715 AMDGPUOperand::ImmTyOModSI);
8716
8717 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8718 // they have a src2 register operand that is tied to the dst operand.
8719 // We do not allow modifiers for this operand in the assembler, so
8720 // src2_modifiers should be 0.
8721 if (isMAC(Opc)) {
8722 auto it = Inst.begin();
8723 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8724 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8725 ++it;
8726 // Copy the operand to ensure it's not invalidated when Inst grows.
8727 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8728 }
8729}
8730
8731void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8732 OptionalImmIndexMap OptionalIdx;
8733 cvtVOP3(Inst, Operands, OptionalIdx);
8734}
8735
8736void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8737 OptionalImmIndexMap &OptIdx) {
8738 const int Opc = Inst.getOpcode();
8739 const MCInstrDesc &Desc = MII.get(Opc);
8740
8741 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8742
8743 if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8744 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
8745 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
8746 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
8747 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
8748 Inst.addOperand(Inst.getOperand(0));
8749 }
8750
8751 // Adding vdst_in operand is already covered for these DPP instructions in
8752 // cvtVOP3DPP.
8753 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
8754 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
8755 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
8756 Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
8757 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12 ||
8758 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
8759 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
8760 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
8761 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) {
8762 assert(!IsPacked);
8763 Inst.addOperand(Inst.getOperand(0));
8764 }
8765
8766 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
8767 // instruction, and then figure out where to actually put the modifiers
8768
8769 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8770 if (OpSelIdx != -1) {
8771 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8772 }
8773
8774 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8775 if (OpSelHiIdx != -1) {
8776 int DefaultVal = IsPacked ? -1 : 0;
8777 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8778 DefaultVal);
8779 }
8780
8781 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8782 if (NegLoIdx != -1)
8783 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8784
8785 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8786 if (NegHiIdx != -1)
8787 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8788
8789 const int Ops[] = { AMDGPU::OpName::src0,
8790 AMDGPU::OpName::src1,
8791 AMDGPU::OpName::src2 };
8792 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8793 AMDGPU::OpName::src1_modifiers,
8794 AMDGPU::OpName::src2_modifiers };
8795
8796 unsigned OpSel = 0;
8797 unsigned OpSelHi = 0;
8798 unsigned NegLo = 0;
8799 unsigned NegHi = 0;
8800
8801 if (OpSelIdx != -1)
8802 OpSel = Inst.getOperand(OpSelIdx).getImm();
8803
8804 if (OpSelHiIdx != -1)
8805 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8806
8807 if (NegLoIdx != -1)
8808 NegLo = Inst.getOperand(NegLoIdx).getImm();
8809
8810 if (NegHiIdx != -1)
8811 NegHi = Inst.getOperand(NegHiIdx).getImm();
8812
8813 for (int J = 0; J < 3; ++J) {
8814 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8815 if (OpIdx == -1)
8816 break;
8817
8818 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8819
8820 if (ModIdx == -1)
8821 continue;
8822
8823 uint32_t ModVal = 0;
8824
8825 const MCOperand &SrcOp = Inst.getOperand(OpIdx);
8826 if (SrcOp.isReg() && getMRI()
8827 ->getRegClass(AMDGPU::VGPR_16RegClassID)
8828 .contains(SrcOp.getReg())) {
8829 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
8830 if (VGPRSuffixIsHi)
8831 ModVal |= SISrcMods::OP_SEL_0;
8832 } else {
8833 if ((OpSel & (1 << J)) != 0)
8834 ModVal |= SISrcMods::OP_SEL_0;
8835 }
8836
8837 if ((OpSelHi & (1 << J)) != 0)
8838 ModVal |= SISrcMods::OP_SEL_1;
8839
8840 if ((NegLo & (1 << J)) != 0)
8841 ModVal |= SISrcMods::NEG;
8842
8843 if ((NegHi & (1 << J)) != 0)
8844 ModVal |= SISrcMods::NEG_HI;
8845
8846 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8847 }
8848}
8849
8850void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8851 OptionalImmIndexMap OptIdx;
8852 cvtVOP3(Inst, Operands, OptIdx);
8853 cvtVOP3P(Inst, Operands, OptIdx);
8854}
8855
8856static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
8857 unsigned i, unsigned Opc, unsigned OpName) {
8858 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
8859 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
8860 else
8861 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
8862}
8863
8864void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
8865 unsigned Opc = Inst.getOpcode();
8866
8867 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
8868 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
8869 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
8870 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
8871 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
8872
8873 OptionalImmIndexMap OptIdx;
8874 for (unsigned i = 5; i < Operands.size(); ++i) {
8875 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8876 OptIdx[Op.getImmTy()] = i;
8877 }
8878
8879 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
8880 addOptionalImmOperand(Inst, Operands, OptIdx,
8881 AMDGPUOperand::ImmTyIndexKey8bit);
8882
8883 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
8884 addOptionalImmOperand(Inst, Operands, OptIdx,
8885 AMDGPUOperand::ImmTyIndexKey16bit);
8886
8887 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8888 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
8889
8890 cvtVOP3P(Inst, Operands, OptIdx);
8891}
8892
8893//===----------------------------------------------------------------------===//
8894// VOPD
8895//===----------------------------------------------------------------------===//
8896
8897ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
8898 if (!hasVOPD(getSTI()))
8899 return ParseStatus::NoMatch;
8900
8901 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8902 SMLoc S = getLoc();
8903 lex();
8904 lex();
8905 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8906 SMLoc OpYLoc = getLoc();
8907 StringRef OpYName;
8908 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
8909 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
8910 return ParseStatus::Success;
8911 }
8912 return Error(OpYLoc, "expected a VOPDY instruction after ::");
8913 }
8914 return ParseStatus::NoMatch;
8915}
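// Illustrative VOPD syntax accepted by parseVOPD() above, with '::' separating
// the X and Y components (mnemonics shown for illustration):
//   v_dual_mov_b32 v0, v1 :: v_dual_add_f32 v2, v3, v4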
8916
8917// Create VOPD MCInst operands using parsed assembler operands.
8918void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
8919 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
8920 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
8921 if (Op.isReg()) {
8922 Op.addRegOperands(Inst, 1);
8923 return;
8924 }
8925 if (Op.isImm()) {
8926 Op.addImmOperands(Inst, 1);
8927 return;
8928 }
8929 llvm_unreachable("Unhandled operand type in cvtVOPD");
8930 };
8931
8932 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
8933
8934 // MCInst operands are ordered as follows:
8935 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
8936
8937 for (auto CompIdx : VOPD::COMPONENTS) {
8938 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
8939 }
8940
8941 for (auto CompIdx : VOPD::COMPONENTS) {
8942 const auto &CInfo = InstInfo[CompIdx];
8943 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
8944 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
8945 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
8946 if (CInfo.hasSrc2Acc())
8947 addOp(CInfo.getIndexOfDstInParsedOperands());
8948 }
8949}
8950
8951//===----------------------------------------------------------------------===//
8952// dpp
8953//===----------------------------------------------------------------------===//
8954
8955bool AMDGPUOperand::isDPP8() const {
8956 return isImmTy(ImmTyDPP8);
8957}
8958
8959bool AMDGPUOperand::isDPPCtrl() const {
8960 using namespace AMDGPU::DPP;
8961
8962 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8963 if (result) {
8964 int64_t Imm = getImm();
8965 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8966 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8967 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8968 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8969 (Imm == DppCtrl::WAVE_SHL1) ||
8970 (Imm == DppCtrl::WAVE_ROL1) ||
8971 (Imm == DppCtrl::WAVE_SHR1) ||
8972 (Imm == DppCtrl::WAVE_ROR1) ||
8973 (Imm == DppCtrl::ROW_MIRROR) ||
8974 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8975 (Imm == DppCtrl::BCAST15) ||
8976 (Imm == DppCtrl::BCAST31) ||
8977 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8978 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8979 }
8980 return false;
8981}
8982
8983//===----------------------------------------------------------------------===//
8984// mAI
8985//===----------------------------------------------------------------------===//
8986
8987bool AMDGPUOperand::isBLGP() const {
8988 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8989}
8990
8991bool AMDGPUOperand::isS16Imm() const {
8992 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8993}
8994
8995bool AMDGPUOperand::isU16Imm() const {
8996 return isImmLiteral() && isUInt<16>(getImm());
8997}
8998
8999//===----------------------------------------------------------------------===//
9000// dim
9001//===----------------------------------------------------------------------===//
9002
9003bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
9004 // We want to allow "dim:1D" etc.,
9005 // but the initial 1 is tokenized as an integer.
9006 std::string Token;
9007 if (isToken(AsmToken::Integer)) {
9008 SMLoc Loc = getToken().getEndLoc();
9009 Token = std::string(getTokenStr());
9010 lex();
9011 if (getLoc() != Loc)
9012 return false;
9013 }
9014
9015 StringRef Suffix;
9016 if (!parseId(Suffix))
9017 return false;
9018 Token += Suffix;
9019
9020 StringRef DimId = Token;
9021 if (DimId.starts_with("SQ_RSRC_IMG_"))
9022 DimId = DimId.drop_front(12);
9023
9024 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
9025 if (!DimInfo)
9026 return false;
9027
9028 Encoding = DimInfo->Encoding;
9029 return true;
9030}
9031
9032ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
9033 if (!isGFX10Plus())
9034 return ParseStatus::NoMatch;
9035
9036 SMLoc S = getLoc();
9037
9038 if (!trySkipId("dim", AsmToken::Colon))
9039 return ParseStatus::NoMatch;
9040
9041 unsigned Encoding;
9042 SMLoc Loc = getLoc();
9043 if (!parseDimId(Encoding))
9044 return Error(Loc, "invalid dim value");
9045
9046 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
9047 AMDGPUOperand::ImmTyDim));
9048 return ParseStatus::Success;
9049}
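// Illustrative dim operand forms accepted by parseDim() above (mnemonic and
// operands shown for illustration):
//   image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D
//   image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:2D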
9050
9051//===----------------------------------------------------------------------===//
9052// dpp
9053//===----------------------------------------------------------------------===//
9054
9055ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9056 SMLoc S = getLoc();
9057
9058 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9059 return ParseStatus::NoMatch;
9060
9061 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
9062
9063 int64_t Sels[8];
9064
9065 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9066 return ParseStatus::Failure;
9067
9068 for (size_t i = 0; i < 8; ++i) {
9069 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9070 return ParseStatus::Failure;
9071
9072 SMLoc Loc = getLoc();
9073 if (getParser().parseAbsoluteExpression(Sels[i]))
9074 return ParseStatus::Failure;
9075 if (0 > Sels[i] || 7 < Sels[i])
9076 return Error(Loc, "expected a 3-bit value");
9077 }
9078
9079 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9080 return ParseStatus::Failure;
9081
9082 unsigned DPP8 = 0;
9083 for (size_t i = 0; i < 8; ++i)
9084 DPP8 |= (Sels[i] << (i * 3));
9085
9086 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9087 return ParseStatus::Success;
9088}
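// Illustrative dpp8 syntax accepted by parseDPP8() above, listing eight 3-bit
// lane selectors (mnemonic shown for illustration):
//   v_mov_b32_dpp v0, v1 dpp8:[7,6,5,4,3,2,1,0]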
9089
9090bool
9091AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9092 const OperandVector &Operands) {
9093 if (Ctrl == "row_newbcast")
9094 return isGFX90A();
9095
9096 if (Ctrl == "row_share" ||
9097 Ctrl == "row_xmask")
9098 return isGFX10Plus();
9099
9100 if (Ctrl == "wave_shl" ||
9101 Ctrl == "wave_shr" ||
9102 Ctrl == "wave_rol" ||
9103 Ctrl == "wave_ror" ||
9104 Ctrl == "row_bcast")
9105 return isVI() || isGFX9();
9106
9107 return Ctrl == "row_mirror" ||
9108 Ctrl == "row_half_mirror" ||
9109 Ctrl == "quad_perm" ||
9110 Ctrl == "row_shl" ||
9111 Ctrl == "row_shr" ||
9112 Ctrl == "row_ror";
9113}
9114
9115int64_t
9116AMDGPUAsmParser::parseDPPCtrlPerm() {
9117 // quad_perm:[%d,%d,%d,%d]
9118
9119 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9120 return -1;
9121
9122 int64_t Val = 0;
9123 for (int i = 0; i < 4; ++i) {
9124 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9125 return -1;
9126
9127 int64_t Temp;
9128 SMLoc Loc = getLoc();
9129 if (getParser().parseAbsoluteExpression(Temp))
9130 return -1;
9131 if (Temp < 0 || Temp > 3) {
9132 Error(Loc, "expected a 2-bit value");
9133 return -1;
9134 }
9135
9136 Val += (Temp << i * 2);
9137 }
9138
9139 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9140 return -1;
9141
9142 return Val;
9143}
9144
9145int64_t
9146AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
9147 using namespace AMDGPU::DPP;
9148
9149 // sel:%d
9150
9151 int64_t Val;
9152 SMLoc Loc = getLoc();
9153
9154 if (getParser().parseAbsoluteExpression(Val))
9155 return -1;
9156
9157 struct DppCtrlCheck {
9158 int64_t Ctrl;
9159 int Lo;
9160 int Hi;
9161 };
9162
9163 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
9164 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
9165 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
9166 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
9167 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
9168 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
9169 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
9170 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
9171 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
9172 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
9173 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
9174 .Default({-1, 0, 0});
9175
9176 bool Valid;
9177 if (Check.Ctrl == -1) {
9178 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
9179 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
9180 } else {
9181 Valid = Check.Lo <= Val && Val <= Check.Hi;
9182 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
9183 }
9184
9185 if (!Valid) {
9186 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
9187 return -1;
9188 }
9189
9190 return Val;
9191}
9192
9193ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
9194 using namespace AMDGPU::DPP;
9195
9196 if (!isToken(AsmToken::Identifier) ||
9197 !isSupportedDPPCtrl(getTokenStr(), Operands))
9198 return ParseStatus::NoMatch;
9199
9200 SMLoc S = getLoc();
9201 int64_t Val = -1;
9202 StringRef Ctrl;
9203
9204 parseId(Ctrl);
9205
9206 if (Ctrl == "row_mirror") {
9207 Val = DppCtrl::ROW_MIRROR;
9208 } else if (Ctrl == "row_half_mirror") {
9209 Val = DppCtrl::ROW_HALF_MIRROR;
9210 } else {
9211 if (skipToken(AsmToken::Colon, "expected a colon")) {
9212 if (Ctrl == "quad_perm") {
9213 Val = parseDPPCtrlPerm();
9214 } else {
9215 Val = parseDPPCtrlSel(Ctrl);
9216 }
9217 }
9218 }
9219
9220 if (Val == -1)
9221 return ParseStatus::Failure;
9222
9223 Operands.push_back(
9224 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
9225 return ParseStatus::Success;
9226}
9227
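 // Converts the parsed operands of a VOP3 DPP/DPP8 instruction into Inst,
 // re-adding tied or dummy operands for MAC forms and appending the optional
 // DPP fields (dpp_ctrl, row_mask, bank_mask, bound_ctrl and fi, or dpp8).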
9228void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
9229 bool IsDPP8) {
9230 OptionalImmIndexMap OptionalIdx;
9231 unsigned Opc = Inst.getOpcode();
9232 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9233
 9234 // MAC instructions are special because they have an 'old'
 9235 // operand which is not tied to dst (but is assumed to be).
9236 // They also have dummy unused src2_modifiers.
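 // For example, while converting a MAC DPP instruction the loop below re-adds
 // the dst register as the 'old' operand and supplies a dummy src2_modifiers
 // immediate at the expected operand positions.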
9237 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
9238 int Src2ModIdx =
9239 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
9240 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
9241 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
9242
9243 unsigned I = 1;
9244 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9245 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9246 }
9247
9248 int Fi = 0;
9249 for (unsigned E = Operands.size(); I != E; ++I) {
9250
9251 if (IsMAC) {
9252 int NumOperands = Inst.getNumOperands();
9253 if (OldIdx == NumOperands) {
9254 // Handle old operand
9255 constexpr int DST_IDX = 0;
9256 Inst.addOperand(Inst.getOperand(DST_IDX));
9257 } else if (Src2ModIdx == NumOperands) {
9258 // Add unused dummy src2_modifiers
 9259 Inst.addOperand(MCOperand::createImm(0));
 9260 }
9261 }
9262
9263 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
9264 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
9265 Inst.addOperand(Inst.getOperand(0));
9266 }
9267
9268 bool IsVOP3CvtSrDpp =
9269 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9270 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9271 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9272 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
9273 if (IsVOP3CvtSrDpp) {
9274 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
 9275 Inst.addOperand(MCOperand::createImm(0));
 9276 Inst.addOperand(MCOperand::createReg(MCRegister()));
 9277 }
9278 }
9279
9280 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
 9281 MCOI::TIED_TO);
 9282 if (TiedTo != -1) {
9283 assert((unsigned)TiedTo < Inst.getNumOperands());
9284 // handle tied old or src2 for MAC instructions
9285 Inst.addOperand(Inst.getOperand(TiedTo));
9286 }
9287 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9288 // Add the register arguments
9289 if (IsDPP8 && Op.isDppFI()) {
9290 Fi = Op.getImm();
9291 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9292 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9293 } else if (Op.isReg()) {
9294 Op.addRegOperands(Inst, 1);
9295 } else if (Op.isImm() &&
9296 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
9297 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
9298 Op.addImmOperands(Inst, 1);
9299 } else if (Op.isImm()) {
9300 OptionalIdx[Op.getImmTy()] = I;
9301 } else {
9302 llvm_unreachable("unhandled operand type");
9303 }
9304 }
9305
9306 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel))
9307 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9308 AMDGPUOperand::ImmTyByteSel);
9309
9310 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9311 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9312 AMDGPUOperand::ImmTyClamp);
9313
9314 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9315 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
9316
9317 if (Desc.TSFlags & SIInstrFlags::VOP3P)
9318 cvtVOP3P(Inst, Operands, OptionalIdx);
9319 else if (Desc.TSFlags & SIInstrFlags::VOP3)
9320 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
9321 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9322 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9323 }
9324
9325 if (IsDPP8) {
9326 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
9327 using namespace llvm::AMDGPU::DPP;
9328 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9329 } else {
9330 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
9331 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9332 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9333 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9334
9335 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
9336 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9337 AMDGPUOperand::ImmTyDppFI);
9338 }
9339}
9340
9341void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
9342 OptionalImmIndexMap OptionalIdx;
9343
9344 unsigned I = 1;
9345 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9346 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9347 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9348 }
9349
9350 int Fi = 0;
9351 for (unsigned E = Operands.size(); I != E; ++I) {
9352 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
 9353 MCOI::TIED_TO);
 9354 if (TiedTo != -1) {
9355 assert((unsigned)TiedTo < Inst.getNumOperands());
9356 // handle tied old or src2 for MAC instructions
9357 Inst.addOperand(Inst.getOperand(TiedTo));
9358 }
9359 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9360 // Add the register arguments
9361 if (Op.isReg() && validateVccOperand(Op.getReg())) {
 9362 // VOP2b (v_add_u32, v_sub_u32 ...) DPP forms use the "vcc" token.
9363 // Skip it.
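 // E.g. the explicit "vcc" written in a VOP2b DPP instruction is accepted by
 // the parser but is not encoded as an operand of the resulting MCInst.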
9364 continue;
9365 }
9366
9367 if (IsDPP8) {
9368 if (Op.isDPP8()) {
9369 Op.addImmOperands(Inst, 1);
9370 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9371 Op.addRegWithFPInputModsOperands(Inst, 2);
9372 } else if (Op.isDppFI()) {
9373 Fi = Op.getImm();
9374 } else if (Op.isReg()) {
9375 Op.addRegOperands(Inst, 1);
9376 } else {
9377 llvm_unreachable("Invalid operand type");
9378 }
9379 } else {
 9380 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
 9381 Op.addRegWithFPInputModsOperands(Inst, 2);
9382 } else if (Op.isReg()) {
9383 Op.addRegOperands(Inst, 1);
9384 } else if (Op.isDPPCtrl()) {
9385 Op.addImmOperands(Inst, 1);
9386 } else if (Op.isImm()) {
9387 // Handle optional arguments
9388 OptionalIdx[Op.getImmTy()] = I;
9389 } else {
9390 llvm_unreachable("Invalid operand type");
9391 }
9392 }
9393 }
9394
9395 if (IsDPP8) {
9396 using namespace llvm::AMDGPU::DPP;
9397 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9398 } else {
9399 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9400 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9401 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9402 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
9403 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9404 AMDGPUOperand::ImmTyDppFI);
9405 }
9406 }
9407}
9408
9409//===----------------------------------------------------------------------===//
9410// sdwa
9411//===----------------------------------------------------------------------===//
9412
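// Parses an SDWA select operand such as dst_sel:WORD_1 or src0_sel:BYTE_0
// using the given prefix and immediate operand type.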
9413ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
9414 StringRef Prefix,
9415 AMDGPUOperand::ImmTy Type) {
9416 return parseStringOrIntWithPrefix(
9417 Operands, Prefix,
9418 {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
9419 Type);
9420}
9421
9422ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
9423 return parseStringOrIntWithPrefix(
9424 Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
9425 AMDGPUOperand::ImmTySDWADstUnused);
9426}
9427
9428void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
9429 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
9430}
9431
9432void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
9433 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
9434}
9435
9436void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
9437 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
9438}
9439
9440void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
9441 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
9442}
9443
9444void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
9445 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
9446}
9447
9448void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
9449 uint64_t BasicInstType,
9450 bool SkipDstVcc,
9451 bool SkipSrcVcc) {
9452 using namespace llvm::AMDGPU::SDWA;
9453
9454 OptionalImmIndexMap OptionalIdx;
9455 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
9456 bool SkippedVcc = false;
9457
9458 unsigned I = 1;
9459 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9460 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9461 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9462 }
9463
9464 for (unsigned E = Operands.size(); I != E; ++I) {
9465 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9466 if (SkipVcc && !SkippedVcc && Op.isReg() &&
9467 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
 9468 // VOP2b (v_add_u32, v_sub_u32 ...) SDWA forms use the "vcc" token as dst.
9469 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
9470 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
9471 // Skip VCC only if we didn't skip it on previous iteration.
9472 // Note that src0 and src1 occupy 2 slots each because of modifiers.
9473 if (BasicInstType == SIInstrFlags::VOP2 &&
9474 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
9475 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
9476 SkippedVcc = true;
9477 continue;
9478 }
9479 if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
9480 SkippedVcc = true;
9481 continue;
9482 }
9483 }
 9484 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
 9485 Op.addRegOrImmWithInputModsOperands(Inst, 2);
9486 } else if (Op.isImm()) {
9487 // Handle optional arguments
9488 OptionalIdx[Op.getImmTy()] = I;
9489 } else {
9490 llvm_unreachable("Invalid operand type");
9491 }
9492 SkippedVcc = false;
9493 }
9494
9495 const unsigned Opc = Inst.getOpcode();
9496 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
9497 Opc != AMDGPU::V_NOP_sdwa_vi) {
 9498 // v_nop_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments
9499 switch (BasicInstType) {
9500 case SIInstrFlags::VOP1:
9501 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9502 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9503 AMDGPUOperand::ImmTyClamp, 0);
9504
9505 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9506 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9507 AMDGPUOperand::ImmTyOModSI, 0);
9508
9509 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
9510 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9511 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9512
9513 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
9514 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9515 AMDGPUOperand::ImmTySDWADstUnused,
9516 DstUnused::UNUSED_PRESERVE);
9517
9518 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9519 break;
9520
9521 case SIInstrFlags::VOP2:
9522 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9523 AMDGPUOperand::ImmTyClamp, 0);
9524
9525 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
9526 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
9527
9528 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9529 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
9530 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9531 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9532 break;
9533
9534 case SIInstrFlags::VOPC:
9535 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
9536 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9537 AMDGPUOperand::ImmTyClamp, 0);
9538 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9539 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9540 break;
9541
9542 default:
9543 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9544 }
9545 }
9546
 9547 // Special case for v_mac_{f16, f32}:
 9548 // they have a src2 register operand that is tied to the dst operand.
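 // E.g. for "v_mac_f32_sdwa v0, v1, v2" the dst register v0 is inserted again
 // below as the tied src2 operand.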
9549 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9550 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
9551 auto it = Inst.begin();
9552 std::advance(
9553 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
9554 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
9555 }
9556}
9557
9558/// Force static initialization.
9559extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
 9560 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
 9561 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
 9562}
9563
9564#define GET_REGISTER_MATCHER
9565#define GET_MATCHER_IMPLEMENTATION
9566#define GET_MNEMONIC_SPELL_CHECKER
9567#define GET_MNEMONIC_CHECKER
9568#include "AMDGPUGenAsmMatcher.inc"
9569
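// Custom parsing hook invoked by the generated matcher for operand classes
// (MCK_*) that need special handling, e.g. the "offen"/"idxen" tokens or the
// "gds" named bit.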
9570ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
9571 unsigned MCK) {
9572 switch (MCK) {
9573 case MCK_addr64:
9574 return parseTokenOp("addr64", Operands);
9575 case MCK_done:
9576 return parseTokenOp("done", Operands);
9577 case MCK_idxen:
9578 return parseTokenOp("idxen", Operands);
9579 case MCK_lds:
9580 return parseTokenOp("lds", Operands);
9581 case MCK_offen:
9582 return parseTokenOp("offen", Operands);
9583 case MCK_off:
9584 return parseTokenOp("off", Operands);
9585 case MCK_row_95_en:
9586 return parseTokenOp("row_en", Operands);
9587 case MCK_gds:
9588 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
9589 case MCK_tfe:
9590 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
9591 }
9592 return tryCustomParseOperand(Operands, MCK);
9593}
9594
9595// This function should be defined after auto-generated include so that we have
9596// MatchClassKind enum defined
9597unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
9598 unsigned Kind) {
9599 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
 9600 // But MatchInstructionImpl() expects to see a token and fails to validate
 9601 // the operand. This method checks whether we were given an immediate operand
 9602 // where the corresponding token was expected.
9603 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9604 switch (Kind) {
9605 case MCK_addr64:
9606 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9607 case MCK_gds:
9608 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9609 case MCK_lds:
9610 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9611 case MCK_idxen:
9612 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9613 case MCK_offen:
9614 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9615 case MCK_tfe:
9616 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
9617 case MCK_SSrc_b32:
9618 // When operands have expression values, they will return true for isToken,
9619 // because it is not possible to distinguish between a token and an
 9620 // expression at parse time. MatchInstructionImpl() always tries to match
 9621 // an operand as a token when isToken returns true; if the name of the
 9622 // expression is not a valid token, the match fails,
 9623 // so we need to handle it here.
9624 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
9625 case MCK_SSrc_f32:
9626 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
9627 case MCK_SOPPBrTarget:
9628 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
9629 case MCK_VReg32OrOff:
9630 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9631 case MCK_InterpSlot:
9632 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9633 case MCK_InterpAttr:
9634 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9635 case MCK_InterpAttrChan:
9636 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
9637 case MCK_SReg_64:
9638 case MCK_SReg_64_XEXEC:
9639 // Null is defined as a 32-bit register but
9640 // it should also be enabled with 64-bit operands.
9641 // The following code enables it for SReg_64 operands
9642 // used as source and destination. Remaining source
9643 // operands are handled in isInlinableImm.
9644 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9645 default:
9646 return Match_InvalidOperand;
9647 }
9648}
9649
9650//===----------------------------------------------------------------------===//
9651// endpgm
9652//===----------------------------------------------------------------------===//
9653
9654ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
9655 SMLoc S = getLoc();
9656 int64_t Imm = 0;
9657
9658 if (!parseExpr(Imm)) {
 9659 // The operand is optional; if not present, default to 0.
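 // e.g. "s_endpgm" and "s_endpgm 3" are both accepted.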
9660 Imm = 0;
9661 }
9662
9663 if (!isUInt<16>(Imm))
9664 return Error(S, "expected a 16-bit value");
9665
9666 Operands.push_back(
9667 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9668 return ParseStatus::Success;
9669}
9670
9671bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9672
9673//===----------------------------------------------------------------------===//
9674// Split Barrier
9675//===----------------------------------------------------------------------===//
9676
9677bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }